@wahack/pi-coding-agent 15.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10031 -0
- package/README.md +36 -0
- package/examples/README.md +21 -0
- package/examples/custom-tools/README.md +104 -0
- package/examples/custom-tools/hello/index.ts +20 -0
- package/examples/extensions/README.md +142 -0
- package/examples/extensions/api-demo.ts +79 -0
- package/examples/extensions/chalk-logger.ts +25 -0
- package/examples/extensions/hello.ts +31 -0
- package/examples/extensions/pirate.ts +43 -0
- package/examples/extensions/plan-mode.ts +549 -0
- package/examples/extensions/reload-runtime.ts +38 -0
- package/examples/extensions/thinking-note.ts +13 -0
- package/examples/extensions/tools.ts +145 -0
- package/examples/extensions/with-deps/index.ts +36 -0
- package/examples/extensions/with-deps/package-lock.json +31 -0
- package/examples/extensions/with-deps/package.json +17 -0
- package/examples/hooks/README.md +56 -0
- package/examples/hooks/auto-commit-on-exit.ts +48 -0
- package/examples/hooks/confirm-destructive.ts +58 -0
- package/examples/hooks/custom-compaction.ts +115 -0
- package/examples/hooks/dirty-repo-guard.ts +51 -0
- package/examples/hooks/file-trigger.ts +40 -0
- package/examples/hooks/git-checkpoint.ts +52 -0
- package/examples/hooks/handoff.ts +149 -0
- package/examples/hooks/permission-gate.ts +33 -0
- package/examples/hooks/protected-paths.ts +29 -0
- package/examples/hooks/qna.ts +118 -0
- package/examples/hooks/status-line.ts +39 -0
- package/examples/sdk/01-minimal.ts +21 -0
- package/examples/sdk/02-custom-model.ts +49 -0
- package/examples/sdk/03-custom-prompt.ts +46 -0
- package/examples/sdk/04-skills.ts +43 -0
- package/examples/sdk/06-extensions.ts +82 -0
- package/examples/sdk/06-hooks.ts +61 -0
- package/examples/sdk/07-context-files.ts +35 -0
- package/examples/sdk/08-prompt-templates.ts +41 -0
- package/examples/sdk/08-slash-commands.ts +46 -0
- package/examples/sdk/09-api-keys-and-oauth.ts +54 -0
- package/examples/sdk/11-sessions.ts +47 -0
- package/examples/sdk/12-redis-sessions.ts +54 -0
- package/examples/sdk/13-sql-sessions.ts +61 -0
- package/examples/sdk/README.md +172 -0
- package/package.json +554 -0
- package/scripts/build-binary.ts +100 -0
- package/scripts/bundle-dist.ts +90 -0
- package/scripts/format-prompts.ts +68 -0
- package/scripts/generate-docs-index.ts +40 -0
- package/scripts/generate-template.ts +33 -0
- package/scripts/omp +42 -0
- package/scripts/omp.ts +19 -0
- package/src/async/index.ts +1 -0
- package/src/async/job-manager.ts +625 -0
- package/src/auto-thinking/classifier.ts +185 -0
- package/src/autoresearch/command-resume.md +14 -0
- package/src/autoresearch/dashboard.ts +436 -0
- package/src/autoresearch/git.ts +319 -0
- package/src/autoresearch/helpers.ts +218 -0
- package/src/autoresearch/index.ts +536 -0
- package/src/autoresearch/prompt-setup.md +43 -0
- package/src/autoresearch/prompt.md +103 -0
- package/src/autoresearch/resume-message.md +10 -0
- package/src/autoresearch/state.ts +273 -0
- package/src/autoresearch/storage.ts +699 -0
- package/src/autoresearch/tools/init-experiment.ts +272 -0
- package/src/autoresearch/tools/log-experiment.ts +524 -0
- package/src/autoresearch/tools/run-experiment.ts +407 -0
- package/src/autoresearch/tools/update-notes.ts +109 -0
- package/src/autoresearch/types.ts +168 -0
- package/src/bun-imports.d.ts +28 -0
- package/src/capability/context-file.ts +44 -0
- package/src/capability/extension-module.ts +34 -0
- package/src/capability/extension.ts +47 -0
- package/src/capability/fs.ts +117 -0
- package/src/capability/hook.ts +40 -0
- package/src/capability/index.ts +436 -0
- package/src/capability/instruction.ts +37 -0
- package/src/capability/mcp.ts +74 -0
- package/src/capability/prompt.ts +35 -0
- package/src/capability/rule-buckets.ts +66 -0
- package/src/capability/rule.ts +261 -0
- package/src/capability/settings.ts +34 -0
- package/src/capability/skill.ts +63 -0
- package/src/capability/slash-command.ts +40 -0
- package/src/capability/ssh.ts +41 -0
- package/src/capability/system-prompt.ts +34 -0
- package/src/capability/tool.ts +38 -0
- package/src/capability/types.ts +168 -0
- package/src/cli/agents-cli.ts +138 -0
- package/src/cli/args.ts +340 -0
- package/src/cli/auth-broker-cli.ts +895 -0
- package/src/cli/auth-gateway-cli.ts +611 -0
- package/src/cli/classify-install-target.ts +76 -0
- package/src/cli/claude-trace-cli.ts +795 -0
- package/src/cli/commands/init-xdg.ts +27 -0
- package/src/cli/completion-gen.ts +550 -0
- package/src/cli/config-cli.ts +418 -0
- package/src/cli/dry-balance-cli.ts +856 -0
- package/src/cli/extension-flags.ts +48 -0
- package/src/cli/file-processor.ts +133 -0
- package/src/cli/gallery-cli.ts +230 -0
- package/src/cli/gallery-fixtures/agentic.ts +407 -0
- package/src/cli/gallery-fixtures/codeintel.ts +187 -0
- package/src/cli/gallery-fixtures/edit.ts +194 -0
- package/src/cli/gallery-fixtures/fs.ts +220 -0
- package/src/cli/gallery-fixtures/index.ts +40 -0
- package/src/cli/gallery-fixtures/interaction.ts +49 -0
- package/src/cli/gallery-fixtures/memory.ts +81 -0
- package/src/cli/gallery-fixtures/misc.ts +250 -0
- package/src/cli/gallery-fixtures/search.ts +213 -0
- package/src/cli/gallery-fixtures/shell.ts +167 -0
- package/src/cli/gallery-fixtures/types.ts +57 -0
- package/src/cli/gallery-fixtures/web.ts +158 -0
- package/src/cli/gallery-screenshot.ts +279 -0
- package/src/cli/grep-cli.ts +160 -0
- package/src/cli/grievances-cli.ts +256 -0
- package/src/cli/initial-message.ts +58 -0
- package/src/cli/list-models.ts +194 -0
- package/src/cli/plugin-cli.ts +996 -0
- package/src/cli/read-cli.ts +57 -0
- package/src/cli/session-picker.ts +79 -0
- package/src/cli/setup-cli.ts +231 -0
- package/src/cli/shell-cli.ts +176 -0
- package/src/cli/ssh-cli.ts +179 -0
- package/src/cli/startup-cwd.ts +68 -0
- package/src/cli/stats-cli.ts +238 -0
- package/src/cli/tiny-models-cli.ts +127 -0
- package/src/cli/update-cli.ts +611 -0
- package/src/cli/usage-cli.ts +603 -0
- package/src/cli/web-search-cli.ts +132 -0
- package/src/cli/worktree-cli.ts +291 -0
- package/src/cli-commands.ts +79 -0
- package/src/cli.ts +200 -0
- package/src/commands/acp.ts +24 -0
- package/src/commands/agents.ts +57 -0
- package/src/commands/auth-broker.ts +99 -0
- package/src/commands/auth-gateway.ts +69 -0
- package/src/commands/commit.ts +46 -0
- package/src/commands/complete.ts +66 -0
- package/src/commands/completions.ts +60 -0
- package/src/commands/config.ts +51 -0
- package/src/commands/dry-balance.ts +43 -0
- package/src/commands/gallery.ts +52 -0
- package/src/commands/grep.ts +48 -0
- package/src/commands/grievances.ts +51 -0
- package/src/commands/install.ts +107 -0
- package/src/commands/launch.ts +169 -0
- package/src/commands/plugin.ts +78 -0
- package/src/commands/read.ts +38 -0
- package/src/commands/setup.ts +67 -0
- package/src/commands/shell.ts +29 -0
- package/src/commands/ssh.ts +60 -0
- package/src/commands/stats.ts +29 -0
- package/src/commands/tiny-models.ts +36 -0
- package/src/commands/update.ts +21 -0
- package/src/commands/usage.ts +35 -0
- package/src/commands/web-search.ts +42 -0
- package/src/commands/worktree.ts +56 -0
- package/src/commit/agentic/agent.ts +317 -0
- package/src/commit/agentic/fallback.ts +96 -0
- package/src/commit/agentic/index.ts +355 -0
- package/src/commit/agentic/prompts/analyze-file.md +22 -0
- package/src/commit/agentic/prompts/session-user.md +25 -0
- package/src/commit/agentic/prompts/split-confirm.md +1 -0
- package/src/commit/agentic/prompts/system.md +38 -0
- package/src/commit/agentic/state.ts +60 -0
- package/src/commit/agentic/tools/analyze-file.ts +146 -0
- package/src/commit/agentic/tools/git-file-diff.ts +191 -0
- package/src/commit/agentic/tools/git-hunk.ts +50 -0
- package/src/commit/agentic/tools/git-overview.ts +81 -0
- package/src/commit/agentic/tools/index.ts +54 -0
- package/src/commit/agentic/tools/propose-changelog.ts +144 -0
- package/src/commit/agentic/tools/propose-commit.ts +109 -0
- package/src/commit/agentic/tools/recent-commits.ts +81 -0
- package/src/commit/agentic/tools/schemas.ts +23 -0
- package/src/commit/agentic/tools/split-commit.ts +245 -0
- package/src/commit/agentic/topo-sort.ts +44 -0
- package/src/commit/agentic/trivial.ts +51 -0
- package/src/commit/agentic/validation.ts +183 -0
- package/src/commit/analysis/conventional.ts +64 -0
- package/src/commit/analysis/index.ts +4 -0
- package/src/commit/analysis/scope.ts +242 -0
- package/src/commit/analysis/summary.ts +105 -0
- package/src/commit/analysis/validation.ts +66 -0
- package/src/commit/changelog/detect.ts +40 -0
- package/src/commit/changelog/generate.ts +97 -0
- package/src/commit/changelog/index.ts +234 -0
- package/src/commit/changelog/parse.ts +44 -0
- package/src/commit/cli.ts +85 -0
- package/src/commit/git/diff.ts +148 -0
- package/src/commit/index.ts +5 -0
- package/src/commit/map-reduce/index.ts +69 -0
- package/src/commit/map-reduce/map-phase.ts +193 -0
- package/src/commit/map-reduce/reduce-phase.ts +49 -0
- package/src/commit/map-reduce/utils.ts +9 -0
- package/src/commit/message.ts +11 -0
- package/src/commit/model-selection.ts +92 -0
- package/src/commit/pipeline.ts +243 -0
- package/src/commit/prompts/analysis-system.md +148 -0
- package/src/commit/prompts/analysis-user.md +38 -0
- package/src/commit/prompts/changelog-system.md +50 -0
- package/src/commit/prompts/changelog-user.md +18 -0
- package/src/commit/prompts/file-observer-system.md +24 -0
- package/src/commit/prompts/file-observer-user.md +8 -0
- package/src/commit/prompts/reduce-system.md +50 -0
- package/src/commit/prompts/reduce-user.md +17 -0
- package/src/commit/prompts/summary-retry.md +3 -0
- package/src/commit/prompts/summary-system.md +38 -0
- package/src/commit/prompts/summary-user.md +13 -0
- package/src/commit/prompts/types-description.md +2 -0
- package/src/commit/shared-llm.ts +77 -0
- package/src/commit/types.ts +118 -0
- package/src/commit/utils/exclusions.ts +42 -0
- package/src/commit/utils.ts +58 -0
- package/src/config/api-key-resolver.ts +60 -0
- package/src/config/append-only-context-mode.ts +31 -0
- package/src/config/config-file.ts +317 -0
- package/src/config/file-lock.ts +164 -0
- package/src/config/keybindings.ts +628 -0
- package/src/config/mcp-schema.json +230 -0
- package/src/config/model-discovery.ts +554 -0
- package/src/config/model-registry.ts +2090 -0
- package/src/config/model-resolver.ts +1502 -0
- package/src/config/model-roles.ts +74 -0
- package/src/config/models-config-schema.ts +226 -0
- package/src/config/models-config.ts +129 -0
- package/src/config/prompt-templates.ts +185 -0
- package/src/config/resolve-config-value.ts +94 -0
- package/src/config/settings-schema.ts +3530 -0
- package/src/config/settings.ts +1178 -0
- package/src/config.ts +242 -0
- package/src/cursor.ts +340 -0
- package/src/dap/client.ts +760 -0
- package/src/dap/config.ts +189 -0
- package/src/dap/defaults.json +212 -0
- package/src/dap/index.ts +4 -0
- package/src/dap/session.ts +1441 -0
- package/src/dap/types.ts +610 -0
- package/src/debug/index.ts +515 -0
- package/src/debug/log-formatting.ts +58 -0
- package/src/debug/log-viewer.ts +908 -0
- package/src/debug/profiler.ts +162 -0
- package/src/debug/protocol-probe.ts +267 -0
- package/src/debug/raw-sse-buffer.ts +273 -0
- package/src/debug/raw-sse.ts +292 -0
- package/src/debug/report-bundle.ts +374 -0
- package/src/debug/system-info.ts +111 -0
- package/src/debug/terminal-info.ts +124 -0
- package/src/discovery/agents-md.ts +67 -0
- package/src/discovery/agents.ts +230 -0
- package/src/discovery/at-imports.ts +273 -0
- package/src/discovery/builtin-defaults.ts +39 -0
- package/src/discovery/builtin-rules/index.ts +54 -0
- package/src/discovery/builtin-rules/rs-box-leak.md +48 -0
- package/src/discovery/builtin-rules/rs-future-prelude.md +23 -0
- package/src/discovery/builtin-rules/rs-lazylock.md +51 -0
- package/src/discovery/builtin-rules/rs-match-ergonomics.md +67 -0
- package/src/discovery/builtin-rules/rs-parking-lot.md +44 -0
- package/src/discovery/builtin-rules/rs-result-type.md +19 -0
- package/src/discovery/builtin-rules/ts-bare-catch.md +38 -0
- package/src/discovery/builtin-rules/ts-import-type.md +42 -0
- package/src/discovery/builtin-rules/ts-no-any.md +56 -0
- package/src/discovery/builtin-rules/ts-no-deprecated-leftovers.md +44 -0
- package/src/discovery/builtin-rules/ts-no-dynamic-import.md +39 -0
- package/src/discovery/builtin-rules/ts-no-return-type.md +45 -0
- package/src/discovery/builtin-rules/ts-no-test-timers.md +55 -0
- package/src/discovery/builtin-rules/ts-no-tiny-functions.md +51 -0
- package/src/discovery/builtin-rules/ts-promise-with-resolvers.md +65 -0
- package/src/discovery/builtin-rules/ts-redundant-clear-guard.md +75 -0
- package/src/discovery/builtin-rules/ts-set-map.md +28 -0
- package/src/discovery/builtin.ts +906 -0
- package/src/discovery/claude-plugins.ts +386 -0
- package/src/discovery/claude.ts +584 -0
- package/src/discovery/cline.ts +83 -0
- package/src/discovery/codex.ts +522 -0
- package/src/discovery/cursor.ts +220 -0
- package/src/discovery/gemini.ts +383 -0
- package/src/discovery/github.ts +154 -0
- package/src/discovery/helpers.ts +1016 -0
- package/src/discovery/index.ts +81 -0
- package/src/discovery/mcp-json.ts +171 -0
- package/src/discovery/omp-extension-roots.ts +190 -0
- package/src/discovery/omp-plugins.ts +383 -0
- package/src/discovery/opencode.ts +398 -0
- package/src/discovery/plugin-dir-roots.ts +28 -0
- package/src/discovery/ssh.ts +153 -0
- package/src/discovery/substitute-plugin-root.ts +29 -0
- package/src/discovery/vscode.ts +105 -0
- package/src/discovery/windsurf.ts +147 -0
- package/src/edit/apply-patch/index.ts +87 -0
- package/src/edit/apply-patch/parser.ts +174 -0
- package/src/edit/diff.ts +999 -0
- package/src/edit/file-snapshot-store.ts +91 -0
- package/src/edit/hashline/block-resolver.ts +33 -0
- package/src/edit/hashline/diff.ts +290 -0
- package/src/edit/hashline/execute.ts +242 -0
- package/src/edit/hashline/filesystem.ts +130 -0
- package/src/edit/hashline/index.ts +5 -0
- package/src/edit/hashline/noop-loop-guard.ts +99 -0
- package/src/edit/hashline/params.ts +18 -0
- package/src/edit/index.ts +571 -0
- package/src/edit/modes/apply-patch.lark +19 -0
- package/src/edit/modes/apply-patch.ts +53 -0
- package/src/edit/modes/patch.ts +1891 -0
- package/src/edit/modes/replace.ts +1137 -0
- package/src/edit/normalize.ts +345 -0
- package/src/edit/notebook.ts +242 -0
- package/src/edit/read-file.ts +25 -0
- package/src/edit/renderer.ts +769 -0
- package/src/edit/streaming.ts +517 -0
- package/src/eval/__tests__/agent-bridge.test.ts +708 -0
- package/src/eval/__tests__/bridge-timeout.test.ts +64 -0
- package/src/eval/__tests__/budget-bridge.test.ts +69 -0
- package/src/eval/__tests__/completion-bridge.test.ts +412 -0
- package/src/eval/__tests__/helpers-local-roots.test.ts +58 -0
- package/src/eval/__tests__/idle-timeout.test.ts +80 -0
- package/src/eval/__tests__/js-context-manager.test.ts +241 -0
- package/src/eval/__tests__/kernel-spawn.test.ts +103 -0
- package/src/eval/agent-bridge.ts +319 -0
- package/src/eval/backend.ts +71 -0
- package/src/eval/bridge-timeout.ts +44 -0
- package/src/eval/budget-bridge.ts +48 -0
- package/src/eval/completion-bridge.ts +207 -0
- package/src/eval/concurrency-bridge.ts +34 -0
- package/src/eval/idle-timeout.ts +91 -0
- package/src/eval/index.ts +4 -0
- package/src/eval/js/context-manager.ts +502 -0
- package/src/eval/js/executor.ts +173 -0
- package/src/eval/js/index.ts +51 -0
- package/src/eval/js/shared/helpers.ts +283 -0
- package/src/eval/js/shared/indirect-eval.ts +30 -0
- package/src/eval/js/shared/local-module-loader.ts +342 -0
- package/src/eval/js/shared/prelude.ts +2 -0
- package/src/eval/js/shared/prelude.txt +246 -0
- package/src/eval/js/shared/rewrite-imports.ts +532 -0
- package/src/eval/js/shared/runtime.ts +352 -0
- package/src/eval/js/shared/types.ts +18 -0
- package/src/eval/js/tool-bridge.ts +162 -0
- package/src/eval/js/worker-core.ts +132 -0
- package/src/eval/js/worker-entry.ts +30 -0
- package/src/eval/js/worker-protocol.ts +47 -0
- package/src/eval/py/__tests__/prelude.test.ts +19 -0
- package/src/eval/py/display.ts +71 -0
- package/src/eval/py/executor.ts +742 -0
- package/src/eval/py/index.ts +68 -0
- package/src/eval/py/kernel.ts +748 -0
- package/src/eval/py/prelude.py +658 -0
- package/src/eval/py/prelude.ts +3 -0
- package/src/eval/py/runner.py +1133 -0
- package/src/eval/py/runtime.ts +276 -0
- package/src/eval/py/spawn-options.ts +126 -0
- package/src/eval/py/tool-bridge.ts +182 -0
- package/src/eval/session-id.ts +8 -0
- package/src/eval/types.ts +48 -0
- package/src/exa/index.ts +2 -0
- package/src/exa/mcp-client.ts +370 -0
- package/src/exa/types.ts +69 -0
- package/src/exec/bash-executor.ts +419 -0
- package/src/exec/exec.ts +53 -0
- package/src/exec/non-interactive-env.ts +48 -0
- package/src/export/custom-share.ts +65 -0
- package/src/export/html/index.ts +164 -0
- package/src/export/html/template.css +1051 -0
- package/src/export/html/template.generated.ts +2 -0
- package/src/export/html/template.html +46 -0
- package/src/export/html/template.js +2271 -0
- package/src/export/html/template.macro.ts +25 -0
- package/src/export/html/vendor/highlight.min.js +1213 -0
- package/src/export/html/vendor/marked.min.js +6 -0
- package/src/export/ttsr.ts +583 -0
- package/src/extensibility/custom-commands/bundled/ci-green/index.ts +54 -0
- package/src/extensibility/custom-commands/bundled/review/index.ts +489 -0
- package/src/extensibility/custom-commands/index.ts +2 -0
- package/src/extensibility/custom-commands/loader.ts +238 -0
- package/src/extensibility/custom-commands/types.ts +113 -0
- package/src/extensibility/custom-tools/index.ts +7 -0
- package/src/extensibility/custom-tools/loader.ts +269 -0
- package/src/extensibility/custom-tools/types.ts +270 -0
- package/src/extensibility/custom-tools/wrapper.ts +47 -0
- package/src/extensibility/extensions/compact-handler.ts +40 -0
- package/src/extensibility/extensions/get-commands-handler.ts +78 -0
- package/src/extensibility/extensions/index.ts +16 -0
- package/src/extensibility/extensions/loader.ts +572 -0
- package/src/extensibility/extensions/runner.ts +922 -0
- package/src/extensibility/extensions/types.ts +1322 -0
- package/src/extensibility/extensions/wrapper.ts +223 -0
- package/src/extensibility/hooks/index.ts +5 -0
- package/src/extensibility/hooks/loader.ts +257 -0
- package/src/extensibility/hooks/runner.ts +425 -0
- package/src/extensibility/hooks/tool-wrapper.ts +107 -0
- package/src/extensibility/hooks/types.ts +606 -0
- package/src/extensibility/legacy-pi-ai-shim.ts +24 -0
- package/src/extensibility/legacy-pi-coding-agent-shim.ts +15 -0
- package/src/extensibility/plugins/doctor.ts +65 -0
- package/src/extensibility/plugins/git-url.ts +367 -0
- package/src/extensibility/plugins/index.ts +9 -0
- package/src/extensibility/plugins/installer.ts +192 -0
- package/src/extensibility/plugins/legacy-pi-compat.ts +682 -0
- package/src/extensibility/plugins/loader.ts +313 -0
- package/src/extensibility/plugins/manager.ts +827 -0
- package/src/extensibility/plugins/marketplace/cache.ts +136 -0
- package/src/extensibility/plugins/marketplace/fetcher.ts +317 -0
- package/src/extensibility/plugins/marketplace/index.ts +6 -0
- package/src/extensibility/plugins/marketplace/manager.ts +770 -0
- package/src/extensibility/plugins/marketplace/registry.ts +196 -0
- package/src/extensibility/plugins/marketplace/source-resolver.ts +147 -0
- package/src/extensibility/plugins/marketplace/types.ts +191 -0
- package/src/extensibility/plugins/marketplace-auto-update.ts +49 -0
- package/src/extensibility/plugins/parser.ts +105 -0
- package/src/extensibility/plugins/types.ts +194 -0
- package/src/extensibility/shared-events.ts +343 -0
- package/src/extensibility/skills.ts +312 -0
- package/src/extensibility/slash-commands.ts +227 -0
- package/src/extensibility/tool-proxy.ts +25 -0
- package/src/extensibility/typebox.ts +418 -0
- package/src/extensibility/utils.ts +44 -0
- package/src/goals/index.ts +3 -0
- package/src/goals/runtime.ts +528 -0
- package/src/goals/state.ts +37 -0
- package/src/goals/tools/goal-tool.ts +251 -0
- package/src/hindsight/backend.ts +354 -0
- package/src/hindsight/bank.ts +156 -0
- package/src/hindsight/client.ts +598 -0
- package/src/hindsight/config.ts +175 -0
- package/src/hindsight/content.ts +210 -0
- package/src/hindsight/index.ts +8 -0
- package/src/hindsight/mental-models.ts +429 -0
- package/src/hindsight/seeds.json +32 -0
- package/src/hindsight/state.ts +488 -0
- package/src/hindsight/transcript.ts +71 -0
- package/src/index.ts +59 -0
- package/src/internal-urls/agent-protocol.ts +146 -0
- package/src/internal-urls/artifact-protocol.ts +107 -0
- package/src/internal-urls/docs-index.generated.ts +106 -0
- package/src/internal-urls/history-protocol.ts +113 -0
- package/src/internal-urls/index.ts +25 -0
- package/src/internal-urls/issue-pr-protocol.ts +584 -0
- package/src/internal-urls/json-query.ts +126 -0
- package/src/internal-urls/local-protocol.ts +287 -0
- package/src/internal-urls/mcp-protocol.ts +151 -0
- package/src/internal-urls/memory-protocol.ts +169 -0
- package/src/internal-urls/omp-protocol.ts +93 -0
- package/src/internal-urls/parse.ts +72 -0
- package/src/internal-urls/registry-helpers.ts +25 -0
- package/src/internal-urls/router.ts +105 -0
- package/src/internal-urls/rule-protocol.ts +45 -0
- package/src/internal-urls/skill-protocol.ts +96 -0
- package/src/internal-urls/types.ts +152 -0
- package/src/internal-urls/vault-protocol.ts +936 -0
- package/src/irc/bus.ts +292 -0
- package/src/lib/xai-http.ts +124 -0
- package/src/lsp/client.ts +1193 -0
- package/src/lsp/clients/biome-client.ts +264 -0
- package/src/lsp/clients/index.ts +50 -0
- package/src/lsp/clients/lsp-linter-client.ts +93 -0
- package/src/lsp/clients/swiftlint-client.ts +120 -0
- package/src/lsp/config.ts +502 -0
- package/src/lsp/defaults.json +493 -0
- package/src/lsp/diagnostics-ledger.ts +51 -0
- package/src/lsp/edits.ts +267 -0
- package/src/lsp/index.ts +2477 -0
- package/src/lsp/lspmux.ts +233 -0
- package/src/lsp/render.ts +694 -0
- package/src/lsp/startup-events.ts +13 -0
- package/src/lsp/types.ts +455 -0
- package/src/lsp/utils.ts +718 -0
- package/src/main.ts +1325 -0
- package/src/mcp/client.ts +484 -0
- package/src/mcp/config-writer.ts +225 -0
- package/src/mcp/config.ts +365 -0
- package/src/mcp/index.ts +29 -0
- package/src/mcp/json-rpc.ts +122 -0
- package/src/mcp/loader.ts +124 -0
- package/src/mcp/manager.ts +1275 -0
- package/src/mcp/oauth-discovery.ts +442 -0
- package/src/mcp/oauth-flow.ts +442 -0
- package/src/mcp/render.ts +132 -0
- package/src/mcp/smithery-auth.ts +104 -0
- package/src/mcp/smithery-connect.ts +145 -0
- package/src/mcp/smithery-registry.ts +477 -0
- package/src/mcp/timeout.ts +59 -0
- package/src/mcp/tool-bridge.ts +426 -0
- package/src/mcp/tool-cache.ts +117 -0
- package/src/mcp/transports/http.ts +519 -0
- package/src/mcp/transports/index.ts +6 -0
- package/src/mcp/transports/stdio.ts +528 -0
- package/src/mcp/types.ts +423 -0
- package/src/memories/index.ts +1150 -0
- package/src/memories/storage.ts +577 -0
- package/src/memory-backend/index.ts +18 -0
- package/src/memory-backend/local-backend.ts +39 -0
- package/src/memory-backend/off-backend.ts +25 -0
- package/src/memory-backend/resolve.ts +25 -0
- package/src/memory-backend/runtime.ts +66 -0
- package/src/memory-backend/types.ts +166 -0
- package/src/mnemopi/backend.ts +547 -0
- package/src/mnemopi/config.ts +160 -0
- package/src/mnemopi/index.ts +3 -0
- package/src/mnemopi/state.ts +584 -0
- package/src/modes/acp/acp-agent.ts +2407 -0
- package/src/modes/acp/acp-client-bridge.ts +154 -0
- package/src/modes/acp/acp-event-mapper.ts +929 -0
- package/src/modes/acp/acp-mode.ts +23 -0
- package/src/modes/acp/index.ts +2 -0
- package/src/modes/acp/terminal-auth.ts +37 -0
- package/src/modes/components/agent-dashboard.ts +1206 -0
- package/src/modes/components/agent-hub.ts +1071 -0
- package/src/modes/components/assistant-message.ts +307 -0
- package/src/modes/components/bash-execution.ts +220 -0
- package/src/modes/components/bordered-loader.ts +41 -0
- package/src/modes/components/branch-summary-message.ts +45 -0
- package/src/modes/components/btw-panel.ts +104 -0
- package/src/modes/components/chat-block.ts +111 -0
- package/src/modes/components/compaction-summary-message.ts +87 -0
- package/src/modes/components/copy-selector.ts +206 -0
- package/src/modes/components/countdown-timer.ts +75 -0
- package/src/modes/components/custom-editor.ts +398 -0
- package/src/modes/components/custom-message.ts +63 -0
- package/src/modes/components/diff.ts +277 -0
- package/src/modes/components/dynamic-border.ts +34 -0
- package/src/modes/components/error-banner.ts +33 -0
- package/src/modes/components/eval-execution.ts +158 -0
- package/src/modes/components/execution-shared.ts +101 -0
- package/src/modes/components/extensions/extension-dashboard.ts +399 -0
- package/src/modes/components/extensions/extension-list.ts +502 -0
- package/src/modes/components/extensions/index.ts +9 -0
- package/src/modes/components/extensions/inspector-panel.ts +317 -0
- package/src/modes/components/extensions/state-manager.ts +627 -0
- package/src/modes/components/extensions/types.ts +186 -0
- package/src/modes/components/footer.ts +274 -0
- package/src/modes/components/history-search.ts +280 -0
- package/src/modes/components/hook-editor.ts +167 -0
- package/src/modes/components/hook-input.ts +87 -0
- package/src/modes/components/hook-message.ts +66 -0
- package/src/modes/components/hook-selector.ts +660 -0
- package/src/modes/components/index.ts +38 -0
- package/src/modes/components/keybinding-hints.ts +65 -0
- package/src/modes/components/late-diagnostics-message.ts +60 -0
- package/src/modes/components/login-dialog.ts +164 -0
- package/src/modes/components/mcp-add-wizard.ts +1340 -0
- package/src/modes/components/message-frame.ts +88 -0
- package/src/modes/components/model-selector.ts +1271 -0
- package/src/modes/components/oauth-selector.ts +368 -0
- package/src/modes/components/omfg-panel.ts +141 -0
- package/src/modes/components/overlay-box.ts +108 -0
- package/src/modes/components/plan-review-overlay.ts +820 -0
- package/src/modes/components/plan-toc.ts +138 -0
- package/src/modes/components/plugin-selector.ts +95 -0
- package/src/modes/components/plugin-settings.ts +722 -0
- package/src/modes/components/queue-mode-selector.ts +56 -0
- package/src/modes/components/read-tool-group.ts +670 -0
- package/src/modes/components/segment-track.ts +52 -0
- package/src/modes/components/session-selector.ts +625 -0
- package/src/modes/components/settings-defs.ts +189 -0
- package/src/modes/components/settings-selector.ts +651 -0
- package/src/modes/components/show-images-selector.ts +45 -0
- package/src/modes/components/skill-message.ts +89 -0
- package/src/modes/components/status-line/component.ts +869 -0
- package/src/modes/components/status-line/context-thresholds.ts +79 -0
- package/src/modes/components/status-line/git-utils.ts +42 -0
- package/src/modes/components/status-line/index.ts +5 -0
- package/src/modes/components/status-line/presets.ts +106 -0
- package/src/modes/components/status-line/segments.ts +584 -0
- package/src/modes/components/status-line/separators.ts +55 -0
- package/src/modes/components/status-line/token-rate.ts +66 -0
- package/src/modes/components/status-line/types.ts +108 -0
- package/src/modes/components/theme-selector.ts +63 -0
- package/src/modes/components/thinking-selector.ts +52 -0
- package/src/modes/components/tiny-title-download-progress.ts +90 -0
- package/src/modes/components/tips.txt +19 -0
- package/src/modes/components/todo-reminder.ts +38 -0
- package/src/modes/components/tool-execution.ts +1024 -0
- package/src/modes/components/transcript-container.ts +608 -0
- package/src/modes/components/tree-selector.ts +978 -0
- package/src/modes/components/ttsr-notification.ts +122 -0
- package/src/modes/components/user-message-selector.ts +227 -0
- package/src/modes/components/user-message.ts +66 -0
- package/src/modes/components/visual-truncate.ts +63 -0
- package/src/modes/components/welcome.ts +493 -0
- package/src/modes/controllers/btw-controller.ts +105 -0
- package/src/modes/controllers/command-controller-shared.ts +109 -0
- package/src/modes/controllers/command-controller.ts +1566 -0
- package/src/modes/controllers/event-controller.ts +1054 -0
- package/src/modes/controllers/extension-ui-controller.ts +886 -0
- package/src/modes/controllers/input-controller.ts +1073 -0
- package/src/modes/controllers/mcp-command-controller.ts +2017 -0
- package/src/modes/controllers/omfg-controller.ts +283 -0
- package/src/modes/controllers/omfg-rule.ts +647 -0
- package/src/modes/controllers/selector-controller.ts +1108 -0
- package/src/modes/controllers/ssh-command-controller.ts +384 -0
- package/src/modes/controllers/streaming-reveal.ts +279 -0
- package/src/modes/controllers/tan-command-controller.ts +173 -0
- package/src/modes/controllers/todo-command-controller.ts +485 -0
- package/src/modes/data/emojis.json +1 -0
- package/src/modes/emoji-autocomplete.ts +285 -0
- package/src/modes/gradient-highlight.ts +87 -0
- package/src/modes/image-references.ts +117 -0
- package/src/modes/index.ts +17 -0
- package/src/modes/interactive-mode.ts +3370 -0
- package/src/modes/internal-url-autocomplete.ts +143 -0
- package/src/modes/loop-limit.ts +140 -0
- package/src/modes/magic-keywords.ts +20 -0
- package/src/modes/markdown-prose.ts +247 -0
- package/src/modes/oauth-manual-input.ts +69 -0
- package/src/modes/orchestrate.ts +42 -0
- package/src/modes/print-mode.ts +126 -0
- package/src/modes/prompt-action-autocomplete.ts +260 -0
- package/src/modes/rpc/host-tools.ts +186 -0
- package/src/modes/rpc/host-uris.ts +235 -0
- package/src/modes/rpc/rpc-client.ts +963 -0
- package/src/modes/rpc/rpc-mode.ts +947 -0
- package/src/modes/rpc/rpc-subagents.ts +265 -0
- package/src/modes/rpc/rpc-types.ts +458 -0
- package/src/modes/runtime-init.ts +116 -0
- package/src/modes/session-observer-registry.ts +146 -0
- package/src/modes/setup-version.ts +11 -0
- package/src/modes/setup-wizard/index.ts +99 -0
- package/src/modes/setup-wizard/lazy.ts +16 -0
- package/src/modes/setup-wizard/scenes/glyph.ts +96 -0
- package/src/modes/setup-wizard/scenes/outro.ts +35 -0
- package/src/modes/setup-wizard/scenes/providers.ts +69 -0
- package/src/modes/setup-wizard/scenes/sign-in.ts +205 -0
- package/src/modes/setup-wizard/scenes/splash.ts +201 -0
- package/src/modes/setup-wizard/scenes/theme.ts +299 -0
- package/src/modes/setup-wizard/scenes/types.ts +48 -0
- package/src/modes/setup-wizard/scenes/web-search.ts +129 -0
- package/src/modes/setup-wizard/wizard-overlay.ts +275 -0
- package/src/modes/shared.ts +47 -0
- package/src/modes/theme/dark.json +95 -0
- package/src/modes/theme/defaults/alabaster.json +93 -0
- package/src/modes/theme/defaults/amethyst.json +96 -0
- package/src/modes/theme/defaults/anthracite.json +93 -0
- package/src/modes/theme/defaults/basalt.json +91 -0
- package/src/modes/theme/defaults/birch.json +95 -0
- package/src/modes/theme/defaults/dark-abyss.json +91 -0
- package/src/modes/theme/defaults/dark-arctic.json +104 -0
- package/src/modes/theme/defaults/dark-aurora.json +95 -0
- package/src/modes/theme/defaults/dark-catppuccin.json +107 -0
- package/src/modes/theme/defaults/dark-cavern.json +91 -0
- package/src/modes/theme/defaults/dark-copper.json +95 -0
- package/src/modes/theme/defaults/dark-cosmos.json +90 -0
- package/src/modes/theme/defaults/dark-cyberpunk.json +102 -0
- package/src/modes/theme/defaults/dark-dracula.json +98 -0
- package/src/modes/theme/defaults/dark-eclipse.json +91 -0
- package/src/modes/theme/defaults/dark-ember.json +95 -0
- package/src/modes/theme/defaults/dark-equinox.json +90 -0
- package/src/modes/theme/defaults/dark-forest.json +96 -0
- package/src/modes/theme/defaults/dark-github.json +105 -0
- package/src/modes/theme/defaults/dark-gruvbox.json +112 -0
- package/src/modes/theme/defaults/dark-lavender.json +95 -0
- package/src/modes/theme/defaults/dark-lunar.json +89 -0
- package/src/modes/theme/defaults/dark-midnight.json +95 -0
- package/src/modes/theme/defaults/dark-monochrome.json +94 -0
- package/src/modes/theme/defaults/dark-monokai.json +98 -0
- package/src/modes/theme/defaults/dark-nebula.json +90 -0
- package/src/modes/theme/defaults/dark-nord.json +97 -0
- package/src/modes/theme/defaults/dark-ocean.json +101 -0
- package/src/modes/theme/defaults/dark-one.json +100 -0
- package/src/modes/theme/defaults/dark-poimandres.json +142 -0
- package/src/modes/theme/defaults/dark-rainforest.json +91 -0
- package/src/modes/theme/defaults/dark-reef.json +91 -0
- package/src/modes/theme/defaults/dark-retro.json +92 -0
- package/src/modes/theme/defaults/dark-rose-pine.json +96 -0
- package/src/modes/theme/defaults/dark-sakura.json +95 -0
- package/src/modes/theme/defaults/dark-slate.json +95 -0
- package/src/modes/theme/defaults/dark-solarized.json +97 -0
- package/src/modes/theme/defaults/dark-solstice.json +90 -0
- package/src/modes/theme/defaults/dark-starfall.json +91 -0
- package/src/modes/theme/defaults/dark-sunset.json +99 -0
- package/src/modes/theme/defaults/dark-swamp.json +90 -0
- package/src/modes/theme/defaults/dark-synthwave.json +103 -0
- package/src/modes/theme/defaults/dark-taiga.json +91 -0
- package/src/modes/theme/defaults/dark-terminal.json +95 -0
- package/src/modes/theme/defaults/dark-tokyo-night.json +101 -0
- package/src/modes/theme/defaults/dark-tundra.json +91 -0
- package/src/modes/theme/defaults/dark-twilight.json +91 -0
- package/src/modes/theme/defaults/dark-volcanic.json +91 -0
- package/src/modes/theme/defaults/graphite.json +92 -0
- package/src/modes/theme/defaults/index.ts +199 -0
- package/src/modes/theme/defaults/light-arctic.json +107 -0
- package/src/modes/theme/defaults/light-aurora-day.json +91 -0
- package/src/modes/theme/defaults/light-canyon.json +91 -0
- package/src/modes/theme/defaults/light-catppuccin.json +106 -0
- package/src/modes/theme/defaults/light-cirrus.json +90 -0
- package/src/modes/theme/defaults/light-coral.json +95 -0
- package/src/modes/theme/defaults/light-cyberpunk.json +96 -0
- package/src/modes/theme/defaults/light-dawn.json +90 -0
- package/src/modes/theme/defaults/light-dunes.json +91 -0
- package/src/modes/theme/defaults/light-eucalyptus.json +95 -0
- package/src/modes/theme/defaults/light-forest.json +100 -0
- package/src/modes/theme/defaults/light-frost.json +95 -0
- package/src/modes/theme/defaults/light-github.json +115 -0
- package/src/modes/theme/defaults/light-glacier.json +91 -0
- package/src/modes/theme/defaults/light-gruvbox.json +108 -0
- package/src/modes/theme/defaults/light-haze.json +90 -0
- package/src/modes/theme/defaults/light-honeycomb.json +95 -0
- package/src/modes/theme/defaults/light-lagoon.json +91 -0
- package/src/modes/theme/defaults/light-lavender.json +95 -0
- package/src/modes/theme/defaults/light-meadow.json +91 -0
- package/src/modes/theme/defaults/light-mint.json +95 -0
- package/src/modes/theme/defaults/light-monochrome.json +101 -0
- package/src/modes/theme/defaults/light-ocean.json +99 -0
- package/src/modes/theme/defaults/light-one.json +99 -0
- package/src/modes/theme/defaults/light-opal.json +91 -0
- package/src/modes/theme/defaults/light-orchard.json +91 -0
- package/src/modes/theme/defaults/light-paper.json +95 -0
- package/src/modes/theme/defaults/light-poimandres.json +142 -0
- package/src/modes/theme/defaults/light-prism.json +90 -0
- package/src/modes/theme/defaults/light-retro.json +98 -0
- package/src/modes/theme/defaults/light-sand.json +95 -0
- package/src/modes/theme/defaults/light-savanna.json +91 -0
- package/src/modes/theme/defaults/light-solarized.json +102 -0
- package/src/modes/theme/defaults/light-soleil.json +90 -0
- package/src/modes/theme/defaults/light-sunset.json +99 -0
- package/src/modes/theme/defaults/light-synthwave.json +98 -0
- package/src/modes/theme/defaults/light-tokyo-night.json +111 -0
- package/src/modes/theme/defaults/light-wetland.json +91 -0
- package/src/modes/theme/defaults/light-zenith.json +89 -0
- package/src/modes/theme/defaults/limestone.json +94 -0
- package/src/modes/theme/defaults/mahogany.json +97 -0
- package/src/modes/theme/defaults/marble.json +93 -0
- package/src/modes/theme/defaults/obsidian.json +91 -0
- package/src/modes/theme/defaults/onyx.json +91 -0
- package/src/modes/theme/defaults/pearl.json +93 -0
- package/src/modes/theme/defaults/porcelain.json +91 -0
- package/src/modes/theme/defaults/quartz.json +96 -0
- package/src/modes/theme/defaults/sandstone.json +95 -0
- package/src/modes/theme/defaults/titanium.json +90 -0
- package/src/modes/theme/light.json +93 -0
- package/src/modes/theme/mermaid-cache.ts +29 -0
- package/src/modes/theme/shimmer.ts +235 -0
- package/src/modes/theme/theme-schema.json +459 -0
- package/src/modes/theme/theme.ts +2676 -0
- package/src/modes/turn-budget.ts +31 -0
- package/src/modes/types.ts +359 -0
- package/src/modes/ultrathink.ts +41 -0
- package/src/modes/utils/context-usage.ts +339 -0
- package/src/modes/utils/copy-targets.ts +360 -0
- package/src/modes/utils/hotkeys-markdown.ts +61 -0
- package/src/modes/utils/keybinding-matchers.ts +51 -0
- package/src/modes/utils/tools-markdown.ts +27 -0
- package/src/modes/utils/ui-helpers.ts +801 -0
- package/src/modes/workflow.ts +42 -0
- package/src/plan-mode/approved-plan.ts +186 -0
- package/src/plan-mode/plan-handoff.ts +37 -0
- package/src/plan-mode/plan-protection.ts +31 -0
- package/src/plan-mode/state.ts +6 -0
- package/src/priority.json +41 -0
- package/src/prompts/agents/designer.md +66 -0
- package/src/prompts/agents/explore.md +58 -0
- package/src/prompts/agents/frontmatter.md +11 -0
- package/src/prompts/agents/init.md +33 -0
- package/src/prompts/agents/librarian.md +119 -0
- package/src/prompts/agents/oracle.md +55 -0
- package/src/prompts/agents/plan.md +48 -0
- package/src/prompts/agents/reviewer.md +140 -0
- package/src/prompts/agents/task.md +16 -0
- package/src/prompts/ci-green-request.md +36 -0
- package/src/prompts/dry-balance-bench.md +8 -0
- package/src/prompts/goals/goal-budget-limit.md +16 -0
- package/src/prompts/goals/goal-continuation.md +28 -0
- package/src/prompts/goals/goal-mode-active.md +23 -0
- package/src/prompts/memories/consolidation.md +30 -0
- package/src/prompts/memories/read-path.md +11 -0
- package/src/prompts/memories/stage_one_input.md +6 -0
- package/src/prompts/memories/stage_one_system.md +21 -0
- package/src/prompts/review-custom-request.md +22 -0
- package/src/prompts/review-headless-request.md +16 -0
- package/src/prompts/review-request.md +69 -0
- package/src/prompts/steering/user-interjection.md +10 -0
- package/src/prompts/system/agent-creation-architect.md +50 -0
- package/src/prompts/system/agent-creation-user.md +6 -0
- package/src/prompts/system/auto-continue.md +1 -0
- package/src/prompts/system/auto-thinking-difficulty-local.md +14 -0
- package/src/prompts/system/auto-thinking-difficulty.md +12 -0
- package/src/prompts/system/background-tan-dispatch.md +8 -0
- package/src/prompts/system/btw-user.md +8 -0
- package/src/prompts/system/commit-message-system.md +14 -0
- package/src/prompts/system/custom-system-prompt.md +64 -0
- package/src/prompts/system/eager-todo.md +13 -0
- package/src/prompts/system/empty-stop-retry.md +6 -0
- package/src/prompts/system/irc-incoming.md +7 -0
- package/src/prompts/system/manual-continue.md +7 -0
- package/src/prompts/system/memory-consolidation-system.md +8 -0
- package/src/prompts/system/memory-extraction-system.md +26 -0
- package/src/prompts/system/omfg-user.md +50 -0
- package/src/prompts/system/orchestrate-notice.md +40 -0
- package/src/prompts/system/plan-mode-active.md +109 -0
- package/src/prompts/system/plan-mode-approved.md +25 -0
- package/src/prompts/system/plan-mode-compact-instructions.md +16 -0
- package/src/prompts/system/plan-mode-reference.md +11 -0
- package/src/prompts/system/plan-mode-subagent.md +33 -0
- package/src/prompts/system/plan-mode-tool-decision-reminder.md +9 -0
- package/src/prompts/system/project-prompt.md +52 -0
- package/src/prompts/system/subagent-system-prompt.md +64 -0
- package/src/prompts/system/subagent-user-prompt.md +3 -0
- package/src/prompts/system/subagent-yield-reminder.md +12 -0
- package/src/prompts/system/system-prompt.md +258 -0
- package/src/prompts/system/tiny-title-system.md +8 -0
- package/src/prompts/system/title-system.md +16 -0
- package/src/prompts/system/ttsr-interrupt.md +7 -0
- package/src/prompts/system/ttsr-tool-reminder.md +5 -0
- package/src/prompts/system/ultrathink-notice.md +3 -0
- package/src/prompts/system/web-search.md +25 -0
- package/src/prompts/system/workflow-notice.md +70 -0
- package/src/prompts/tools/apply-patch.md +65 -0
- package/src/prompts/tools/ask.md +30 -0
- package/src/prompts/tools/ast-edit.md +39 -0
- package/src/prompts/tools/ast-grep.md +42 -0
- package/src/prompts/tools/async-result.md +8 -0
- package/src/prompts/tools/bash.md +46 -0
- package/src/prompts/tools/browser.md +73 -0
- package/src/prompts/tools/checkpoint.md +16 -0
- package/src/prompts/tools/debug.md +34 -0
- package/src/prompts/tools/eval.md +92 -0
- package/src/prompts/tools/find.md +36 -0
- package/src/prompts/tools/github.md +21 -0
- package/src/prompts/tools/goal.md +18 -0
- package/src/prompts/tools/image-gen.md +7 -0
- package/src/prompts/tools/inspect-image-system.md +20 -0
- package/src/prompts/tools/inspect-image.md +32 -0
- package/src/prompts/tools/irc.md +59 -0
- package/src/prompts/tools/job.md +19 -0
- package/src/prompts/tools/lsp-late-diagnostic.md +8 -0
- package/src/prompts/tools/lsp.md +42 -0
- package/src/prompts/tools/memory-edit.md +8 -0
- package/src/prompts/tools/patch.md +70 -0
- package/src/prompts/tools/read.md +84 -0
- package/src/prompts/tools/recall.md +5 -0
- package/src/prompts/tools/reflect.md +5 -0
- package/src/prompts/tools/render-mermaid.md +9 -0
- package/src/prompts/tools/replace.md +30 -0
- package/src/prompts/tools/resolve.md +9 -0
- package/src/prompts/tools/retain.md +6 -0
- package/src/prompts/tools/rewind.md +13 -0
- package/src/prompts/tools/search-tool-bm25.md +32 -0
- package/src/prompts/tools/search.md +24 -0
- package/src/prompts/tools/ssh.md +31 -0
- package/src/prompts/tools/task-summary.md +17 -0
- package/src/prompts/tools/task.md +88 -0
- package/src/prompts/tools/todo.md +62 -0
- package/src/prompts/tools/web-search.md +10 -0
- package/src/prompts/tools/write.md +14 -0
- package/src/registry/agent-lifecycle.ts +218 -0
- package/src/registry/agent-registry.ts +151 -0
- package/src/sdk.ts +2558 -0
- package/src/secrets/index.ts +123 -0
- package/src/secrets/obfuscator.ts +298 -0
- package/src/secrets/regex.ts +21 -0
- package/src/session/agent-session.ts +10121 -0
- package/src/session/agent-storage.ts +455 -0
- package/src/session/artifacts.ts +135 -0
- package/src/session/auth-broker-config.ts +131 -0
- package/src/session/auth-storage.ts +29 -0
- package/src/session/blob-store.ts +255 -0
- package/src/session/client-bridge.ts +85 -0
- package/src/session/history-storage.ts +348 -0
- package/src/session/indexed-session-storage.ts +430 -0
- package/src/session/messages.ts +541 -0
- package/src/session/redis-session-storage.ts +170 -0
- package/src/session/session-dump-format.ts +209 -0
- package/src/session/session-history-format.ts +246 -0
- package/src/session/session-manager.ts +3676 -0
- package/src/session/session-storage.ts +529 -0
- package/src/session/shake-types.ts +43 -0
- package/src/session/sql-session-storage.ts +314 -0
- package/src/session/streaming-output.ts +1330 -0
- package/src/session/tool-choice-queue.ts +213 -0
- package/src/session/yield-queue.ts +173 -0
- package/src/slash-commands/acp-builtins.ts +70 -0
- package/src/slash-commands/builtin-registry.ts +1798 -0
- package/src/slash-commands/helpers/context-report.ts +39 -0
- package/src/slash-commands/helpers/format.ts +46 -0
- package/src/slash-commands/helpers/marketplace-manager.ts +25 -0
- package/src/slash-commands/helpers/mcp.ts +532 -0
- package/src/slash-commands/helpers/parse.ts +85 -0
- package/src/slash-commands/helpers/ssh.ts +195 -0
- package/src/slash-commands/helpers/stats-dashboard.ts +85 -0
- package/src/slash-commands/helpers/todo.ts +279 -0
- package/src/slash-commands/helpers/usage-report.ts +95 -0
- package/src/slash-commands/marketplace-install-parser.ts +99 -0
- package/src/slash-commands/types.ts +135 -0
- package/src/ssh/config-writer.ts +183 -0
- package/src/ssh/connection-manager.ts +509 -0
- package/src/ssh/ssh-executor.ts +189 -0
- package/src/ssh/sshfs-mount.ts +140 -0
- package/src/ssh/utils.ts +8 -0
- package/src/stt/downloader.ts +71 -0
- package/src/stt/index.ts +3 -0
- package/src/stt/recorder.ts +351 -0
- package/src/stt/setup.ts +52 -0
- package/src/stt/stt-controller.ts +160 -0
- package/src/stt/transcribe.py +70 -0
- package/src/stt/transcriber.ts +91 -0
- package/src/stubs/natives/index.ts +814 -0
- package/src/stubs/natives/package.json +7 -0
- package/src/stubs/tui/index.ts +282 -0
- package/src/stubs/tui/package.json +7 -0
- package/src/system-prompt.ts +611 -0
- package/src/task/agents.ts +167 -0
- package/src/task/commands.ts +132 -0
- package/src/task/discovery.ts +122 -0
- package/src/task/executor.ts +2133 -0
- package/src/task/index.ts +1419 -0
- package/src/task/name-generator.ts +1577 -0
- package/src/task/omp-command.ts +26 -0
- package/src/task/output-manager.ts +88 -0
- package/src/task/parallel.ts +116 -0
- package/src/task/render.ts +1381 -0
- package/src/task/repair-args.ts +129 -0
- package/src/task/subprocess-tool-registry.ts +88 -0
- package/src/task/types.ts +336 -0
- package/src/task/worktree.ts +514 -0
- package/src/telemetry-export.ts +144 -0
- package/src/thinking.ts +167 -0
- package/src/tiny/compiled-runtime.ts +179 -0
- package/src/tiny/device.ts +111 -0
- package/src/tiny/dtype.ts +101 -0
- package/src/tiny/models.ts +242 -0
- package/src/tiny/text.ts +165 -0
- package/src/tiny/title-client.ts +543 -0
- package/src/tiny/title-protocol.ts +56 -0
- package/src/tiny/worker.ts +568 -0
- package/src/tool-discovery/mode.ts +24 -0
- package/src/tool-discovery/tool-index.ts +256 -0
- package/src/tools/approval.ts +189 -0
- package/src/tools/archive-reader.ts +721 -0
- package/src/tools/ask.ts +928 -0
- package/src/tools/ast-edit.ts +642 -0
- package/src/tools/ast-grep.ts +452 -0
- package/src/tools/auto-generated-guard.ts +322 -0
- package/src/tools/bash-command-fixup.ts +37 -0
- package/src/tools/bash-interactive.ts +408 -0
- package/src/tools/bash-interceptor.ts +67 -0
- package/src/tools/bash-pty-selection.ts +14 -0
- package/src/tools/bash-skill-urls.ts +248 -0
- package/src/tools/bash.ts +1386 -0
- package/src/tools/browser/attach.ts +175 -0
- package/src/tools/browser/launch.ts +660 -0
- package/src/tools/browser/readable.ts +112 -0
- package/src/tools/browser/registry.ts +197 -0
- package/src/tools/browser/render.ts +216 -0
- package/src/tools/browser/tab-protocol.ts +105 -0
- package/src/tools/browser/tab-supervisor.ts +628 -0
- package/src/tools/browser/tab-worker-entry.ts +21 -0
- package/src/tools/browser/tab-worker.ts +1226 -0
- package/src/tools/browser.ts +343 -0
- package/src/tools/checkpoint.ts +136 -0
- package/src/tools/conflict-detect.ts +718 -0
- package/src/tools/context.ts +39 -0
- package/src/tools/debug.ts +1067 -0
- package/src/tools/eval-backends.ts +27 -0
- package/src/tools/eval-render.ts +752 -0
- package/src/tools/eval.ts +577 -0
- package/src/tools/fetch.ts +1926 -0
- package/src/tools/file-recorder.ts +35 -0
- package/src/tools/find.ts +609 -0
- package/src/tools/fs-cache-invalidation.ts +28 -0
- package/src/tools/gh-cache-invalidation.ts +255 -0
- package/src/tools/gh-format.ts +12 -0
- package/src/tools/gh-renderer.ts +481 -0
- package/src/tools/gh.ts +3720 -0
- package/src/tools/github-cache.ts +637 -0
- package/src/tools/grouped-file-output.ts +210 -0
- package/src/tools/image-gen.ts +1517 -0
- package/src/tools/index.ts +599 -0
- package/src/tools/inspect-image-renderer.ts +132 -0
- package/src/tools/inspect-image.ts +174 -0
- package/src/tools/irc.ts +723 -0
- package/src/tools/job.ts +557 -0
- package/src/tools/json-tree.ts +243 -0
- package/src/tools/jtd-to-json-schema.ts +219 -0
- package/src/tools/jtd-to-typescript.ts +136 -0
- package/src/tools/jtd-utils.ts +102 -0
- package/src/tools/list-limit.ts +40 -0
- package/src/tools/match-line-format.ts +20 -0
- package/src/tools/memory-edit.ts +59 -0
- package/src/tools/memory-recall.ts +100 -0
- package/src/tools/memory-reflect.ts +88 -0
- package/src/tools/memory-render.ts +202 -0
- package/src/tools/memory-retain.ts +91 -0
- package/src/tools/output-meta.ts +754 -0
- package/src/tools/output-schema-validator.ts +132 -0
- package/src/tools/path-utils.ts +1054 -0
- package/src/tools/plan-mode-guard.ts +108 -0
- package/src/tools/puppeteer/00_stealth_tampering.txt +63 -0
- package/src/tools/puppeteer/01_stealth_activity.txt +20 -0
- package/src/tools/puppeteer/02_stealth_hairline.txt +11 -0
- package/src/tools/puppeteer/03_stealth_botd.txt +384 -0
- package/src/tools/puppeteer/04_stealth_iframe.txt +81 -0
- package/src/tools/puppeteer/05_stealth_webgl.txt +75 -0
- package/src/tools/puppeteer/06_stealth_screen.txt +72 -0
- package/src/tools/puppeteer/07_stealth_fonts.txt +97 -0
- package/src/tools/puppeteer/08_stealth_audio.txt +51 -0
- package/src/tools/puppeteer/09_stealth_locale.txt +46 -0
- package/src/tools/puppeteer/10_stealth_plugins.txt +206 -0
- package/src/tools/puppeteer/11_stealth_hardware.txt +8 -0
- package/src/tools/puppeteer/12_stealth_codecs.txt +40 -0
- package/src/tools/puppeteer/13_stealth_worker.txt +74 -0
- package/src/tools/read.ts +2929 -0
- package/src/tools/render-mermaid.ts +69 -0
- package/src/tools/render-utils.ts +838 -0
- package/src/tools/renderers.ts +77 -0
- package/src/tools/report-tool-issue.ts +534 -0
- package/src/tools/resolve.ts +276 -0
- package/src/tools/review.ts +253 -0
- package/src/tools/search-tool-bm25.ts +351 -0
- package/src/tools/search.ts +1580 -0
- package/src/tools/sqlite-reader.ts +828 -0
- package/src/tools/ssh.ts +349 -0
- package/src/tools/todo.ts +982 -0
- package/src/tools/tool-errors.ts +62 -0
- package/src/tools/tool-result.ts +94 -0
- package/src/tools/tool-timeouts.ts +30 -0
- package/src/tools/tts.ts +133 -0
- package/src/tools/write.ts +1217 -0
- package/src/tools/yield.ts +269 -0
- package/src/tui/code-cell.ts +216 -0
- package/src/tui/file-list.ts +55 -0
- package/src/tui/hyperlink.ts +175 -0
- package/src/tui/index.ts +12 -0
- package/src/tui/output-block.ts +240 -0
- package/src/tui/status-line.ts +54 -0
- package/src/tui/tree-list.ts +84 -0
- package/src/tui/types.ts +15 -0
- package/src/tui/utils.ts +103 -0
- package/src/utils/block-context.ts +312 -0
- package/src/utils/changelog.ts +132 -0
- package/src/utils/clipboard.ts +193 -0
- package/src/utils/command-args.ts +76 -0
- package/src/utils/commit-message-generator.ts +151 -0
- package/src/utils/edit-mode.ts +41 -0
- package/src/utils/enhanced-paste.ts +230 -0
- package/src/utils/event-bus.ts +33 -0
- package/src/utils/external-editor.ts +65 -0
- package/src/utils/file-display-mode.ts +45 -0
- package/src/utils/file-mentions.ts +281 -0
- package/src/utils/git.ts +1833 -0
- package/src/utils/image-loading.ts +132 -0
- package/src/utils/image-resize.ts +309 -0
- package/src/utils/jj.ts +248 -0
- package/src/utils/lang-from-path.ts +239 -0
- package/src/utils/markit.ts +89 -0
- package/src/utils/open.ts +55 -0
- package/src/utils/session-color.ts +68 -0
- package/src/utils/shell-snapshot.ts +187 -0
- package/src/utils/sixel.ts +69 -0
- package/src/utils/title-generator.ts +373 -0
- package/src/utils/tool-choice.ts +33 -0
- package/src/utils/tools-manager.ts +363 -0
- package/src/web/kagi.ts +305 -0
- package/src/web/parallel.ts +353 -0
- package/src/web/scrapers/artifacthub.ts +207 -0
- package/src/web/scrapers/arxiv.ts +83 -0
- package/src/web/scrapers/aur.ts +162 -0
- package/src/web/scrapers/biorxiv.ts +133 -0
- package/src/web/scrapers/bluesky.ts +262 -0
- package/src/web/scrapers/brew.ts +172 -0
- package/src/web/scrapers/cheatsh.ts +68 -0
- package/src/web/scrapers/chocolatey.ts +196 -0
- package/src/web/scrapers/choosealicense.ts +95 -0
- package/src/web/scrapers/cisa-kev.ts +87 -0
- package/src/web/scrapers/clojars.ts +154 -0
- package/src/web/scrapers/coingecko.ts +177 -0
- package/src/web/scrapers/crates-io.ts +97 -0
- package/src/web/scrapers/crossref.ts +136 -0
- package/src/web/scrapers/devto.ts +147 -0
- package/src/web/scrapers/discogs.ts +306 -0
- package/src/web/scrapers/discourse.ts +197 -0
- package/src/web/scrapers/dockerhub.ts +138 -0
- package/src/web/scrapers/docs-rs.ts +653 -0
- package/src/web/scrapers/fdroid.ts +134 -0
- package/src/web/scrapers/firefox-addons.ts +191 -0
- package/src/web/scrapers/flathub.ts +223 -0
- package/src/web/scrapers/github-gist.ts +58 -0
- package/src/web/scrapers/github.ts +704 -0
- package/src/web/scrapers/gitlab.ts +401 -0
- package/src/web/scrapers/go-pkg.ts +266 -0
- package/src/web/scrapers/hackage.ts +140 -0
- package/src/web/scrapers/hackernews.ts +189 -0
- package/src/web/scrapers/hex.ts +105 -0
- package/src/web/scrapers/huggingface.ts +321 -0
- package/src/web/scrapers/iacr.ts +89 -0
- package/src/web/scrapers/index.ts +252 -0
- package/src/web/scrapers/jetbrains-marketplace.ts +159 -0
- package/src/web/scrapers/lemmy.ts +203 -0
- package/src/web/scrapers/lobsters.ts +175 -0
- package/src/web/scrapers/mastodon.ts +292 -0
- package/src/web/scrapers/maven.ts +138 -0
- package/src/web/scrapers/mdn.ts +173 -0
- package/src/web/scrapers/metacpan.ts +222 -0
- package/src/web/scrapers/musicbrainz.ts +250 -0
- package/src/web/scrapers/npm.ts +98 -0
- package/src/web/scrapers/nuget.ts +183 -0
- package/src/web/scrapers/nvd.ts +222 -0
- package/src/web/scrapers/ollama.ts +239 -0
- package/src/web/scrapers/open-vsx.ts +106 -0
- package/src/web/scrapers/opencorporates.ts +292 -0
- package/src/web/scrapers/openlibrary.ts +336 -0
- package/src/web/scrapers/orcid.ts +286 -0
- package/src/web/scrapers/osv.ts +176 -0
- package/src/web/scrapers/packagist.ts +160 -0
- package/src/web/scrapers/pub-dev.ts +143 -0
- package/src/web/scrapers/pubmed.ts +211 -0
- package/src/web/scrapers/pypi.ts +112 -0
- package/src/web/scrapers/rawg.ts +110 -0
- package/src/web/scrapers/readthedocs.ts +120 -0
- package/src/web/scrapers/reddit.ts +95 -0
- package/src/web/scrapers/repology.ts +251 -0
- package/src/web/scrapers/rfc.ts +201 -0
- package/src/web/scrapers/rubygems.ts +103 -0
- package/src/web/scrapers/searchcode.ts +189 -0
- package/src/web/scrapers/sec-edgar.ts +261 -0
- package/src/web/scrapers/semantic-scholar.ts +171 -0
- package/src/web/scrapers/snapcraft.ts +187 -0
- package/src/web/scrapers/sourcegraph.ts +336 -0
- package/src/web/scrapers/spdx.ts +108 -0
- package/src/web/scrapers/spotify.ts +198 -0
- package/src/web/scrapers/stackoverflow.ts +120 -0
- package/src/web/scrapers/terraform.ts +277 -0
- package/src/web/scrapers/tldr.ts +47 -0
- package/src/web/scrapers/twitter.ts +94 -0
- package/src/web/scrapers/types.ts +397 -0
- package/src/web/scrapers/utils.ts +109 -0
- package/src/web/scrapers/vimeo.ts +133 -0
- package/src/web/scrapers/vscode-marketplace.ts +187 -0
- package/src/web/scrapers/w3c.ts +156 -0
- package/src/web/scrapers/wikidata.ts +344 -0
- package/src/web/scrapers/wikipedia.ts +84 -0
- package/src/web/scrapers/youtube.ts +325 -0
- package/src/web/search/index.ts +292 -0
- package/src/web/search/provider.ts +157 -0
- package/src/web/search/providers/anthropic.ts +318 -0
- package/src/web/search/providers/base.ts +89 -0
- package/src/web/search/providers/brave.ts +152 -0
- package/src/web/search/providers/codex.ts +591 -0
- package/src/web/search/providers/exa.ts +400 -0
- package/src/web/search/providers/gemini.ts +460 -0
- package/src/web/search/providers/jina.ts +111 -0
- package/src/web/search/providers/kagi.ts +86 -0
- package/src/web/search/providers/kimi.ts +196 -0
- package/src/web/search/providers/parallel.ts +225 -0
- package/src/web/search/providers/perplexity.ts +730 -0
- package/src/web/search/providers/searxng.ts +313 -0
- package/src/web/search/providers/synthetic.ts +114 -0
- package/src/web/search/providers/tavily.ts +176 -0
- package/src/web/search/providers/utils.ts +128 -0
- package/src/web/search/providers/zai.ts +333 -0
- package/src/web/search/render.ts +262 -0
- package/src/web/search/types.ts +482 -0
- package/src/web/search/utils.ts +17 -0
- package/src/workspace-tree.ts +286 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
|
|
2
|
+
|
|
3
|
+
export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","adding-a-provider.md","ai-schema-normalize.md","approval-mode.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","install-id.md","keybindings.md","local-models.md","lsp-config.md","macos-signing-notarization.md","marketplace.md","mcp-config.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","mnemosyne-memory-backend.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","skills/authoring-extensions.md","skills/authoring-hooks.md","skills/authoring-marketplaces.md","skills/examples/hello-extension/README.md","skills/examples/mini-marketplace/README.md","skills/examples/safety-hook/README.md","slash-command-internals.md","system-prompt-customization.md","task-agent-discovery.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-core-renderer.md","tui-runtime-internals.md","tui.md"];
|
|
4
|
+
|
|
5
|
+
export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
6
|
+
"ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\nHistorical research note, not a current runtime contract. The statistics below\ncome from the named local stats database snapshot, not from checked-in tests or\nruntime code.\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.omp/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n| ------------- | -----------------: | ------: | ----------: |\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-codex | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-codex | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n| ------------------------------ | -----: |\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now _looks\nlike_ a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n| ----------------- | :----------: | -------: | ------------------------------: |\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n| ------------------------------------ | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Patch-DSL (`§PATH`/anchor/`«»≔` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped _inside_ JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n _different_ tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\nMLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n `code`). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is _worse_ in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.\n",
|
|
7
|
+
"adding-a-provider.md": "# Adding a provider\n\nA provider is described in two halves:\n\n- **Catalog half** (`packages/catalog`): one entry in the `CATALOG_PROVIDERS`\n table (`packages/catalog/src/provider-models/descriptors.ts`) carrying the\n `id`, `defaultModel`, runtime model-discovery factory, and catalog-generation\n wiring. `KnownProvider`, `PROVIDER_DESCRIPTORS`, and\n `DEFAULT_MODEL_PER_PROVIDER` are derived from this table.\n- **Auth half** (`packages/ai`): one declarative `ProviderDefinition` in the\n registry carrying env-key fallbacks and login/refresh flows. The\n `OAuthProvider` union, the env-key map, the `/login` provider list, the\n `refreshOAuthToken` / `AuthStorage.login` dispatch, and the coding-agent\n callback maps are derived from the registry.\n\n**Scope.** This is for a provider that reuses an existing wire API\n(`openai-completions`, `anthropic-messages`, `google-generative-ai`, …) — the\ncommon case for gateways and API-key providers, since stream dispatch keys on\n`model.api`, not `model.provider`. Adding a *new wire protocol* (a new\n`KnownApi`) is a separate task that also touches `stream.ts` dispatch,\n`api-registry.ts`, and the catalog `types.ts`.\n\n## Shape\n\nFor the common case, a provider is **one catalog entry + one def file + one registry line**:\n\n1. **Add an entry to `CATALOG_PROVIDERS`** in\n `packages/catalog/src/provider-models/descriptors.ts` with the `id`,\n `defaultModel`, the plain API-key env var(s) as `envVars`, and (usually) a\n `createModelManagerOptions` factory. For a\n simple OpenAI-compatible gateway, build the factory in\n `packages/catalog/src/provider-models/openai-compat.ts` or inline with the\n exported `createSimpleOpenAICompletionsOptions(providerId, baseUrl, config)`.\n2. **Create `packages/ai/src/registry/<id>.ts`** exporting one\n `export const <camelId>Provider = { … } as const satisfies ProviderDefinition;`\n with the auth fields (`login`, …). Plain env-var names live in the catalog\n entry's `envVars`; set `envKeys` only for computed resolvers (Foundry/ADC/\n Bedrock-style probes).\n3. **Add it to the `ALL` array** in `packages/ai/src/registry/registry.ts`\n (one import + one array entry). `ALL` order is the `/login` list order for\n loginable providers.\n\nThat is the full change for:\n- env-key-only providers,\n- providers with a simple inline API-key login flow,\n- most OpenAI-compatible gateways.\n\nFor a **non-trivial provider-local OAuth flow**, put the implementation in\n`packages/ai/src/registry/oauth/<vendor>.ts` and lazy-import it from the def\nfile. The shared OAuth flow infrastructure it builds on lives in the same\n`registry/oauth/` directory.\n\nDescriptors, the default-model map, env-key map, login list, and refresh\ndispatch all update automatically; the `KnownProvider` union gains the new id\nfrom the catalog table and `OAuthProvider` from the registry.\n\n## Field reference\n\n**Catalog table entry** (`ProviderCatalogEntry`, see\n`packages/catalog/src/provider-models/descriptor-types.ts` for JSDoc):\n\n| Field | Effect |\n|---|---|\n| `id` | Required. Member of `KnownProvider`. |\n| `defaultModel` | Required. Preferred model when no explicit selection is made. |\n| `envVars` | Env var name(s), in order, for the runtime API-key fallback (`getEnvApiKey`). |\n| `createModelManagerOptions` | Runtime model-discovery factory. Present (and not `specialModelManager`) ⇒ appears in `PROVIDER_DESCRIPTORS`. |\n| `allowUnauthenticated` | Runtime creates a model manager even without a key. |\n| `dynamicModelsAuthoritative` | Successful discovery replaces bundled models. |\n| `catalogDiscovery` | `{ label, envVars?, oauthProvider?, allowUnauthenticated? }` for offline catalog generation (`generate-models.ts`). `envVars` here overrides the entry-level list when generation uses different credentials (e.g. `cursor`). |\n| `specialModelManager` | Bespoke runtime factory (`google-antigravity` / `google-gemini-cli` / `openai-codex`); excluded from `PROVIDER_DESCRIPTORS`. |\n\n**Registry definition** (`ProviderDefinition`, see\n`packages/ai/src/registry/types.ts`):\n\n| Field | Effect |\n|---|---|\n| `id`, `name` | Required. `name` shows in the `/login` list. |\n| `envKeys` | Computed env fallback for `getEnvApiKey`, overriding the catalog entry's `envVars`: a var name string or a `() => string \\| undefined` resolver. Omit when `envVars` covers it. |\n| `login` | Interactive login. Present ⇒ member of `OAuthProvider`, shown in `/login`, dispatchable via `AuthStorage.login`. Returns an api-key `string` or `OAuthCredentials`. |\n| `refreshToken` | OAuth refresher; omit for static-token providers (the dispatch returns credentials unchanged). |\n| `storeCredentialsAs` | Store credentials under a different provider id (e.g. `openai-codex-device` ⇒ `openai-codex`). |\n| `callbackPort` | Present ⇒ entry in the auth-broker `CALLBACK_PORTS` map. |\n| `pasteCodeFlow` | OAuth flow needs a pasted code/redirect URL ⇒ member of `PASTE_CODE_LOGIN_PROVIDERS`. |\n\n## Conventions\n\n- Use `... as const satisfies ProviderDefinition` so the literal `id` is preserved\n for the union derivation.\n- `login` / `refreshToken` for simple API-key or validation-based flows can live\n directly in the provider def file (export the named login function there so\n tests can import it directly).\n- `login` / `refreshToken` for heavy provider-local OAuth flows MUST reach the\n adjacent `registry/oauth/*` module via a dynamic-import\n thunk (`const { loginX } = await import(\"./oauth/x\"); return loginX(cb);`),\n keeping those flows out of the eager startup graph.\n- All OAuth code lives under `registry/oauth/`: the shared flow infra\n (`callback-server`, `pkce`, `google-oauth-shared`, `types`, the runtime API\n `index`) plus every provider flow, including the `github-copilot` / `kimi` /\n `openai-codex` helpers reused by the streaming and usage layers. The non-OAuth\n API-key helpers (`api-key-login`, `api-key-validation`) sit beside the def\n files in `registry/`, since they back simple paste-an-API-key logins.\n- For a simple OpenAI-compatible gateway, build the manager inline with the\n exported `createSimpleOpenAICompletionsOptions(providerId, baseUrl, config)` —\n no edits to `openai-compat.ts` required.\n- A `ProviderDefinition` may also be registered at runtime by an extension via\n `registerOAuthProvider` (the `AuthStorage.login` dispatcher handles built-ins\n and extensions through the same path).\n",
|
|
8
|
+
"ai-schema-normalize.md": "# AI tool-schema normalization\n\n`@oh-my-pi/pi-ai` exposes one unified schema normalizer that providers consume\nbefore tools are sent on the wire. All walkers live in\n`packages/ai/src/utils/schema/normalize.ts`; the operational contract is\n`packages/ai/src/utils/schema/CONSTRAINTS.md`.\n\nThere is no separate `strict-mode.ts` module any more — OpenAI strict-mode\nsanitization, OpenAI Responses `oneOf` rewriting, Google/Vertex/Gemini-CLI\nsanitization, Cloud Code Assist Claude sanitization, and MCP sanitization all\nshare the same option-driven walk.\n\n## Entry points\n\nAll exports live under `@oh-my-pi/pi-ai/utils/schema`:\n\n- `normalizeSchema(value, options)` — generic option-driven walker.\n- `normalizeSchemaForGoogle(value)` — Gemini / Vertex / Gemini CLI.\n- `normalizeSchemaForCCA(value)` — Cloud Code Assist Claude (Antigravity + GCA).\n- `normalizeSchemaForMCP(value)` — MCP inputSchemas before they enter the\n custom-tool registry. `tool-bridge.ts` runs every MCP `inputSchema` through\n this dispatcher.\n- `normalizeSchemaForOpenAIResponses(schema)` (alias\n `sanitizeSchemaForOpenAIResponses`) — rewrites `oneOf` → `anyOf` for the\n Responses family.\n- `sanitizeSchemaForStrictMode(schema)` and\n `enforceStrictSchema(schema)` / `tryEnforceStrictSchema(schema)` — the\n OpenAI strict-mode pipeline (sanitize → enforce). All three are exported\n from `normalize.ts`.\n- `adaptSchemaForStrict(schema, strict)` from `./adapt` — thin composer that\n wraps `tryEnforceStrictSchema` for provider call sites and consults\n `PI_NO_STRICT` (env `PI_NO_STRICT`) for the global bypass.\n\nRemoved in the unified-flow refactor:\n\n- `strict-mode.ts` (merged into `normalize.ts`).\n- `sanitize-google.ts` and `normalize-cca.ts` (replaced by\n `normalizeSchemaFor*` dispatchers).\n- `StringEnum` helper — use `z.enum([...])` directly; Zod's emitted JSON\n Schema is already wire-compatible with Google and other providers.\n- `sanitizeSchemaFor{Google,CCA,MCP}` / `prepareSchemaForCCA` — renamed to\n `normalizeSchemaFor{Google,CCA,MCP}`.\n\n## Dispatcher mapping\n\n| Provider transport(s) | Dispatcher |\n| ------------------------------------------------------------------ | ------------------------------------------- |\n| `openai-completions`, `openai-responses`, `openai-codex-responses` | `adaptSchemaForStrict` (sanitize + enforce) |\n| `openai-responses` family (`oneOf` → `anyOf` only) | `normalizeSchemaForOpenAIResponses` |\n| `google-generative-ai`, `google-vertex`, Gemini CLI | `normalizeSchemaForGoogle` |\n| Cloud Code Assist Claude (Antigravity + GCA, `claude-*` model ids) | `normalizeSchemaForCCA` |\n| MCP `inputSchema` ingestion | `normalizeSchemaForMCP` |\n| `anthropic-messages` (native, not CCA) | per-provider whitelist in `anthropic.ts` |\n\nGemini CLI / Antigravity CCA MUST run the full `normalizeSchemaForCCA`\npipeline (not just the first keyword-stripping pass) to keep parity with the\nshared Google Claude path.\n\n## Walk semantics\n\n`normalizeSchema` first detoxifies serialized Zod-instance-shaped inputs, upgrades them to\nJSON Schema 2020-12, dereferences the tree, then walks it with the option set\npinned by the dispatcher. Each node:\n\n1. Renames `snake_case` combinator/property keys to camelCase\n (`any_of` → `anyOf`, etc.; collisions follow python-genai\n `pop(from)`/`set(to)` semantics — snake_case wins).\n2. Applies the `handle_null_fields` collapse for nullable unions before\n recursing into children.\n3. Strips keys the target provider does not support, optionally lifting\n human-meaningful keys (`pattern`, `format`, min/max, `default`,\n `examples`, ...) into the sibling `description` via the spill formatter\n (`spill.ts`). Structural/meta keys (`$ref`, `$defs`,\n `additionalProperties`) are not spilled.\n4. Normalizes type unions (`type: [\"T\", \"null\"]` → `type: \"T\"` + nullable\n marker on Google, plain `type: \"T\"` on CCA).\n5. Collapses object-only / same-type combiners, optionally lossy-collapses\n mixed-type combiners (CCA only), and runs the residual-combiner fixpoint.\n6. Validates against AJV 2020 when `validateAndFallback` is set (CCA path)\n and emits the per-tool fallback `{ \"type\": \"object\", \"properties\": {} }`\n on residual incompatibility — `type` array, `type: \"null\"`, `nullable`\n key, or any remaining `anyOf`/`oneOf`/`allOf`.\n\n## OpenAI strict-mode pipeline\n\n`adaptSchemaForStrict(schema, strict)` runs `tryEnforceStrictSchema`,\nwhich composes:\n\n1. **Sanitize** (`sanitizeSchemaForStrictMode`): strips non-structural\n keywords (`format`, `pattern`, min/max, `examples`, `default`,\n `if`/`then`/`else`, `not`, `unevaluated*`, `patternProperties`,\n `dependent*`, `content*`, `min/maxProperties`, `$dynamicRef`, etc.). The\n `default` value is inlined into the sibling `description` as\n ` (default: X)` before being dropped, unless `description` already\n contains `(default:` or no `description` exists.\n2. **Enforce** (`enforceStrictSchema`): every object node gets\n `additionalProperties: false`, every property goes into `required`, and\n optional properties become nullable unions\n (`anyOf: [<original>, { \"type\": \"null\" }]`). Tuple `prefixItems` are\n strictified recursively.\n\nThe two passes use cache/cycle guards, so refs, `allOf`, and nullable wrapping\nstay deterministic without recursing forever. `tryEnforceStrictSchema` is\nfail-open: if anything throws, it returns `{ strict: false, schema: upgraded }`\nso callers MUST emit `strict: true` only when enforcement actually succeeded.\n\n### Edge cases the strict-mode normalizer handles\n\n- **Local `$ref` inlining.** OpenAI strict mode rejects\n `{ \"$ref\": \"...\", \"description\": \"...\" }` with sibling keys. The\n sanitizer pre-resolves local `#/...` refs against the root and merges\n with **sibling keys winning** over the resolved def — same precedence\n as `openai-python`'s `_ensure_strict_json_schema`. Recursive refs are\n guarded by the per-walk epoch.\n- **Single-item `allOf`.** A `{ \"allOf\": [X], ...siblings }` collapses to\n `{ ...X, ...siblings }` with the inlined entry's keys winning over the\n original siblings (matches `openai-python`'s `_pydantic.py:79-83`). Multi-\n item `allOf` is left intact for the downstream validator to reject if\n needed.\n- **Type-array branches and nullable unions.** When a node has\n `type: [\"T\", \"U\"]`, the sanitizer emits one variant schema per type,\n pruning type-specific keywords (e.g. `properties`/`required` only stay on\n the `object` variant, `items` only on the `array` variant). The shared\n `description` is **hoisted onto the `anyOf` wrapper** instead of being\n duplicated on every branch — so a strict nullable union becomes\n `{ anyOf: [T, { type: \"null\" }], description: \"...\" }`, not\n `anyOf: [{ ..., description }, { ..., description }]`.\n- **Enum/const without a `type`.** Both sanitize and enforce paths call\n `inferStrictPrimitiveTypeFromEnumOrConst` to infer the primitive `type`\n from `enum` / `const` values. Mixed-primitive enums (`[1, \"two\", null]`),\n enums containing objects/arrays, and non-primitive `const` values\n (`{a:1}`, `[1,2,3]`) cannot be described by a single `type` keyword and\n trigger the strict-mode fail-open path — emitting a typeless schema\n would just be rejected on the wire by OpenAI.\n\n## Performance: static fingerprint cache\n\n`resolveProviderModels` in `packages/ai/src/model-manager.ts` and\n`readModelCache`/`writeModelCache` in `model-cache.ts` cooperate via a\nschema-v3 `static_fingerprint` column on the `model_cache` SQLite table.\n\n- `fingerprintStatic(staticModels)` hashes the static catalog slice\n (`Bun.hash(JSON.stringify(models))` in base36) and memoizes the result\n in a per-process `WeakMap` keyed by the array reference. Multiple\n cold-start arms calling `resolveProviderModels` with the same\n `staticModels` array pay the JSON+hash cost once.\n- On cache read, if the network fetch is being skipped, the cached row is\n fresh + authoritative, and the cached `static_fingerprint` matches the\n current one, `resolveProviderModels` returns the cached models verbatim\n — the cache already incorporates the same static state, so re-running\n `mergeDynamicModels(static, cache)` would just rebuild the same objects.\n- `mergeModelSources` and `mergeDynamicModels` short-circuit on\n empty-source inputs (the common shape after `(static, [])` or for\n providers without a static catalog), avoiding Map churn entirely.\n\nCache rows written before schema v3 are dropped by the cache-version\ncheck; the column defaults to `''` for any row that survives a version\nupgrade so the fingerprint-equality check naturally fails closed and the\nfull merge re-runs.\n\n## Related\n\n- `docs/models.md` — registry, equivalence, compat flags\n (`supportsStrictMode`, `toolStrictMode`, `disableStrictTools`).\n- `docs/provider-streaming-internals.md` — how the normalized schemas are\n used downstream during the provider stream loop.\n- `docs/mcp-server-tool-authoring.md` — MCP `inputSchema` ingestion via\n `normalizeSchemaForMCP`.\n- `packages/ai/src/utils/schema/CONSTRAINTS.md` — operational contract for\n every normalization rule.\n",
|
|
9
|
+
"approval-mode.md": "# Tool approval mode\n\nTool approval has two independent inputs:\n\n1. **Tool declaration** — every tool may declare an `approval` tier:\n - `read`: reads data or updates UI-only session metadata.\n - `write`: mutates workspace/session state but does not execute arbitrary code.\n - `exec`: executes code, shells out, drives a browser, spawns agents, or performs similarly broad actions.\n2. **User policy** — `tools.approval.<toolName>: allow | deny | prompt` overrides the mode for that tool unless a non-yolo safety override forces a prompt.\n\nTools without an `approval` declaration are treated as `exec`. This is the safe default for MCP and unknown custom tools.\n\n## Modes\n\nConfigure with `tools.approvalMode`:\n\n| Mode | Auto-approves | Prompts for |\n| ---------------- | ----------------------- | --------------- |\n| `always-ask` | `read` | `write`, `exec` |\n| `write` | `read`, `write` | `exec` |\n| `yolo` (default) | `read`, `write`, `exec` | none |\n\n`--auto-approve` and `--yolo` force `tools.approvalMode: yolo` for the session.\n\n## User overrides\n\n`tools.approval` is honored in every mode:\n\n```yaml\ntools:\n approvalMode: write\n approval:\n bash: prompt\n read: allow\n mcp__filesystem__delete: deny\n```\n\nResolution per tool call:\n\n1. Compute the tool's approval decision from `tool.approval(args)`; omitted means `exec`.\n2. Normalize `tools.approval.<tool>` if present; invalid values are ignored.\n3. In `yolo` mode, the user policy is used when present; otherwise the call is allowed. Safety `override` reasons do not force a prompt in `yolo`.\n4. In non-yolo modes, if the tool sets `override: true`, `deny` is blocked and all other cases prompt, even if user policy says `allow`.\n5. Otherwise, a valid user policy wins.\n6. Otherwise, the active mode auto-approves or prompts by tier.\n\n## Safety overrides\n\nA tool can force a prompt with object-form approval:\n\n```ts\napproval: { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n```\n\n`bash` uses this for critical destructive patterns such as `rm -rf /`, fork bombs, remote-fetch-then-execute, writes to `/etc/passwd`, and host shutdown commands. These surface as `reason` in the approval prompt, but in `yolo` mode they are auto-approved unless a user policy for the tool is set to `prompt` or `deny`.\n\n## Per-tool prompt details\n\nTools can add approval-prompt body lines with `formatApprovalDetails(args)`. The standard prompt includes:\n\n- `Allow tool: <name>`\n- `Origin: MCP server tool` for unannotated `mcp__...` tools\n- `Reason: <reason>` when the tool decision supplies one\n- tool-specific details such as command, path, code, browser action, or subagent assignment\n\n## Defining approval on tools\n\nBuilt-in and custom tools share the same shape:\n\n```ts\nexport type ToolTier = \"read\" | \"write\" | \"exec\";\nexport type ToolApprovalDecision = ToolTier | { tier: ToolTier; reason?: string; override?: boolean };\nexport type ToolApproval = ToolApprovalDecision | ((args: unknown) => ToolApprovalDecision);\n\napproval?: ToolApproval;\nformatApprovalDetails?: (args: unknown) => string | string[] | undefined;\n```\n\nExamples:\n\n```ts\napproval: \"read\";\n\napproval: (args) => (LSP_READONLY_ACTIONS.has(args.action) ? \"read\" : \"write\");\n\napproval: (args) =>\n isCritical(args.command)\n ? { tier: \"exec\", override: true, reason: \"Critical pattern detected\" }\n : \"exec\";\n```\n\n## Subagents\n\nSubagents run headless with `tools.approvalMode: yolo` so they do not stall waiting for UI. The parent `task` approval is the authorization boundary. User `tools.approval.<tool>` settings continue to control whether a tool is allowed, prompted, or blocked.\n",
|
|
10
|
+
"auth-broker-gateway.md": "# Auth Broker and Auth Gateway\n\nThe auth broker and auth gateway are two cooperating HTTP services that move OAuth refresh tokens and provider access tokens off developer laptops and into a single broker host.\n\n- **`omp auth-broker serve`** holds the canonical SQLite credential vault, performs OAuth refreshes, and exposes a small REST API (`/v1/snapshot`, `/v1/snapshot/stream`, `/v1/credential/:id/refresh`, `/v1/credential/:id/disable`, `/v1/credential`, `/v1/usage`, `/v1/healthz`).\n- **`omp auth-gateway serve`** is a forward-proxy. It accepts OpenAI Chat Completions, Anthropic Messages, OpenAI Responses, and pi-native stream requests, resolves the broker-backed credential, and dispatches through `pi-ai` provider logic. Clients (containerised omp, llm-git, the macOS usage widget, …) never see the access token.\n\nTransport security between operator, broker, and gateway is delegated to the operator (Tailscale / Wireguard / reverse proxy + TLS). Every endpoint except `/v1/healthz` (broker) and `/healthz` (gateway) requires a bearer token.\n\nSource: `packages/ai/src/auth-broker/`, `packages/ai/src/auth-gateway/`, `packages/coding-agent/src/cli/auth-broker-cli.ts`, `packages/coding-agent/src/cli/auth-gateway-cli.ts`, `packages/coding-agent/src/session/auth-broker-config.ts`.\n\n## Data flow\n\n```\n ┌────────────────────────────────────────────────────────────┐\n │ broker host │\n │ │\n developer ──▶ │ ┌──────────────────────────┐ ┌────────────────────┐ │\n laptop / │ │ omp auth-broker serve │◀──▶│ SQLite agent.db │ │\n CI / robomp │ │ - holds refresh tokens │ │ (canonical writer)│ │\n │ │ - background refresher │ └────────────────────┘ │\n │ │ /v1/{snapshot,refresh,…}│ │\n │ └─────────┬────────────────┘ │\n │ │ bearer ($CONFIG_DIR/auth-broker.token) │\n │ ▼ │\n │ ┌──────────────────────────┐ │\n │ │ omp auth-gateway serve │ RemoteAuthCredentialStore │\n │ │ /v1/{chat,messages,…} │ receives snapshot stream, │\n │ │ /v1/usage,/v1/models │ refreshes credentials by id │\n │ │ /v1/credentials/check │ via the broker on expiry │\n │ └─────────┬────────────────┘ │\n └────────────┼───────────────────────────────────────────────┘\n │ bearer ($CONFIG_DIR/auth-gateway.token)\n ▼\n gateway clients\n (llm-git, macOS widget, robomp containers, IDE plugins, …)\n │\n ▼ provider request with broker-resolved credential\n api.anthropic.com / api.openai.com / …\n```\n\nThe broker is the only writer of OAuth refresh tokens. Clients (including the gateway itself) load a redacted snapshot in which every `refresh` field has been replaced with `REMOTE_REFRESH_SENTINEL`; when an access token expires the client calls `POST /v1/credential/:id/refresh` and the broker performs the refresh server-side. `RemoteAuthCredentialStore` rejects local replace/upsert/delete-by-provider mutations, with errors pointing at `omp auth-broker login` / `omp auth-broker logout`.\n\n## auth-broker\n\n### CLI\n\n```\nomp auth-broker serve [--bind=host:port] # boot the broker\nomp auth-broker token [--regenerate] [--json] # print or rotate the bearer token\nomp auth-broker login [<provider>] [--via=user@host] [--dry-run]\nomp auth-broker logout [<provider>]\nomp auth-broker list [--json]\nomp auth-broker import <file|dir> [--provider=<id>] [--include-disabled] [--dry-run] [--json]\nomp auth-broker migrate --from-local [--include-oauth] [--include-env] [--dry-run] [--json]\nomp auth-broker status [--json]\n```\n\n- `serve` opens the local SQLite store at `getAgentDbPath()` and binds an HTTP listener (default `127.0.0.1:8765`). On startup a token is ensured at `<config-dir>/auth-broker.token` (mode `0600`, `0700` parent dir). The background refresher refreshes any OAuth credential whose `expires - Date.now() < refreshSkewMs` (default 5 min) every `refreshIntervalMs` (default 60 s).\n- `token` prints the cached bearer or generates a new one. `--regenerate` rotates it.\n- `login [<provider>]` runs the per-provider OAuth flow locally — when no provider is supplied, it falls back to an interactive numbered picker. With `--via=user@host` it shells out `ssh -L <callback-port>:127.0.0.1:<callback-port> user@host omp auth-broker login <provider>` so the OAuth callback hits the local browser but the credential is written on the broker host (`--via` requires `<provider>`). Built-in callback ports: `anthropic:54545`, `openai-codex:1455`, `google-gemini-cli:8085`, `google-antigravity:51121`, `gitlab-duo:8080`. The OAuth dance is driven in-process via `AuthStorage.login()` — there is no longer a `pi-ai` bin to spawn.\n- `logout [<provider>]` deletes every credential row for `<provider>`. With no argument it shows an interactive numbered picker of currently-stored providers.\n- `list` enumerates every registered OAuth provider id/name (the union of built-ins + `registerOAuthProvider` custom providers). `--json` emits a machine-readable array.\n- `import <file|dir>` imports CLIProxyAPI-style JSON credentials into the local SQLite store. Maps `type` field → omp provider (`claude → anthropic`, `codex → openai-codex`, `gemini → google-gemini-cli`, `antigravity → google-antigravity`, `gemini-cli → google-gemini-cli`).\n- `migrate --from-local` uploads local SQLite credentials to the configured broker (`POST /v1/credential`). Local API keys are included by default; local OAuth rows are skipped unless `--include-oauth` is set; environment-derived API keys are skipped unless `--include-env` is set. Re-runs are idempotent against the broker snapshot.\n- `status` health-pings the configured remote broker.\n\n### Endpoints\n\n| Method | Path | Auth | Purpose |\n| ------ | ---------------------------- | ------ | ------------------------------------------------------- |\n| `GET` | `/v1/healthz` | none | Liveness + version |\n| `GET` | `/v1/snapshot` | bearer | Redacted snapshot (refresh tokens replaced by sentinel) |\n| `GET` | `/v1/snapshot/stream` | bearer | SSE snapshot stream with delta events and keepalives |\n| `POST` | `/v1/credential` | bearer | Upsert one OAuth or API-key credential |\n| `POST` | `/v1/credential/:id/refresh` | bearer | Force-refresh one OAuth credential |\n| `POST` | `/v1/credential/:id/disable` | bearer | Disable one credential with a recorded cause |\n| `GET` | `/v1/usage` | bearer | Aggregate `UsageReport[]` across credentials |\n\nRequests use `Authorization: Bearer <token>`. The server compares against an in-memory token allow-list; the gateway’s implementation uses a timing-safe comparison.\n\n### Background refresher\n\n`AuthBrokerRefresher` iterates active OAuth credentials at `refreshIntervalMs` cadence and refreshes any within `refreshSkewMs` of expiry. Refreshes are single-flighted per credential id so a slow refresh cannot be retriggered. The refresher distinguishes:\n\n- **definitive failures** (`invalid_grant`, `invalid_token`, `revoked`, unauthorized refresh-token, 401/403 not from a network blip) — credentials are passed to `AuthStorage.disableCredentialById(id, cause)` so the next snapshot pull surfaces a clean delete on the client;\n- **transient failures** (timeout / ECONNREFUSED / fetch failed) — left in place for the next sweep.\n\n## auth-gateway\n\n### CLI\n\n```\nomp auth-gateway serve [--bind=host:port] [--no-auth]\nomp auth-gateway token [--regenerate] [--json]\nomp auth-gateway status [--json]\nomp auth-gateway check [--strict] [--json]\n```\n\n- `serve` requires `OMP_AUTH_BROKER_URL` (or `auth.broker.url` in `config.yml`) — the gateway is itself a broker client. It calls `AuthBrokerClient.fetchSnapshot()`, wraps it in `RemoteAuthCredentialStore`, and constructs an `AuthStorage` that resolves access tokens through the broker. Default bind is `127.0.0.1:4000`. The gateway token is stored at `<config-dir>/auth-gateway.token` (`0600`); `--no-auth` disables the bearer check entirely (loopback-only use).\n- `token` / `status` manage and inspect the gateway bearer token and upstream broker readiness.\n- `check` probes broker-backed credentials through the gateway store. Without `--strict` it uses provider usage probes; `--strict` also exercises each credential against its chat-completion endpoint and can consume a small amount of quota.\n\n### Endpoints\n\n| Method | Path | Auth | Purpose |\n| ------ | ----------------------- | ------ | ------------------------------------------------------------ |\n| `GET` | `/healthz` | none | Liveness + version |\n| `GET` | `/v1/usage` | bearer | Aggregate `UsageReport[]` (proxied through `AuthStorage`) |\n| `GET` | `/v1/models` | bearer | Bundled-model catalog filtered to providers with credentials |\n| `GET` | `/v1/credentials/check` | bearer | Per-credential auth health probe |\n| `POST` | `/v1/chat/completions` | bearer | OpenAI Chat Completions wire format |\n| `POST` | `/v1/messages` | bearer | Anthropic Messages wire format |\n| `POST` | `/v1/responses` | bearer | OpenAI Responses wire format |\n| `POST` | `/v1/pi/stream` | bearer | Native `pi-ai` stream wire format |\n\nThe model id is read from the top-level `model` field for foreign wire formats and from the pi-native request body for `/v1/pi/stream`. The gateway picks the first bundled `Model<Api>` matching that id, parses the inbound wire format into an omp `Context`, resolves the provider credential from broker-backed `AuthStorage`, dispatches through `streamSimple()`, and re-encodes the result to the inbound format (SSE for streamed responses).\n\nThere is no raw provider passthrough path. All supported routes go through `pi-ai` provider logic so credential-specific request shaping, OAuth refresh-on-auth-error, and provider quirks stay centralized.\n\n`idleTimeout` on the underlying `Bun.serve` is set to `255 s` so long thinking-budget calls do not get killed by Bun’s default idle timeout.\n\n## Usage cache: server-side 5-min jitter + client-side 15 s single-flight\n\nTwo layers cache the aggregate provider-usage report. Both are intentional and stacked.\n\n### Server-side cache (broker `AuthStorage`)\n\n`AuthStorage` caches each credential’s `UsageReport` in the broker’s SQLite store at a **5-minute per-credential TTL with ±25 % jitter**. Anthropic and OpenAI rate-limit `/usage` aggressively per source IP, and a synchronized 5-credential fan-out trips 429s every cycle; the jitter decorrelates refresh times within a few cycles. On fetch failure the store keeps the **last-good** report for up to 24 h with a short jittered re-poll window — so a transient upstream blip never blanks out the widget.\n\nConstants: `USAGE_REPORT_TTL_MS = 5 * 60_000`, `USAGE_LAST_GOOD_RETENTION_MS = 24 * 60 * 60_000` (`packages/ai/src/auth-storage.ts`).\n\n### Client-side single-flight (`RemoteAuthCredentialStore`)\n\nWhen the gateway (or any other broker client) calls `fetchUsageReports()` / `getUsageReport(provider, credential)`, `RemoteAuthCredentialStore` coalesces concurrent calls into a single `GET /v1/usage` round-trip and caches the result for **15 s** in memory.\n\n- `USAGE_CACHE_TTL_MS = 15_000` (`packages/ai/src/auth-broker/remote-store.ts`).\n- A single `#usageInflight` promise is shared across all callers; a per-caller `AbortSignal` is **raced** against the shared promise, not threaded into it, so one caller’s abort never cascades into a peer’s in-flight request.\n- On fetch failure the rejected promise is logged and the awaited value is `null` — callers (`AuthStorage.fetchUsageReports`, `#getUsageReport`) treat a `null` report as \"no usage signal for this cycle\" and proceed without it. **This is the 15 s TTL fallback**: the client absorbs transient broker outages by suppressing the error, returning `null` to ranking, and re-attempting after the 15 s window.\n\nThe 15 s client window deliberately sits below the broker’s 5 min server cache, so almost every client poll is served from the broker’s already-cached value; the client cache exists to absorb the parallel fan-out generated by `AuthStorage.#rankOAuthSelections` into a single broker round-trip.\n\n## Client snapshot cache\n\n`discoverAuthStorage()` persists the broker snapshot to `~/.omp/cache/auth-broker-snapshot.enc` after the initial `/v1/snapshot` fetch and after later broker-sourced full snapshots. The file is AES-256-GCM encrypted with `SHA-256(OMP_AUTH_BROKER_TOKEN)` and authenticated with the broker URL as additional data, so changing either the token or URL makes the cache unreadable. The file is written atomically with mode `0600`.\n\nFreshness is anchored to the broker-stamped `snapshot.generatedAt`, not local write time. Default TTL is 1 h (`OMP_AUTH_BROKER_SNAPSHOT_TTL_MS`); `0` disables the cache and restores the old always-fetch boot path. When the cached snapshot is still fresh, `omp` boots from it and skips the blocking `/v1/snapshot` query. `RemoteAuthCredentialStore` still starts its normal SSE / long-poll background sync immediately, so deleted or rotated credentials reconcile after startup, and expired OAuth access tokens still refresh through `POST /v1/credential/:id/refresh`.\n\nIf the broker is down at boot and a fresh cache exists, startup now succeeds from the cached snapshot. If the cache is missing, expired, corrupt, written for a different URL, or encrypted with a different token, startup falls back to the live fetch and fails the same way it did before if the broker is unreachable.\n\n## Operator opt-in\n\nThe broker is **off** unless `OMP_AUTH_BROKER_URL` (or `auth.broker.url` in `config.yml`) is set. When set, `discoverAuthStorage` in `packages/coding-agent/src/sdk.ts` swaps the local SQLite credential store for `RemoteAuthCredentialStore` and every API call resolves credentials through the broker.\n\n### Environment variables\n\n| Variable | Purpose | Required when |\n| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`). Selecting this puts the client in broker mode — local SQLite is bypassed. | Any time the omp client should resolve credentials through a broker (and required by `omp auth-gateway serve`). |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token used for every broker endpoint except `/v1/healthz`. | When `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token`. |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local snapshot cache. Default `3600000` (1 h); `0` disables cache reads and writes. | Optional in broker mode. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path override for the encrypted local snapshot cache. Default `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). | Optional in broker mode. |\n\nResolution order in `resolveAuthBrokerConfig()`:\n\n1. `OMP_AUTH_BROKER_URL` env (else `auth.broker.url` from `config.yml`, resolved through `resolveConfigValue`);\n2. `OMP_AUTH_BROKER_TOKEN` env (else `auth.broker.token` from `config.yml`, else `<config-dir>/auth-broker.token`);\n3. URL set but no token resolvable → hard error pointing at the token file path.\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*` because it is itself a broker client.\n\n### `config.yml` keys\n\n| Key | Default | Purpose |\n| ------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `auth.broker.url` | unset | Same as `OMP_AUTH_BROKER_URL`; env wins. Hidden from the settings UI. Values are resolved as a literal, an environment variable name, or `!<shell command>` to use trimmed stdout. |\n| `auth.broker.token` | unset | Same as `OMP_AUTH_BROKER_TOKEN`; env wins. Values are resolved the same way. |\n\n### Token files\n\n| Path | Owner | Mode |\n| --------------------------------- | ---------------------------------------------------- | ----------------------------- |\n| `<config-dir>/auth-broker.token` | `omp auth-broker serve` (created at first start) | `0600` in a `0700` parent dir |\n| `<config-dir>/auth-gateway.token` | `omp auth-gateway serve` (skipped under `--no-auth`) | `0600` in a `0700` parent dir |\n\n`<config-dir>` resolves to `~/.omp/` (respecting `PI_CONFIG_DIR`).\n\n## Interaction with the local API-key resolution order\n\nThe broker only owns OAuth credentials and provider-API-key credentials that were uploaded to it. The standard credential ladder in `models.md` (`Auth and API key resolution order`) is preserved, with one addition committed alongside the gateway:\n\n- `AuthStorage.setConfigApiKey / removeConfigApiKey / clearConfigApiKeys` let a `models.yml` `apiKey` beat a stored OAuth token **without** overriding an explicit `--api-key`. This is what allows a broker-resolved OAuth credential to be reliably shadowed by a per-environment `models.yml` config key when both are present.\n\n## See also\n\n- [`secrets.md`](./secrets.md) — secret obfuscation around tokens that _do_ leak through (e.g. `OMP_AUTH_BROKER_TOKEN` in shell output).\n- [`models.md`](./models.md) — provider auth resolution order; the broker plugs in at layers 2–3 (stored credentials).\n- [`environment-variables.md`](./environment-variables.md) — full env reference including `OMP_AUTH_BROKER_URL` / `OMP_AUTH_BROKER_TOKEN`.\n",
|
|
11
|
+
"bash-tool-runtime.md": "# Bash tool runtime\n\nThis document describes the **`bash` tool** runtime path used by agent tool calls, from command normalization to execution, truncation/artifacts, and rendering.\n\nIt also calls out where behavior diverges in interactive TUI, print mode, RPC mode, and user-initiated bang (`!`) shell execution.\n\n## Scope and runtime surfaces\n\nThere are two different bash execution surfaces in coding-agent:\n\n1. **Tool-call surface** (`toolName: \"bash\"`): used when the model calls the bash tool.\n - Entry point: `BashTool.execute()`.\n - Parameters include `command`, optional `env`, `timeout`, `cwd`, `pty`, and, when `async.enabled` is true, `async`.\n2. **User bang-command surface** (`!cmd` from interactive input or RPC `bash` command): session-level helper path.\n - Entry point: `AgentSession.executeBash()`.\n\nBoth eventually use `executeBash()` in `src/exec/bash-executor.ts` for non-PTY execution, but only the tool-call path runs normalization/interception, optional managed background-job handling, and tool renderer logic.\n\nSet `bash.enabled: false` in settings to remove the model-facing `bash` tool from the active tool registry. This does not disable user-initiated bang commands or RPC `bash` requests.\n\n## End-to-end tool-call pipeline\n\n## 1) Input handling and parameter merge\n\n`BashTool.execute()` currently handles input before execution as follows:\n\n- validates optional `env` names against shell-variable syntax,\n- when `bash.stripTrailingHeadTail` is enabled (default), applies conservative native fixups that remove safe trailing `| head` / `| tail` pipes and redundant trailing `2>&1`,\n- extracts a leading single-line `cd <path> && ...` into `cwd` when `cwd` was not supplied,\n- rejects `async: true` when `async.enabled` is false.\n\nThere are no structured `head` or `tail` tool parameters in the current schema. Output limiting is handled by `OutputSink` truncation/artifacts, and the optional trailing-pipe fixup exists to avoid hiding output before the harness can capture it.\n\n## 2) Optional interception (blocked-command path)\n\nIf `bashInterceptor.enabled` is true, `BashTool` loads rules from settings and runs `checkBashInterception()` against the normalized command.\n\nInterception behavior:\n\n- command is blocked **only** when:\n - regex rule matches, and\n - the suggested tool is present in `ctx.toolNames`.\n- invalid regex rules are silently skipped.\n- on block, `BashTool` throws `ToolError` with message:\n - `Blocked: ...`\n - original command included.\n\nDefault rule patterns (defined in code) target common misuses:\n\n- file readers (`cat`, `head`, `tail`, ...)\n- search tools (`grep`, `rg`, ...)\n- file finders (`find`, `fd`, ...)\n- in-place editors (`sed -i`, `perl -i`, `awk -i inplace`)\n- shell redirection writes (`echo ... > file`, heredoc redirection)\n\n### Caveat\n\n`InterceptionResult` includes `suggestedTool`, but `BashTool` currently surfaces only the message text (no structured suggested-tool field in `details`).\n\n## 3) CWD validation and timeout clamping\n\n`cwd` is resolved relative to session cwd (`resolveToCwd`), then validated via `stat`:\n\n- missing path -> `ToolError(\"Working directory does not exist: ...\")`\n- non-directory -> `ToolError(\"Working directory is not a directory: ...\")`\n\nTimeout is clamped to `[1, 3600]` seconds and converted to milliseconds.\n\n## 4) Artifact allocation\n\nBefore execution, the tool allocates an artifact path/id (best-effort) for truncated output storage.\n\n- artifact allocation failure is non-fatal (execution continues without artifact spill file),\n- artifact id/path are passed into execution path for full-output persistence on truncation.\n\n## 5) PTY vs non-PTY execution selection\n\n`BashTool` chooses PTY execution only when all are true:\n\n- tool input `pty === true`\n- `PI_NO_PTY !== \"1\"`\n- tool context has UI (`ctx.hasUI === true` and `ctx.ui` set)\n\nOtherwise it uses non-interactive `executeBash()`.\n\nThat means print mode and non-UI RPC/tool contexts always use non-PTY.\n\n## Non-interactive execution engine (`executeBash`)\n\n## Shell session reuse model\n\n`executeBash()` caches native `Shell` instances in a process-global map keyed by:\n\n- shell path,\n- configured command prefix,\n- snapshot path,\n- serialized shell env,\n- optional agent session key.\n\nSession-level bang-command executions pass `sessionKey: this.sessionId`.\n\nTool-call executions pass `sessionKey: this.session.getSessionId?.()`, when available. In both surfaces, a session key isolates shell reuse per session; without one, reuse falls back to shell config/snapshot/env.\n\n## Shell config and snapshot behavior\n\nAt each call, executor loads settings shell config (`shell`, `env`, optional `prefix`).\n\nIf selected shell includes `bash`, it attempts `getOrCreateSnapshot()`:\n\n- snapshot captures aliases/functions/options from user rc,\n- snapshot creation is best-effort,\n- failure falls back to no snapshot.\n\nIf `prefix` is configured, command becomes:\n\n```text\n<prefix> <command>\n```\n\n## Streaming and cancellation\n\n`Shell.run()` streams chunks to `OutputSink` and optional `onChunk` callback.\n\nCancellation:\n\n- aborted signal triggers `shellSession.abort(...)`,\n- timeout from native result is mapped to `cancelled: true` + annotation text,\n- explicit cancellation similarly returns `cancelled: true` + annotation.\n\nNo exception is thrown inside executor for timeout/cancel; it returns structured `BashResult` and lets caller map error semantics.\n\n## Interactive PTY path (`runInteractiveBashPty`)\n\nWhen PTY is enabled, tool runs `runInteractiveBashPty()` which opens an overlay console component and drives a native `PtySession`.\n\nBehavior highlights:\n\n- xterm-headless virtual terminal renders viewport in overlay,\n- keyboard input is normalized (including Kitty sequences and application cursor mode handling),\n- `esc` while running kills the PTY session,\n- terminal resize propagates to PTY (`session.resize(cols, rows)`).\n\nEnvironment hardening defaults are injected for unattended runs:\n\n- pagers disabled (`PAGER=cat`, `GIT_PAGER=cat`, etc.),\n- editor prompts disabled (`GIT_EDITOR=true`, `EDITOR=true`, ...),\n- terminal/auth prompts reduced (`GIT_TERMINAL_PROMPT=0`, `SSH_ASKPASS=/usr/bin/false`, `CI=1`),\n- package-manager/tool automation flags for non-interactive behavior.\n\nPTY output is normalized (`CRLF`/`CR` to `LF`, `sanitizeText`) and written into `OutputSink`, including artifact spill support.\n\nOn PTY startup/runtime error, sink receives `PTY error: ...` line and command finalizes with undefined exit code.\n\n## Output handling: streaming, truncation, artifact spill\n\nBoth PTY and non-PTY paths use `OutputSink`.\n\n## OutputSink semantics\n\n- keeps an in-memory UTF-8-safe tail buffer (`DEFAULT_MAX_BYTES`, currently 50KB),\n- tracks total bytes/lines seen,\n- if artifact path exists and output overflows (or file already active), writes full stream to artifact file,\n- when memory threshold overflows, trims in-memory buffer to tail (UTF-8 boundary safe),\n- marks `truncated` when overflow/file spill occurs.\n\n`dump()` returns:\n\n- `output` (possibly annotated prefix),\n- `truncated`,\n- `totalLines/totalBytes`,\n- `outputLines/outputBytes`,\n- `artifactId` if artifact file was active.\n\n### Long-output caveat\n\nRuntime truncation is byte-threshold based in `OutputSink` (50KB default). It does not enforce a hard 2000-line cap in this code path.\n\n### Shell output minimizer\n\nNon-PTY execution also passes shell-minimizer settings into the native `Shell` session. When the minimizer rewrites verbose output, the executor replaces the sink's visible text with the minimized text and, when possible, saves the raw original capture as a separate `bash-original` artifact referenced by a `[raw output: artifact://<id>]` footer.\n\n## Live tool updates and async jobs\n\nFor non-PTY foreground execution, `BashTool` uses a separate `TailBuffer` for partial updates and emits `onUpdate` snapshots while command is running.\n\nFor PTY execution, live rendering is handled by custom UI overlay, not by `onUpdate` text chunks.\n\nWhen `async.enabled` is true and the call passes `async: true`, `BashTool` starts a managed bash job, returns a running job result with a job id, and stores completion through the session managed-job path. Auto-backgrounding can also start this path after `bash.autoBackground.thresholdMs`.\n\n## Result shaping, metadata, and error mapping\n\nAfter execution:\n\n1. `cancelled` handling:\n - if abort signal is aborted -> throw `ToolAbortError` (abort semantics),\n - else -> throw `ToolError` (treated as tool failure).\n2. PTY `timedOut` -> throw `ToolError`.\n3. empty output becomes `(no output)`.\n4. attach truncation metadata via `toolResult(...).truncationFromSummary(result, { direction: \"tail\" })`.\n5. exit-code mapping:\n - missing exit code -> throw `ToolError(\"... missing exit status\")`\n - non-zero exit -> error result with `\"Command exited with code N\"` and `details.exitCode`\n - zero exit -> success result.\n\nSuccess payload structure:\n\n- `content`: text output,\n- `details.meta.truncation` when truncated, including:\n - `direction`, `truncatedBy`, total/output line+byte counts,\n - `shownRange`,\n - `artifactId` when available.\n\nBecause built-in tools are wrapped with `wrapToolWithMetaNotice()`, truncation notice text is appended to final text content automatically (for example: `Full: artifact://<id>`).\n\n## Rendering paths\n\n## Tool-call renderer (`bashToolRenderer`)\n\n`bashToolRenderer` is used for tool-call messages (`toolCall` / `toolResult`):\n\n- collapsed mode shows visual-line-truncated preview,\n- expanded mode shows all currently available output text,\n- warning line includes truncation reason and `artifact://<id>` when truncated,\n- timeout value (from args) is shown in footer metadata line.\n\n### Caveat: full artifact expansion\n\n`BashRenderContext` has `isFullOutput`, but current renderer context builder does not set it for bash tool results. Expanded view still uses the text already in result content (tail/truncated output) unless another caller provides full artifact content.\n\n## User bang-command component (`BashExecutionComponent`)\n\n`BashExecutionComponent` is for user `!` commands in interactive mode (not model tool calls):\n\n- streams chunks live,\n- collapsed preview keeps last 20 logical lines,\n- line clamp at 4000 chars per line,\n- shows truncation + artifact warnings when metadata is present,\n- marks cancelled/error/exit state separately.\n\nThis component is wired by `CommandController.handleBashCommand()` and fed from `AgentSession.executeBash()`.\n\n## Mode-specific behavior differences\n\n| Surface | Entry path | PTY eligible | Live output UX | Error surfacing |\n| ------------------------------ | ----------------------------------------------------- | ----------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------ |\n| Interactive tool call | `BashTool.execute` | Yes, when `pty=true` and UI exists and `PI_NO_PTY!=1` | PTY overlay (interactive) or streamed tail updates | Tool errors become `toolResult.isError` |\n| Print mode tool call | `BashTool.execute` | No (no UI context) | No TUI overlay; output appears in event stream/final assistant text flow | Same tool error mapping |\n| RPC tool call (agent tooling) | `BashTool.execute` | Usually no UI -> non-PTY | Structured tool events/results | Same tool error mapping |\n| Interactive bang command (`!`) | `AgentSession.executeBash` + `BashExecutionComponent` | No (uses executor directly) | Dedicated bash execution component | Controller catches exceptions and shows UI error |\n| RPC `bash` command | `rpc-mode` -> `session.executeBash` | No | Returns `BashResult` directly | Consumer handles returned fields |\n\n## Operational caveats\n\n- Interceptor only blocks commands when suggested tool is currently available in context.\n- If artifact allocation fails, truncation still occurs but no `artifact://` back-reference is available.\n- Shell session cache has no explicit eviction in this module; lifetime is process-scoped.\n- PTY and non-PTY timeout surfaces differ:\n - PTY exposes explicit `timedOut` result field,\n - non-PTY maps timeout into `cancelled + annotation` summary.\n\n## Implementation files\n\n- [`src/tools/bash.ts`](../packages/coding-agent/src/tools/bash.ts) — tool entrypoint, input handling/interception, async and PTY/non-PTY selection, result/error mapping, bash tool renderer.\n- [`src/tools/bash-command-fixup.ts`](../packages/coding-agent/src/tools/bash-command-fixup.ts) — native-backed conservative cleanup for trailing `head`/`tail` pipes and redundant `2>&1`.\n- [`src/tools/bash-interceptor.ts`](../packages/coding-agent/src/tools/bash-interceptor.ts) — interceptor rule matching and blocked-command messages.\n- [`src/exec/bash-executor.ts`](../packages/coding-agent/src/exec/bash-executor.ts) — non-PTY executor, shell session reuse, cancellation wiring, output sink integration.\n- [`src/tools/bash-interactive.ts`](../packages/coding-agent/src/tools/bash-interactive.ts) — PTY runtime, overlay UI, input normalization, non-interactive env defaults.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink`, `TailBuffer`, truncation/artifact spill, and summary metadata.\n- [`src/tools/output-meta.ts`](../packages/coding-agent/src/tools/output-meta.ts) — truncation metadata shape + notice injection wrapper.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level `executeBash`, message recording, abort lifecycle.\n- [`src/modes/components/bash-execution.ts`](../packages/coding-agent/src/modes/components/bash-execution.ts) — interactive `!` command execution component.\n- [`src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts) — wiring for interactive `!` command UI stream/update completion.\n- [`src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts) — RPC `bash` and `abort_bash` command surface.\n- [`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://<id>` resolution.\n",
|
|
12
|
+
"blob-artifact-architecture.md": "# Blob and artifact storage architecture\n\nThis document describes how coding-agent stores large/binary payloads outside session JSONL, how truncated tool output is persisted, and how internal URLs (`artifact://`, `agent://`) resolve back to stored data.\n\n## Why two storage systems exist\n\nThe runtime uses two different persistence mechanisms for different data shapes:\n\n- **Content-addressed blobs** (`blob:sha256:<hash>`): global storage used to externalize large image base64 payloads and provider image data URLs from persisted session entries.\n- **Session-scoped artifacts** (files under `<sessionFile-without-.jsonl>/`): per-session text files used for full tool outputs and subagent outputs.\n\nThey are intentionally separate:\n\n- blob storage optimizes deduplication and stable references by content hash,\n- artifact storage optimizes append-only session tooling and human/tool retrieval by local IDs.\n\n## Storage boundaries and on-disk layout\n\n### Blob store boundary (global)\n\n`SessionManager` constructs `BlobStore(getBlobsDir())`, so blob files live in a shared global blob directory, not in a session folder.\n\nBlob file naming:\n\n- file path: `<blobsDir>/<sha256-hex>`\n- no extension\n- reference string stored in entries: `blob:sha256:<sha256-hex>`\n\nImplications:\n\n- same binary content across sessions resolves to the same hash/path,\n- writes are idempotent at the content level,\n- blobs can outlive any individual session file.\n\n## Artifact boundary (session-local)\n\n`ArtifactManager` derives artifact directory from session file path:\n\n- session file: `.../<timestamp>_<sessionId>.jsonl`\n- artifacts directory: `.../<timestamp>_<sessionId>/` (strip `.jsonl`)\n\nArtifact types share this directory:\n\n- truncated tool output files: `<numericId>.<toolType>.log` (for `artifact://`)\n- subagent output files: `<outputId>.md` (for `agent://`)\n- subagent session JSONL sidecars: `<outputId>.jsonl` when task execution receives an artifacts directory\n\nSubagents can adopt the parent `ArtifactManager`; in that case parent and subagent tree share one artifact directory and numeric artifact ID space.\n\n## ID and name allocation schemes\n\n### Blob IDs: content hash\n\n`BlobStore.put()` / `putSync()` computes SHA-256 over the bytes it is given and returns:\n\n- `hash`: hex digest,\n- `path`: `<blobsDir>/<hash>`,\n- `ref`: `blob:sha256:<hash>`.\n\nNo session-local counter is used.\n\n### Artifact IDs: session-local monotonic integer\n\n`ArtifactManager` scans existing `*.log` artifact files on first directory-backed allocation to find max existing numeric ID and sets `nextId = max + 1`.\n\nAllocation behavior:\n\n- file format: `{id}.{toolType}.log`\n- IDs are sequential strings (`\"0\"`, `\"1\"`, ...)\n- resume does not overwrite existing artifacts because scan happens before allocation\n- the directory is created lazily on first save/allocation\n\nIf the artifact directory is missing, scanning yields an empty list and allocation starts from `0`.\n\nNon-persistent sessions without an adopted manager can store `saveArtifact(...)` content in memory under numeric IDs, but `artifact://` resolution is file-backed through registered artifact directories.\n\n### Agent output IDs (`agent://`)\n\n`AgentOutputManager` allocates IDs for subagent outputs from the requested name, used verbatim the first time and suffixed (`-2`, `-3`, …) only when the same name repeats (e.g. `Anna`, `Anna-2`). Nested outputs are grouped under the parent prefix (e.g. `Parent.Child`). It scans existing `.md` files on initialization so a resumed session never reuses a name that would clobber a prior output.\n\n## Persistence dataflow\n\n### 1) Session entry persistence rewrite path\n\nBefore session entries are written (`#rewriteFile` / incremental persist), `SessionManager` calls `prepareEntryForPersistence()` / `prepareEntryForPersistenceSync()` through the truncation pipeline.\n\nKey behaviors:\n\n1. **Large string truncation**: oversized strings are cut and suffixed with `\"[Session persistence truncated large content]\"`; signature fields (`thinkingSignature`, `thoughtSignature`, `textSignature`) are cleared instead of truncated.\n2. **Transient field stripping**: `partialJson` and `jsonlEvents` are removed from persisted entries.\n3. **Image externalization to blobs**:\n - image blocks in `content` arrays are externalized when `data` is not already a blob ref and base64 length is at least threshold (`BLOB_EXTERNALIZE_THRESHOLD = 1024`),\n - provider-style `image_url` data URLs are externalized when they start with `data:image/` and contain `;base64,`,\n - image block `data` is stored as decoded binary bytes,\n - provider data URLs are stored as the original UTF-8 data URL string,\n - persisted values are replaced with `blob:sha256:<hash>`.\n\nThis keeps session JSONL compact while preserving recoverability.\n\n### 2) Session load rehydration path\n\nWhen opening a session (`setSessionFile`), after migrations, `SessionManager` runs `resolveBlobRefsInEntries()`.\n\nFor message/custom-message image blocks with `blob:sha256:<hash>` and for persisted provider `image_url` fields with blob refs:\n\n- reads blob bytes from blob store,\n- converts image-block bytes back to base64,\n- converts provider `image_url` blobs back to the original string,\n- mutates in-memory entry fields for runtime consumers.\n\nIf a blob is missing:\n\n- image-block resolution logs a warning and keeps the original `blob:sha256:` ref string in memory,\n- provider `image_url` resolution logs a warning and keeps the original ref string,\n- load continues.\n\n### 3) Tool output spill/truncation path\n\n`OutputSink` powers streaming output in bash/python/ssh and related executors.\n\nBehavior:\n\n1. Every chunk is sanitized with `sanitizeWithOptionalSixelPassthrough(..., sanitizeText)` and appended to in-memory accounting.\n2. Optional live `onChunk` receives sanitized pre-column-cap chunks, throttled if configured.\n3. A per-line column cap can drop bytes from long lines in the LLM-facing buffer; when this happens, artifact mirroring starts so the on-disk file keeps the full sanitized stream.\n4. When the in-memory tail buffer would exceed spill threshold (`DEFAULT_MAX_BYTES`, 50KB), sink marks output truncated and starts artifact mirroring if an artifact path is available.\n5. If a file sink is opened, it first writes the current buffer, then all queued/subsequent sanitized chunks.\n6. In-memory buffer is trimmed to a tail window, or to head + elision marker + tail when head retention is configured.\n7. `dump()` returns summary including `artifactId` only when file sink creation succeeded.\n\nPractical effect:\n\n- UI/tool return shows bounded output,\n- full sanitized output is preserved in artifact file and referenced as `artifact://<id>` when file-backed artifact mirroring succeeded.\n\nIf file sink creation fails (I/O error, missing path, etc.), sink falls back to in-memory truncation only; full output is not persisted.\n\n## URL access model\n\n### `blob:` references\n\n`blob:sha256:<hash>` is a persistence reference inside session entry payloads, not an internal URL scheme handled by the router. Resolution is done by `SessionManager` during session load.\n\n### `artifact://<id>`\n\nHandled by `ArtifactProtocolHandler` over registered active session artifact directories:\n\n- requires a numeric ID,\n- searches each registered artifacts directory for filename prefix `<id>.`,\n- returns raw text (`text/plain`) from the matched `.log` file,\n- when missing, error includes available numeric artifact IDs from existing artifact files.\n\nFailure behavior:\n\n- if no artifact directories are registered: throws `No session - artifacts unavailable`,\n- if registered directories exist but none are present on disk: throws `No artifacts directory found`,\n- if ID is not numeric: throws `artifact:// ID must be numeric, got: <id>`.\n\n### `agent://<id>`\n\nHandled by `AgentProtocolHandler` over registered active session artifact directories and `<artifactsDir>/<id>.md`:\n\n- plain form returns markdown text,\n- `/path` or `?q=` forms perform JSON extraction,\n- path and query extraction cannot be combined,\n- if extraction requested, file content must parse as JSON.\n\nFailure behavior:\n\n- if no artifact directories are registered: throws `No session - agent outputs unavailable`,\n- if registered directories exist but none are present on disk: throws `No artifacts directory found`,\n- missing output throws `Not found: <id>` with available `.md` output IDs when directory listing succeeds.\n\nRead tool integration:\n\n- `read` supports offset/limit pagination for non-extraction internal URL reads,\n- rejects offset/limit when `agent://` extraction is used.\n\n## Resume, fork, and move semantics\n\n### Resume\n\n- `ArtifactManager` scans existing `{id}.*.log` files on first allocation and continues numbering.\n- `AgentOutputManager` scans existing `.md` output IDs and continues numbering.\n- `SessionManager` rehydrates blob refs to base64/data URLs on load.\n\n### Fork\n\n`SessionManager.fork()` creates a new session file with new session ID and `parentSession` link, then returns old/new file paths. Artifact copying is handled by `AgentSession.fork()`:\n\n- flushes current session first,\n- attempts recursive copy of old artifact directory to new artifact directory,\n- missing old directory is tolerated,\n- non-ENOENT copy errors are logged as warnings and fork still completes.\n\nID implications after fork:\n\n- if copy succeeded, artifact counters in the new session continue after max copied ID when the new `ArtifactManager` first scans,\n- if copy failed/skipped, new session artifact IDs start from `0`.\n\nBlob implications after fork:\n\n- blobs are global and content-addressed, so no blob directory copy is required.\n\n### Move to new cwd\n\n`SessionManager.moveTo()` renames both session file and artifact directory to the new default session directory, with rollback logic if a later step fails. This preserves artifact identity while relocating session scope.\n\n## Failure handling and fallback paths\n\n| Case | Behavior |\n| --------------------------------------------------------- | -------------------------------------------------------------------- |\n| Blob file missing during image-block rehydration | Warn and keep `blob:sha256:` ref string in memory |\n| Blob file missing during provider `image_url` rehydration | Warn and keep `blob:sha256:` ref string in memory |\n| Blob read ENOENT via `BlobStore.get` | Returns `null` |\n| Artifact directory missing (`ArtifactManager.listFiles`) | Returns empty list (allocation can start fresh) |\n| No registered artifact dirs (`artifact://`) | Throws `No session - artifacts unavailable` |\n| No registered artifact dirs (`agent://`) | Throws `No session - agent outputs unavailable` |\n| Registered artifact dirs missing on disk | Throws explicit `No artifacts directory found` |\n| Artifact ID not found | Throws with available IDs listing |\n| OutputSink artifact writer init fails | Continues with bounded in-memory output only |\n| Non-persistent `saveArtifact` | Stores text in `SessionManager` memory map; not file-backed URL data |\n\n## Binary blob externalization vs text-output artifacts\n\n- **Blob externalization** is for image payloads inside persisted session entry content and provider image data URLs; it replaces inline payload strings in JSONL with stable content refs.\n- **Artifacts** are plain text files for execution output and subagent output; file-backed artifacts are addressable by session-local IDs through internal URLs.\n\nThe two systems intersect only indirectly: both reduce session JSONL bloat, but they have different identity, lifetime, and retrieval paths.\n\n## Implementation files\n\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts) — blob reference format, hashing, put/get, externalize/resolve helpers.\n- [`src/session/artifacts.ts`](../packages/coding-agent/src/session/artifacts.ts) — session artifact directory model and numeric artifact ID/path allocation.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink` truncation/spill-to-file behavior and summary metadata.\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts) — persistence transforms, blob rehydration on load, session fork/move interactions.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — artifact directory copy during interactive fork.\n- [`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://` resolver.\n- [`src/internal-urls/agent-protocol.ts`](../packages/coding-agent/src/internal-urls/agent-protocol.ts) — `agent://` resolver + JSON extraction.\n- [`src/internal-urls/router.ts`](../packages/coding-agent/src/internal-urls/router.ts) — internal URL router wiring.\n- [`src/task/output-manager.ts`](../packages/coding-agent/src/task/output-manager.ts) — session-scoped agent output ID allocation for `agent://`.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent output artifact writes (`<id>.md`) and session JSONL sidecars.\n",
|
|
13
|
+
"compaction.md": "# Compaction and Branch Summaries\n\nCompaction and branch summaries are the two mechanisms that keep long sessions usable without losing prior work context.\n\n- **Compaction** rewrites old history into a summary on the current branch.\n- **Branch summary** captures abandoned branch context during `/tree` navigation.\n\nBoth are persisted as session entries and converted back into user-context messages when rebuilding LLM input.\n\n## Key implementation files\n\n- `packages/agent/src/compaction/compaction.ts` (context-full summarization and handoff generation)\n- `packages/snapcompact/src/snapcompact.ts` (snapcompact strategy: history archived as dense bitmap images)\n- `packages/agent/src/compaction/branch-summarization.ts`\n- `packages/agent/src/compaction/pruning.ts`\n- `packages/agent/src/compaction/utils.ts`\n- `packages/agent/src/compaction/openai.ts`\n- `packages/coding-agent/src/session/session-manager.ts`\n- `packages/coding-agent/src/session/agent-session.ts`\n- `packages/coding-agent/src/session/messages.ts`\n- `packages/coding-agent/src/extensibility/hooks/types.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n\n## Session entry model\n\nCompaction and branch summaries are first-class session entries, not plain assistant/user messages.\n\n- `CompactionEntry`\n - `type: \"compaction\"`\n - `summary`, optional `shortSummary`\n - `firstKeptEntryId` (compaction boundary)\n - `tokensBefore`\n - optional `details`, `preserveData`, `fromExtension`\n- `BranchSummaryEntry`\n - `type: \"branch_summary\"`\n - `fromId`, `summary`\n - optional `details`, `fromExtension`\n\nWhen context is rebuilt (`buildSessionContext`):\n\n1. Latest compaction on the active path is converted to one `compactionSummary` message.\n2. Kept entries from `firstKeptEntryId` to the compaction point are re-included.\n3. Later entries on the path are appended.\n4. `branch_summary` entries are converted to `branchSummary` messages.\n5. `custom_message` entries are converted to `custom` messages.\n\nThose custom roles are then transformed into LLM-facing user messages in `convertToLlm()` using the static templates:\n\n- `packages/agent/src/compaction/prompts/compaction-summary-context.md`\n- `packages/agent/src/compaction/prompts/branch-summary-context.md`\n- `packages/agent/src/compaction/prompts/handoff-document.md`\n\n## Compaction pipeline\n\n### Triggers\n\nCompaction/context maintenance can run in five ways:\n\n1. **Manual context compaction**: `/compact [instructions]` calls `AgentSession.compact(...)`.\n2. **Automatic overflow recovery**: after a same-model assistant error that matches context overflow.\n3. **Automatic incomplete-output recovery**: after a same-model assistant message ends with `stopReason === \"length\"` (OpenAI/Codex `response.incomplete`).\n4. **Automatic threshold maintenance**: after a successful turn when context exceeds the resolved threshold.\n5. **Idle maintenance**: `runIdleCompaction()` can invoke the same auto-maintenance path with reason `\"idle\"`.\n\n### Compaction shape (visual)\n\n```text\nBefore compaction:\n\n entry: 0 1 2 3 4 5 6 7 8 9\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┘\n └────────┬───────┘ └──────────────┬──────────────┘\n messagesToSummarize kept messages\n ↑\n firstKeptEntryId (entry 4)\n\nAfter compaction (new entry appended):\n\n entry: 0 1 2 3 4 5 6 7 8 9 10\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┬─────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │ cmp │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┴─────┘\n └──────────┬──────┘ └──────────────────────┬───────────────────┘\n not sent to LLM sent to LLM\n ↑\n starts from firstKeptEntryId\n\nWhat the LLM sees:\n\n ┌────────┬─────────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ system │ summary │ usr │ ass │ tool │ tool │ ass │ tool │\n └────────┴─────────┴─────┴─────┴──────┴──────┴─────┴──────┘\n ↑ ↑ └─────────────────┬────────────────┘\n prompt from cmp messages from firstKeptEntryId\n```\n\n### Overflow/incomplete recovery vs threshold/idle maintenance\n\nThe automatic paths are intentionally different:\n\n- **Overflow recovery**\n - Trigger: current-model assistant error is detected as context overflow and the error is not older than the latest compaction.\n - The failing assistant error message is removed from active agent state before retry.\n - Context promotion is tried first; if a configured larger model is available, the agent switches model and retries without compacting.\n - If promotion is unavailable and compaction is enabled, context-full compaction runs with `reason: \"overflow\"` and `willRetry: true`; handoff strategy is not used for overflow because the handoff request would reuse the overflowing input.\n - On success, `agent.continue()` is scheduled to retry the turn.\n\n- **Incomplete-output recovery**\n - Trigger: same-model assistant message ends with `stopReason === \"length\"` and the message is not older than the latest compaction.\n - The incomplete assistant message is removed from active agent state before recovery.\n - Context promotion is tried first.\n - If promotion is unavailable and compaction is enabled, auto maintenance runs with `reason: \"incomplete\"` and `willRetry: true`.\n - Unlike overflow, `compaction.strategy: \"handoff\"` is allowed for incomplete-output recovery because the input context is still usable.\n - On context-full success, `agent.continue()` is scheduled to retry the turn.\n\n- **Threshold maintenance**\n - Trigger: successful, non-error assistant message whose adjusted context tokens exceed `resolveThresholdTokens(...)`.\n - Tool-output pruning can reduce the measured token count before threshold comparison.\n - Context promotion is tried before compaction.\n - If promotion is unavailable, auto maintenance runs with `reason: \"threshold\"` and `willRetry: false`.\n - With `compaction.strategy: \"handoff\"`, threshold maintenance normally schedules a post-prompt auto-handoff task instead of writing a compaction entry; pre-prompt checks run it inline to avoid racing the next turn. If handoff returns no document without aborting, it falls back to context-full compaction.\n - On success, if `compaction.autoContinue !== false`, schedules an agent-authored developer auto-continue prompt from `prompts/system/auto-continue.md`.\n\n- **Idle maintenance**\n - Trigger: `runIdleCompaction()` when not streaming or already compacting.\n - Uses `reason: \"idle\"` and does not auto-continue afterward.\n\n### Snapcompact strategy\n\n`compaction.strategy: \"snapcompact\"` replaces the LLM summarization call with a local, deterministic archival pass (`snapcompactCompact` from `@oh-my-pi/snapcompact`):\n\n- The discarded history is serialized, whitespace-collapsed, and printed onto provider-aware square PNG frames using bundled public-domain pixel fonts. Anthropic-family and unknown APIs use repeated black `8x8` cells, Google uses repeated sentence-colored `8x8` cells, and OpenAI uses dense stretched `6x6` cells with `detail: \"original\"`.\n- Serialization keeps the archive conversation-dense: tool results are truncated head+tail (default 2,000 chars at a 0.6 head ratio), tool-call argument values are capped per value (500) and per call (2,000), and tool output is printed in dim gray ink so conversation reads louder than tool noise. All budgets and the dimming are configurable via `SnapcompactSerializeOptions` (`toolResultMaxChars`, `toolArgMaxChars`, `toolCallMaxChars`, `truncateHeadRatio`, `dimToolResults`).\n- Frames persist under `CompactionEntry.preserveData.snapcompact` and are re-attached to the `compactionSummary` message as image blocks on every context rebuild; the entry's `summary` is a deterministic reading guide (grid geometry, role tags, truncation notes) plus the usual file-operation lists.\n- Later compactions carry earlier frames forward. Beyond an 8-frame budget the archive fades from the middle out: the earliest frame (session head — the original request, or the filmed summary of older history) is pinned, and the oldest *unpinned* frames are evicted, so head and tail both survive. If the previous compaction was text-based, its summary is printed at the head of the frame archive as `[Summary of earlier history]`.\n- No model, API key, or network is involved, so snapcompact is also safe for overflow recovery. It requires a vision-capable current model (`model.input` includes `\"image\"`); otherwise the run falls back to context-full and emits a warning notice (auto and manual paths). Manual `/compact` honors the strategy unless custom instructions are given (those imply a directed LLM summary).\n- Rationale: the shape table comes from the snapcompact 200k-token evals in `packages/snapcompact`, where bitmap frames preserved QA recall at lower billed-token cost than raw text for vision-capable models.\n\n### Display transcript\n\nCompaction no longer visually restarts the conversation. The TUI renders the **display transcript** (`buildSessionContext({ transcript: true })` / `AgentSession.buildTranscriptSessionContext()`): every path entry in chronological order, with each compaction shown inline as a slim divider — `── 📷 compacted · ctrl+o ──` — at the point it fired. Expanding (ctrl+o) reveals the summary. Only the LLM context resets at the compaction boundary; the scrollback above the divider stays intact, including across session resume.\n\n### Pre-compaction pruning\n\nBefore compaction checks, tool-result pruning may run (`pruneToolOutputs`).\n\nDefault prune policy:\n\n- Protect newest `40_000` tool-output tokens.\n- Require at least `20_000` total estimated savings.\n- Never prune tool results from `skill` or `read`.\n\nPruned tool results are replaced with:\n\n- `[Output truncated - N tokens]`\n\nIf pruning changes entries, session storage is rewritten and agent message state is refreshed before compaction decisions.\n\n### Boundary and cut-point logic\n\n`prepareCompaction()` only considers entries since the last compaction entry (if any).\n\n1. Find previous compaction index.\n2. Compute `boundaryStart = prevCompactionIndex + 1`.\n3. Adapt `keepRecentTokens` using measured usage ratio when available.\n4. Run `findCutPoint()` over the boundary window.\n\nValid cut points include:\n\n- message entries with roles: `user`, `assistant`, `bashExecution`, `hookMessage`, `branchSummary`, `compactionSummary`\n- `custom_message` entries\n- `branch_summary` entries\n\nHard rule: never cut at `toolResult`.\n\nIf there are non-message metadata entries immediately before the cut point (`model_change`, `thinking_level_change`, labels, etc.), they are pulled into the kept region by moving cut index backward until a message or compaction boundary is hit.\n\n### Split-turn handling\n\nIf cut point is not at a user-turn start, compaction treats it as a split turn.\n\nTurn start detection treats these as user-turn boundaries:\n\n- `message.role === \"user\"`\n- `message.role === \"bashExecution\"`\n- `custom_message` entry\n- `branch_summary` entry\n\nSplit-turn compaction generates two summaries:\n\n1. History summary (`messagesToSummarize`)\n2. Turn-prefix summary (`turnPrefixMessages`)\n\nFinal stored summary is merged as:\n\n```markdown\n<history summary>\n\n---\n\n**Turn Context (split turn):**\n\n<turn prefix summary>\n```\n\n### Summary generation\n\n`compact(...)` builds summaries from serialized conversation text:\n\n1. Convert messages via `convertToLlm()`.\n2. Serialize with `serializeConversation()`.\n3. Wrap in `<conversation>...</conversation>`.\n4. Optionally include `<previous-summary>...</previous-summary>`.\n5. Optionally inject extension hook context and active memory-backend compaction context as `<additional-context>` entries.\n6. Execute summarization prompt with `SUMMARIZATION_SYSTEM_PROMPT`.\n\nPrompt selection:\n\n- first compaction: `compaction-summary.md`\n- iterative compaction with prior summary: `compaction-update-summary.md`\n- split-turn second pass: `compaction-turn-prefix.md`\n- short UI summary: `compaction-short-summary.md`\n- handoff document: `handoff-document.md` (used by `generateHandoff(...)`, not serialized compaction)\n\nRemote summarization modes:\n\n- If `compaction.remoteEndpoint` is set and remote compaction is enabled, local summary generation POSTs:\n - `{ systemPrompt, prompt }`\n- Expects JSON containing at least `{ summary }`.\n- For OpenAI/OpenAI Codex models, compaction first tries the provider-native `/responses/compact` endpoint when remote compaction is enabled. It preserves provider replacement history in `preserveData.openaiRemoteCompaction` and falls back to local summarization if that native request fails.\n\n### Handoff generation\n\n`packages/agent/src/compaction/compaction.ts` also exports `generateHandoff(...)`. Handoff generation uses the same `completeSimple(...)` oneshot style as summarization, but it preserves the live agent cache prefix by sending the active system prompt, tool array, and real LLM message history, then appending one agent-attributed `user` message containing the handoff prompt. It forces `toolChoice: \"none\"` and returns joined text blocks directly.\n\nHandoff does not write a `CompactionEntry`. `AgentSession.handoff()` owns the session transition: it starts a new session, injects the generated document as a visible `custom_message` with `customType: \"handoff\"`, and rebuilds agent messages from that new session.\n\n### File-operation context in summaries\n\nCompaction tracks cumulative file activity using assistant tool calls:\n\n- `read(path)` → read set\n- `write(path)` → modified set\n- `edit(path)` → modified set\n\nCumulative behavior:\n\n- Includes prior compaction details only when prior entry is pi-generated (`fromExtension !== true`).\n- In split turns, includes turn-prefix file ops too.\n- `details.readFiles` excludes files also modified; `details.modifiedFiles` carries the rest (persisted shape is unchanged).\n\nSummary text gets one `<files>` tag appended via prompt template: a grouped, prefix-folded directory tree (find-tool shape) with a per-file access marker — `(Read)` for read-only files, `(Write)` for modified files never read, `(RW)` for modified files also present in the cumulative read set. Capped at 20 files with an `… (N more files omitted)` line.\n\n```xml\n<files>\n# packages/agent/src/compaction/\ncompaction.ts (Read)\nutils.ts (RW)\n## prompts/\nfile-operations.md (Write)\n</files>\n```\n\nLegacy `<read-files>`/`<modified-files>` tags from summaries written by earlier versions are stripped (alongside `<files>`) before re-appending, so old summaries self-heal on the next compaction.\n\n### Persist and reload\n\nAfter summary generation (or hook-provided summary), agent session:\n\n1. Appends `CompactionEntry` with `appendCompaction(...)` for context-full maintenance; handoff strategy creates a new session and injects a handoff `custom_message` instead.\n2. Rebuilds display context from the active leaf via `buildDisplaySessionContext()`.\n3. Replaces live agent messages with rebuilt context.\n4. Synchronizes active todo phases from the rebuilt branch and closes provider sessions whose history was rewritten.\n5. Emits `session_compact` hook event.\n\n## Branch summarization pipeline\n\nBranch summarization is tied to tree navigation, not token overflow.\n\n### Trigger\n\nDuring `navigateTree(...)`:\n\n1. Compute abandoned entries from old leaf to common ancestor using `collectEntriesForBranchSummary(...)`.\n2. If caller requested summary (`options.summarize`), generate summary before switching leaf.\n3. If summary exists, attach it at the navigation target using `branchWithSummary(...)`.\n\nOperationally this is commonly driven by `/tree` flow when `branchSummary.enabled` is enabled.\n\n### Branch switch shape (visual)\n\n```text\nTree before navigation:\n\n ┌─ B ─ C ─ D (old leaf, being abandoned)\n A ───┤\n └─ E ─ F (target)\n\nCommon ancestor: A\nEntries to summarize: B, C, D\n\nAfter navigation with summary:\n\n ┌─ B ─ C ─ D ─ [summary of B,C,D]\n A ───┤\n └─ E ─ F (new leaf)\n```\n\n### Preparation and token budget\n\n`generateBranchSummary(...)` computes budget as:\n\n- `tokenBudget = model.contextWindow - branchSummary.reserveTokens`\n\n`prepareBranchEntries(...)` then:\n\n1. First pass: collect cumulative file ops from all summarized entries, including prior pi-generated `branch_summary` details.\n2. Second pass: walk newest → oldest, adding messages until token budget is reached.\n3. Prefer preserving recent context.\n4. May still include large summary entries near budget edge for continuity.\n\nCompaction entries are included as messages (`compactionSummary`) during branch summarization input.\n\n### Summary generation and persistence\n\nBranch summarization:\n\n1. Converts and serializes selected messages.\n2. Wraps in `<conversation>`.\n3. Uses custom instructions if supplied, otherwise `branch-summary.md`.\n4. Calls summarization model with `SUMMARIZATION_SYSTEM_PROMPT`.\n5. Prepends `branch-summary-preamble.md`.\n6. Appends file-operation tags.\n\nResult is stored as `BranchSummaryEntry` with optional details (`readFiles`, `modifiedFiles`).\n\n## Extension and hook touchpoints\n\n### `session_before_compact`\n\nPre-compaction hook.\n\nCan:\n\n- cancel compaction (`{ cancel: true }`)\n- provide full custom compaction payload (`{ compaction: CompactionResult }`)\n\n### `session.compacting`\n\nPrompt/context customization hook for default compaction.\n\nCan return:\n\n- `prompt` (override base summary prompt)\n- `context` (extra context lines injected into `<additional-context>`)\n- `preserveData` (stored on compaction entry)\n\n### `session_compact`\n\nPost-compaction notification with saved `compactionEntry` and `fromExtension` flag.\n\n### `session_before_tree`\n\nRuns on tree navigation before default branch summary generation.\n\nCan:\n\n- cancel navigation\n- provide custom `{ summary: { summary, details } }` used when user requested summarization\n\n### `session_tree`\n\nPost-navigation event exposing new/old leaf and optional summary entry.\n\n## Runtime behavior and failure semantics\n\n- Manual compaction aborts current agent operation first.\n- `abortCompaction()` cancels manual compaction, auto-compaction, and handoff generation controllers.\n- Auto compaction emits start/end session events for UI/state updates.\n- Auto compaction can try multiple model candidates and retry transient failures; long retry delays prefer the next candidate when one is available.\n- Overflow errors are excluded from generic retry path because they are handled by context promotion/compaction.\n- If auto-compaction fails:\n - overflow path emits `Context overflow recovery failed: ...`\n - incomplete-output path emits `Incomplete response recovery failed: ...`\n - threshold/idle paths emit `Auto-compaction failed: ...`\n- Branch summarization can be cancelled via abort signal (e.g., Escape), returning canceled/aborted navigation result.\n\n## Settings and defaults\n\nFrom `settings-schema.ts`:\n\n- `compaction.enabled` = `true`\n- `compaction.strategy` = `\"context-full\"` (`\"handoff\"`, `\"shake\"`, `\"snapcompact\"`, and `\"off\"` are also supported)\n- `compaction.reserveTokens` = `16384`\n- `compaction.keepRecentTokens` = `20000`\n- `compaction.autoContinue` = `true`\n- `compaction.remoteEnabled` = `true`\n- `compaction.remoteEndpoint` = `undefined`\n- `compaction.thresholdPercent` = `-1` and `compaction.thresholdTokens` = `-1`; when no positive override is set, the threshold is `contextWindow - max(15% of contextWindow, reserveTokens)`\n- `compaction.idleEnabled` = `false`\n- `compaction.idleThresholdTokens` = `200000`\n- `compaction.idleTimeoutSeconds` = `300`\n- `branchSummary.enabled` = `false`\n- `branchSummary.reserveTokens` = `16384`\n\nThese values are consumed at runtime by `AgentSession` and compaction/branch summarization modules.\n",
|
|
14
|
+
"config-usage.md": "# Configuration Discovery and Resolution\n\nThis document describes how the coding-agent resolves configuration today: which roots are scanned, how precedence works, and how resolved config is consumed by settings, skills, hooks, tools, and extensions.\n\n## Scope\n\nPrimary implementation:\n\n- `packages/coding-agent/src/config.ts`\n- `packages/coding-agent/src/config/settings.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n- `packages/coding-agent/src/discovery/builtin.ts`\n- `packages/coding-agent/src/discovery/helpers.ts`\n\nKey integration points:\n\n- `packages/coding-agent/src/capability/index.ts`\n- `packages/coding-agent/src/discovery/index.ts`\n- `packages/coding-agent/src/extensibility/skills.ts`\n- `packages/coding-agent/src/extensibility/hooks/loader.ts`\n- `packages/coding-agent/src/extensibility/custom-tools/loader.ts`\n- `packages/coding-agent/src/extensibility/extensions/loader.ts`\n\n---\n\n## Resolution flow (visual)\n\n```text\n Generic helper order (`config.ts`)\n┌───────────────────────────────────────┐\n│ 1) ~/.omp/agent, ~/.claude, ... │\n│ 2) <cwd>/.omp, <cwd>/.claude, ... │\n└───────────────────────────────────────┘\n │\n ▼\n capability providers enumerate items\n (native provider scans project .omp before user .omp;\n other providers have their own loading rules)\n │\n ▼\n provider priority sort + capability dedup\n │\n ▼\n subsystem-specific consumption\n (settings, skills, hooks, tools, extensions)\n```\n\n## 1) Config roots and source order\n\n## Canonical roots\n\n`src/config.ts` defines a fixed source priority list:\n\n1. `.omp` (native)\n2. `.claude`\n3. `.codex`\n4. `.gemini`\n\nUser-level bases:\n\n- `~/.omp/agent`\n- `~/.claude`\n- `~/.codex`\n- `~/.gemini`\n\nProject-level bases:\n\n- `<cwd>/.omp`\n- `<cwd>/.claude`\n- `<cwd>/.codex`\n- `<cwd>/.gemini`\n\n`CONFIG_DIR_NAME` is `.omp` (`packages/utils/src/dirs.ts`).\n\n## Important constraint\n\nThe generic helpers in `src/config.ts` do **not** include `.pi` in source discovery order.\n\n---\n\n## 2) Core discovery helpers (`src/config.ts`)\n\n## `getConfigDirs(subpath, options)`\n\nReturns ordered entries:\n\n- User-level entries first (by source priority)\n- Then project-level entries (by same source priority)\n\nOptions:\n\n- `user` (default `true`)\n- `project` (default `true`)\n- `cwd` (default `getProjectDir()`)\n- `existingOnly` (default `false`)\n\nThis API is used for directory-based config lookups (commands, hooks, tools, agents, etc.).\n\n## `findConfigFile(subpath, options)` / `findConfigFileWithMeta(...)`\n\nSearches for the first existing file across ordered bases, returns first match (path-only or path+metadata).\n\n## `findAllNearestProjectConfigDirs(subpath, cwd)`\n\nWalks parent directories upward and returns the **nearest existing directory per source base** (`.omp`, `.claude`, `.codex`, `.gemini`), then sorts results by source priority.\n\nUse this when project config should be inherited from ancestor directories (monorepo/nested workspace behavior).\n\n---\n\n## 3) File config wrapper (`ConfigFile<T>` in `src/config.ts`)\n\n`ConfigFile<T>` is the schema-validated loader for single config files.\n\nSupported formats:\n\n- `.yml` / `.yaml`\n- `.json` / `.jsonc`\n\nBehavior:\n\n- Validates parsed data against a provided Zod schema.\n- Caches load result until `invalidate()`.\n- Returns tri-state result via `tryLoad()`:\n - `ok`\n - `not-found`\n - `error` (`ConfigError` with schema/parse context)\n\nLegacy migration still supported:\n\n- If target path is `.yml`/`.yaml`, a sibling `.json` is auto-migrated once (`migrateJsonToYml`).\n\n---\n\n## 4) Settings resolution model (`src/config/settings.ts`)\n\nThe runtime settings model is layered:\n\n1. Global settings: `~/.omp/agent/config.yml`\n2. Project settings: discovered via settings capability (`settings.json` and `config.yml` from providers)\n3. CLI config overlays: `omp --config <path>` / repeated `--config` files, loaded as `config.yml`-style YAML for this process only\n4. Runtime overrides: in-memory, non-persistent\n5. Schema defaults: from `SETTINGS_SCHEMA`\n\nEffective precedence:\n\n`defaults <- global <- project <- CLI config overlays <- overrides`\n\nWrite behavior:\n\n- `settings.set(...)` writes to the **global** layer (`config.yml`) and queues background save.\n- Project settings are read-only from capability discovery.\n\n## Migration behavior still active\n\nOn startup, if `config.yml` is missing:\n\n1. Migrate from `~/.omp/agent/settings.json` (renamed to `.bak` on success)\n2. Merge with legacy DB settings from `agent.db`\n3. Write merged result to `config.yml`\n\nField-level migrations in `#migrateRawSettings`:\n\n- `queueMode` -> `steeringMode`\n- `ask.timeout` milliseconds -> seconds when old value looks like ms (`> 1000`)\n- Legacy flat `theme: \"...\"` -> `theme.dark/theme.light` structure\n\n---\n\n## 5) Capability/discovery integration\n\nMost non-core config loading flows through the capability registry (`src/capability/index.ts` + `src/discovery/index.ts`).\n\n## Provider ordering\n\nProviders are sorted by numeric priority (higher first). Example priorities:\n\n- Native OMP (`builtin.ts`): `100`\n- Claude: `80`\n- Codex / agents / Claude marketplace: `70`\n- Gemini: `60`\n\n```text\nProvider precedence (higher wins)\n\nnative (.omp) priority 100\nclaude priority 80\ncodex / agents / ... priority 70\ngemini priority 60\n```\n\n## Dedup semantics\n\nCapabilities define a `key(item)`:\n\n- same key => first item wins (higher-priority/earlier-loaded item)\n- no key (`undefined`) => no dedup, all items retained\n\nRelevant keys:\n\n- skills: `name`\n- tools: `name`\n- hooks: `${type}:${tool}:${name}`\n- extension modules: `name`\n- extensions: `name`\n- settings: no dedup (all items preserved)\n\n---\n\n## 6) Native `.omp` provider behavior (`packages/coding-agent/src/discovery/builtin.ts`)\n\nNative provider (`id: native`) reads native config from:\n\n- project: `<cwd>/.omp/...`\n- user: `~/.omp/agent/...`\n\n### Directory admission rules\n\n- Slash commands, rules, prompts, instructions, hooks, tools, extensions, extension modules, and settings use a project/user root only when the root directory exists and is non-empty.\n- Skills scan `<ancestor>/.omp/skills` for each ancestor from the current working directory up to the repo root/home boundary, plus `~/.omp/agent/skills`, without requiring the root `.omp` directory itself to be non-empty.\n- `SYSTEM.md` and `AGENTS.md` read user-level files directly and use nearest-ancestor project `.omp` lookup for project files, but the project `.omp` directory must be non-empty. See [`docs/system-prompt-customization.md`](./system-prompt-customization.md) for the full `SYSTEM.md` / `APPEND_SYSTEM.md` contract (replace vs. append, templating).\n\n### Scope-specific loading\n\n- Skills: `<ancestor>/.omp/skills/*/SKILL.md` and `~/.omp/agent/skills/*/SKILL.md`\n- Slash commands: `commands/*.md`\n- Rules: `rules/*.{md,mdc}`\n- Prompts: `prompts/*.md`\n- Instructions: `instructions/*.md`\n- Hooks: `hooks/pre/*`, `hooks/post/*`\n- Tools: `tools/*.{json,md,ts,js,sh,bash,py}` and `tools/<name>/index.ts`\n- Extension modules: discovered under `extensions/` (+ legacy `settings.json.extensions` string array)\n- Extensions: `extensions/<name>/gemini-extension.json`\n- Settings capability: `settings.json`, then `config.yml`\n\n### Nearest-project lookup nuance\n\n## For `SYSTEM.md` and `AGENTS.md`, native provider uses nearest-ancestor project `.omp` directory search (walk-up) and still requires the project `.omp` dir to be non-empty.\n\n## 7) How major subsystems consume config\n\n## Settings subsystem\n\n- `Settings.init()` loads global `config.yml` + discovered project settings capability items.\n- Only capability items with `level === \"project\"` are merged into project layer.\n\n### Session title prompt override\n\nCreate `TITLE_SYSTEM.md` in the same config locations as `SYSTEM.md` / `APPEND_SYSTEM.md`:\n\n```text\n# ~/.omp/agent/TITLE_SYSTEM.md\nGenerate a session name using lowercase `<type>:<primary-objective>`.\n```\n\n- Missing `TITLE_SYSTEM.md` keeps the bundled title prompts.\n- Discovery uses the same project-then-user config directory pattern as `SYSTEM.md`: project `.omp/TITLE_SYSTEM.md` first, then user `~/.omp/agent/TITLE_SYSTEM.md` and the other supported config bases.\n- The override replaces only the automatic session-title generation system prompt; normal `SYSTEM.md` / `APPEND_SYSTEM.md` prompt customization is unaffected.\n- The online path still forces the `set_title` tool call. The local tiny-title path keeps the `<title>...</title>` prefill/stop wrapper and uses this file as its system turn.\n\n## Skills subsystem\n\n- `extensibility/skills.ts` loads via `loadCapability(skillCapability.id, { cwd })`.\n- Applies source toggles and filters (`ignoredSkills`, `includeSkills`, custom dirs).\n- Legacy-named toggles still exist (`skills.enablePiUser`, `skills.enablePiProject`) but they gate the native provider (`provider === \"native\"`).\n\n## Hooks subsystem\n\n- `discoverAndLoadHooks()` resolves hook paths from hook capability + explicit configured paths.\n- Then loads modules via Bun import.\n\n## Tools subsystem\n\n- `discoverAndLoadCustomTools()` resolves tool paths from tool capability + plugin tool paths + explicit configured paths.\n- Declarative `.md/.json` tool files are metadata only; executable loading expects code modules.\n\n## Extensions subsystem\n\n- `discoverAndLoadExtensions()` resolves extension modules from extension-module capability plus explicit paths.\n- Current implementation intentionally keeps only capability items with `_source.provider === \"native\"` before loading.\n\n---\n\n## 8) Precedence rules to rely on\n\nUse this mental model:\n\n1. Source directory ordering from `config.ts` determines candidate path order.\n2. Capability provider priority determines cross-provider precedence.\n3. Capability key dedup determines collision behavior (first wins for keyed capabilities).\n4. Subsystem-specific merge logic can further change effective precedence (especially settings).\n\n### Settings-specific caveat\n\nSettings capability items are not deduplicated; `Settings.#loadProjectSettings()` deep-merges project items in returned order. Because merge applies later item values over earlier values, effective override behavior depends on provider emission order, not just capability key semantics.\n\n---\n\n## 9) Legacy/compatibility behaviors still present\n\n- `ConfigFile` JSON -> YAML migration for YAML-targeted files.\n- Settings migration from `settings.json` and `agent.db` to `config.yml`.\n- Settings key migrations include `queueMode`, `ask.timeout`, flat `theme`, `task.isolation.enabled`, legacy `task.isolation.mode` values, removed edit modes, `statusLine.plan_mode`, `memories.enabled`, and hindsight scoping/name fields.\n- Legacy setting names `skills.enablePiUser` / `skills.enablePiProject` are still active gates for native skill source.\n\nIf these compatibility paths are removed in code, update this document immediately; several runtime behaviors still depend on them today.\n",
|
|
15
|
+
"custom-tools.md": "# Custom Tools\n\nCustom tools are model-callable functions that plug into the same tool execution pipeline as built-in tools.\n\nA custom tool is a TypeScript/JavaScript module that exports a factory. The factory receives a host API (`CustomToolAPI`) and returns one tool or an array of tools.\n\n## What this is (and is not)\n\n- **Custom tool**: callable by the model during a turn (`execute` + Zod parameter schema).\n- **Extension**: lifecycle/event framework that can register tools and intercept/modify events.\n- **Hook**: external pre/post command scripts.\n- **Skill**: static guidance/context package, not executable tool code.\n\nIf you need the model to call code directly, use a custom tool.\n\n## Integration paths in current code\n\nThere are two active integration styles:\n\n1. **SDK-provided custom tools** (`options.customTools`)\n - Wrapped into agent tools via `CustomToolAdapter` or extension wrappers.\n - Always included in the initial active tool set in SDK bootstrap.\n\n2. **Filesystem-discovered modules via loader API** (`discoverAndLoadCustomTools` / `loadCustomTools`)\n - Exposed as library APIs in `src/extensibility/custom-tools/loader.ts`.\n - Host code can call these to discover and load tool modules from config/provider/plugin paths.\n\n```text\nModel tool call flow\n\nLLM tool call\n │\n ▼\nTool registry (built-ins + custom tool adapters)\n │\n ▼\nCustomTool.execute(toolCallId, params, onUpdate, ctx, signal)\n │\n ├─ onUpdate(...) -> streamed partial result\n └─ return result -> final tool content/details\n```\n\n## Discovery locations (loader API)\n\n`discoverAndLoadCustomTools(configuredPaths, cwd, builtInToolNames)` merges:\n\n1. Capability providers (`toolCapability`), including:\n - Native OMP config (`~/.omp/agent/tools`, `.omp/tools`)\n - Claude config (`~/.claude/tools`, `.claude/tools`)\n - Codex config (`~/.codex/tools`, `.codex/tools`)\n - Claude marketplace plugin cache provider\n2. Installed plugin manifests (`~/.omp/plugins/node_modules/*` via plugin loader)\n3. Explicit configured paths passed to the loader\n\n### Important behavior\n\n- Duplicate resolved paths are deduplicated.\n- Tool name conflicts are rejected against built-ins and already-loaded custom tools.\n- `.md` and `.json` files are discovered as tool metadata by some providers, but the executable module loader rejects them as runnable tools.\n- Relative configured paths are resolved from `cwd`; `~` is expanded.\n\n## Module contract\n\nA custom tool module must export a function (default export preferred):\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"repo_stats\",\n label: \"Repo Stats\",\n description: \"Counts tracked TypeScript files\",\n parameters: pi.zod.object({\n glob: pi.zod.string().optional().default(\"**/*.ts\"),\n }),\n\n async execute(toolCallId, params, onUpdate, ctx, signal) {\n onUpdate?.({\n content: [{ type: \"text\", text: \"Scanning files...\" }],\n details: { phase: \"scan\" },\n });\n\n const result = await pi.exec(\n \"git\",\n [\"ls-files\", params.glob ?? \"**/*.ts\"],\n { signal, cwd: pi.cwd },\n );\n if (result.killed) {\n throw new Error(\"Scan was cancelled\");\n }\n if (result.code !== 0) {\n throw new Error(result.stderr || \"git ls-files failed\");\n }\n\n const files = result.stdout.split(\"\\n\").filter(Boolean);\n return {\n content: [{ type: \"text\", text: `Found ${files.length} files` }],\n details: { count: files.length, sample: files.slice(0, 10) },\n };\n },\n\n onSession(event) {\n if (event.reason === \"shutdown\") {\n // cleanup resources if needed\n }\n },\n});\n\nexport default factory;\n```\n\nSchemas are authored with Zod (`pi.zod`) and flow through the shared validation/wire pipeline.\n\nFactory return type:\n\n- `CustomTool`\n- `CustomTool[]`\n- `Promise<CustomTool | CustomTool[]>`\n\n## API surface passed to factories (`CustomToolAPI`)\n\nFrom `types.ts` and `loader.ts`:\n\n- `cwd`: host working directory\n- `exec(command, args, options?)`: process execution helper\n- `ui`: UI context (can be no-op in headless modes)\n- `hasUI`: `false` in non-interactive flows\n- `logger`: shared file logger\n- `typebox`: zod-backed compatibility shim for legacy TypeBox-style schemas\n- `zod`: injected `zod/v4` module (canonical for new schemas)\n- `pi`: injected `@oh-my-pi/pi-coding-agent` exports\n- `pushPendingAction(action)`: register a preview action for hidden `resolve` tool (`docs/resolve-tool-runtime.md`)\n Loader starts with a no-op UI context and requires host code to call `setUIContext(...)` when real UI is ready.\n\n## Execution contract and typing\n\n`CustomTool.execute` signature:\n\n```ts\nexecute(toolCallId, params, onUpdate, ctx, signal);\n```\n\n- `params` is statically typed from your Zod/TypeBox schema via `Static<TParams>`.\n- Runtime argument validation happens before execution in the agent loop.\n- `onUpdate` emits partial results for UI streaming.\n- `ctx` includes `sessionManager`, `modelRegistry`, current `model`, `isIdle()`, `hasQueuedMessages()`, `abort()`, and optional `settings` / `autoApprove`.\n- `signal` carries cancellation.\n\n`CustomToolAdapter` bridges this to the agent tool interface and forwards calls in the correct argument order.\n\nTool definitions may also declare `strict`, `hidden`, `deferrable`, `mcpServerName`, `mcpToolName`, `approval`, and `formatApprovalDetails`.\n\n## How tools are exposed to the model\n\n- Tools are wrapped into `AgentTool` instances (`CustomToolAdapter` or extension wrappers).\n- They are inserted into the session tool registry by name.\n- In SDK bootstrap, custom and extension-registered tools are force-included in the initial active set.\n- CLI `--tools` currently validates only built-in tool names; custom tool inclusion is handled through discovery/registration paths and SDK options.\n\n## Rendering hooks\n\nOptional rendering hooks:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\nRuntime behavior in TUI:\n\n- If hooks exist, tool output is rendered inside a `Box` container.\n- `renderResult` receives `{ expanded, isPartial, spinnerFrame? }`.\n- Renderer errors are caught and logged; UI falls back to default text rendering.\n\n## Session/state handling\n\nOptional `onSession(event, ctx)` receives session lifecycle events, including:\n\n- `start`, `switch`, `branch`, `tree`, `shutdown`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`, `todo_reminder`\n\nUse `ctx.sessionManager` to reconstruct state from history when branch/session context changes.\n\n## Failures and cancellation semantics\n\n### Synchronous/async failures\n\n- Throwing (or rejected promises) in `execute` is treated as tool failure.\n- Agent runtime converts failures into tool result messages with `isError: true` and error text content.\n- With extension wrappers, `tool_result` handlers can further rewrite content/details and even override error status.\n\n### Cancellation\n\n- Agent abort propagates through `AbortSignal` to `execute`.\n- Forward `signal` to subprocess work (`pi.exec(..., { signal })`) for cooperative cancellation.\n- `ctx.abort()` lets a tool request abort of the current agent operation.\n\n### onSession errors\n\n- `onSession` errors are caught and logged as warnings; they do not crash the session.\n\n## Real constraints to design for\n\n- Tool names must be globally unique in the active registry.\n- Prefer deterministic, schema-shaped outputs in `details` for renderer/state reconstruction.\n- Guard UI usage with `pi.hasUI`.\n- Treat `.md`/`.json` in tool directories as metadata, not executable modules.\n",
|
|
16
|
+
"environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@oh-my-pi/pi-utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.omp/agent/.env`, respecting `PI_CONFIG_DIR` / `PI_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.omp/.env`, respecting `PI_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `OMP_*` keys are mirrored to `PI_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `CLAUDE_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `FIREPASS_API_KEY` | Fire Pass auth | Using Fire Pass models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `AIMLAPI_API_KEY` | AIML API auth | Using `aimlapi` provider | OpenAI-compatible AIML API endpoint at `https://api.aimlapi.com/v1` |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models or as fallback for `xai-oauth` | |\n| `XAI_OAUTH_TOKEN` | xAI OAuth/SuperGrok auth | Using `xai-oauth` provider | Takes precedence over `XAI_API_KEY` for `xai-oauth` |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `ZHIPU_API_KEY` | Zhipu Coding Plan auth | Using `zhipu-coding-plan` provider | |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `WAFER_PASS_API_KEY` | Wafer Pass auth | Using `wafer-pass` provider | Flat-rate Wafer subscription; validated against `https://pass.wafer.ai/v1/models` |\n| `WAFER_SERVERLESS_API_KEY` | Wafer Serverless auth | Using `wafer-serverless` provider | Pay-as-you-go Wafer SKU; validated against `https://pass.wafer.ai/v1/models` |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot tokens\n\n| Variable | Used for | Notes |\n| ---------------------- | ------------------------------------------------ | ------------------------------------------ |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | Generic GitHub tokens are not used here |\n| `GH_TOKEN` | GitHub API auth in web scraper | Web scraper fallback after `GITHUB_TOKEN` |\n| `GITHUB_TOKEN` | GitHub API auth in web scraper | Web scraper checks this before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| `OMP_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `omp auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `OMP_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `OMP_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.omp/` (respecting `PI_CONFIG_DIR`). |\n| `OMP_AUTH_BROKER_SNAPSHOT_TTL_MS` | Freshness window for the encrypted local broker snapshot cache | Optional in broker mode | Default `3600000` (1 h). Freshness is based on broker `snapshot.generatedAt`; `0` disables cache reads/writes and forces the old blocking fetch every startup. |\n| `OMP_AUTH_BROKER_SNAPSHOT_CACHE` | Path to the encrypted local broker snapshot cache | Optional in broker mode | Defaults to `~/.omp/cache/auth-broker-snapshot.enc` (or XDG cache equivalent). Useful for tests, ephemeral hosts, or relocating the `0600` cache file. |\n\nThe gateway has no dedicated env vars — it inherits `OMP_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `omp auth-gateway token`.\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `CLAUDE_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value`\n pairs and merged into request headers. They are also forwarded when\n `ANTHROPIC_BASE_URL` points to a non-Anthropic host (e.g. a corporate API\n gateway), so enterprise gateways requiring proprietary auth headers work\n without enabling Foundry mode.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `CLAUDE_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated. Also forwarded outside Foundry whenever `ANTHROPIC_BASE_URL` is non-Anthropic. |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `CLAUDE_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `CLAUDE_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Highest-precedence bearer token auth path; skips AWS profile/credential-chain lookup when set |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Primary project ID source |\n| `GCP_PROJECT` | Fallback | Alternate project ID source |\n| `GCLOUD_PROJECT` | Fallback | Alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_VERTEX_LOCATION` | Yes (unless passed in options) | Primary Vertex location source |\n| `GOOGLE_CLOUD_LOCATION` | Fallback | Alternate Vertex location source |\n| `VERTEX_LOCATION` | Fallback | Alternate Vertex location source |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `PI_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI Codex responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------------ | ---------------------------------------------------- |\n| `PI_CODEX_DEBUG` | `1`/`true` enables Codex provider debug logging |\n| `PI_CODEX_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `PI_CODEX_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path |\n| `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `PI_CODEX_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `PI_OPENAI_STREAM_FIRST_EVENT_TIMEOUT_MS` | Positive integer OpenAI first-event timeout override |\n| `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `PI_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider and Exa MCP tools |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / Codex OAuth in DB | Codex search provider availability/auth |\n| `PI_CODEX_WEB_SEARCH_MODEL` | Codex search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.omp/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\n`searchAnthropic()` resolves credentials in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY`\n2. `authStorage.getApiKey(\"anthropic\")` fallback credentials (runtime/config overrides, stored API-key credentials, stored OAuth credentials, then generic Anthropic env fallback: `ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN` / `ANTHROPIC_API_KEY`)\n\nFor either credential path, base URL resolution is:\n\n1. `ANTHROPIC_SEARCH_BASE_URL`\n2. `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled\n3. `ANTHROPIC_BASE_URL`\n4. `https://api.anthropic.com`\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | API key used exclusively for the Anthropic web search provider. Highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / Foundry for search calls without affecting chat completions. |\n| `ANTHROPIC_SEARCH_BASE_URL` | Base URL used exclusively for the Anthropic web search provider. Applied to either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode) for search calls. |\n| `ANTHROPIC_SEARCH_MODEL` | Search model override. Defaults to `claude-haiku-4-5`. |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for Anthropic requests when no search-specific base URL is set. |\n\nUse `ANTHROPIC_SEARCH_BASE_URL` (optionally with `ANTHROPIC_SEARCH_API_KEY`) to keep chat routed through an enterprise gateway (`ANTHROPIC_BASE_URL` or `CLAUDE_CODE_USE_FOUNDRY=true`) while pointing web search at a direct Anthropic endpoint, or vice versa.\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `PI_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ----------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `PI_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `PI_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `PI_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `PI_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `PI_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `PI_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `PI_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `PI_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `PI_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `PI_TINY_DEVICE` | ONNX execution provider for local tiny models; overrides the `providers.tinyModelDevice` setting (default: CPU; supports `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`, `webnn`, `webnn-gpu`, `webnn-cpu`, `webnn-npu`) |\n| `PI_TINY_DTYPE` | ONNX quantization/precision for local tiny models; overrides the `providers.tinyModelDtype` setting (default: each model's shipped dtype, currently `q4`; supports `auto`, `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`) |\n| `PI_NO_INTERLEAVED_THINKING` | If `1`, disables Anthropic interleaved thinking budget behavior and uses output-token inflation for older thinking mode |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `PI_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `PI_SUBPROCESS_CMD` | Overrides subagent spawn command (`omp` / `omp.cmd` resolution bypass) |\n| `PI_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `PI_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `PI_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `PI_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `PI_TIMING=full` lists every module-load entry instead of just the top N. |\n| `PI_DEBUG_STARTUP` | If set (any non-empty value), streams one synchronous `[startup] <phase>:start` / `:done` marker line to **stderr** as each startup phase begins/ends — including command-module imports (`cli:load:<name>`) and the native addon extraction/`dlopen` (`native:*`). Unlike `PI_TIMING` (which prints only once startup completes), the markers survive a hard hang: the last line on stderr names the phase the process is stuck in. Combine with `PI_TIMING` freely; markers and the span tree share the same phase names. |\n| `PI_PACKAGE_DIR` | Overrides package asset base dir resolution (`docs/`, `examples/`, `CHANGELOG.md`) |\n| `PI_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `PI_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `SMITHERY_API_KEY` | Smithery API key for managed MCP auth lookup |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`OLLAMA_HOST` if unset, then `http://127.0.0.1:11434`) |\n| `OLLAMA_HOST` | Ollama host used for implicit Ollama discovery when `OLLAMA_BASE_URL` is unset; accepts Ollama-style values such as `127.0.0.1:11434` or `http://host:11434` |\n| `OLLAMA_CONTEXT_LENGTH` | Positive integer context-window override for implicit Ollama discovery; affects OMP context budgeting only and does not change Ollama's runtime `num_ctx` |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `PI_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `apply_patch`) |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `PI_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `PI_FORCE_IMAGE_PROTOCOL=sixel` |\n| `PI_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n| `OMP_MCP_TIMEOUT_MS` | Overrides MCP client request timeout (ms) for every MCP server. `0` disables client-side timeouts (`AbortSignal` never fires). Invalid (negative or non-numeric) values are ignored with a warning and the per-server config or default (`30000`) is used. |\n\n`PI_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@oh-my-pi/pi-utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `PI_CONFIG_DIR` | Config root dirname under home (default `.omp`) |\n| `PI_CODING_AGENT_DIR` | Full override for agent directory (default `~/<PI_CONFIG_DIR or .omp>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `PI_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `CLAUDE_BASH_NO_CI` | Legacy alias fallback for `PI_BASH_NO_CI` |\n| `PI_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `CLAUDE_BASH_NO_LOGIN` | Legacy alias fallback for `PI_BASH_NO_LOGIN` |\n| `PI_SHELL_PREFIX` | Optional command prefix wrapper |\n| `CLAUDE_CODE_SHELL_PREFIX` | Legacy alias fallback for `PI_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `PI_BASH_NO_LOGIN`/`CLAUDE_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in MCP command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `PI_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `PI_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `PI_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `PI_NO_SYNC_OUTPUT` | If `1`, disables DEC 2026 synchronized-output wrappers while keeping TUI autowrap guards |\n| `PI_NO_DECCARA` | If set (truthy), disables Kitty DECCARA rectangular-SGR background fills (forces padded-string rendering) |\n| `PI_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `PI_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `PI_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `PI_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `PI_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
|
|
17
|
+
"extension-loading.md": "# Extension Loading (TypeScript/JavaScript Modules)\n\nThis document covers how the coding agent discovers and loads **extension modules** (`.ts`/`.js`) at startup.\n\nIt does **not** cover `gemini-extension.json` manifest extensions (documented separately).\n\n## What this subsystem does\n\nExtension loading builds a list of module entry files, imports each module with Bun, executes its factory, and returns:\n\n- loaded extension definitions\n- per-path load errors (without aborting the whole load)\n- a shared extension runtime object used later by `ExtensionRunner`\n\n## Primary implementation files\n\n- `src/extensibility/extensions/loader.ts` — path discovery + import/execution\n- `src/extensibility/extensions/index.ts` — public exports\n- `src/extensibility/extensions/runner.ts` — runtime/event execution after load\n- `src/discovery/builtin.ts` — native auto-discovery provider for extension modules\n- `src/config/settings.ts` — loads merged `extensions` / `disabledExtensions` settings\n\n---\n\n## Inputs to extension loading\n\n### 1) Auto-discovered native extension modules\n\n`discoverAndLoadExtensions()` first asks discovery providers for `extension-module` capability items, then keeps only provider `native` items.\n\nNative `extension-module` discovery comes from:\n\n- Project directory: `<cwd>/.omp/extensions`\n- User directory: `~/.omp/agent/extensions`\n- Native legacy/settings JSON entries: `<cwd>/.omp/settings.json#extensions` and `~/.omp/agent/settings.json#extensions`\n\nPath roots come from the native provider (`SOURCE_PATHS.native`). Project lookup is cwd-only for these native roots; it does not walk ancestors.\n\nNotes:\n\n- Native auto-discovery is currently `.omp` based.\n- Legacy `.pi` is still accepted in package manifests (`pi.extensions`) and project override lookup, but `.pi/extensions` is not a native root here.\n\n### 2) Installed plugin extension entries\n\nAfter native auto-discovery, `discoverAndLoadExtensions()` appends extension entry points from enabled installed plugins via `getAllPluginExtensionPaths(cwd)`.\n\nPlugin extension entries come from package `omp.extensions` / `pi.extensions` manifests, including enabled feature entries.\n\n### 3) Explicitly configured paths\n\nAfter plugin extension entries, configured paths are appended and resolved.\n\nConfigured path sources in the main session startup path (`sdk.ts`):\n\n1. CLI-provided paths (`--extension/-e`, and `--hook` is also treated as an extension path)\n2. Merged settings `extensions` array\n\nSettings files:\n\n- User: `~/.omp/agent/config.yml` (or custom agent dir via `PI_CODING_AGENT_DIR`)\n- Project/native settings capability: `<cwd>/.omp/config.yml` and `<cwd>/.omp/settings.json`\n\nNative extension-module discovery also reads legacy JSON extension lists from:\n\n- `~/.omp/agent/settings.json`\n- `<cwd>/.omp/settings.json`\n\nExamples:\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - ~/my-exts/safety.ts\n - ./local/ext-pack\n```\n\n```json\n{\n \"extensions\": [\"./.omp/extensions/my-extra\"]\n}\n```\n\n---\n\n## Enable/disable controls\n\n### Disable discovery\n\n- CLI: `--no-extensions`\n- SDK option: `disableExtensionDiscovery`\n\nBehavior split:\n\n- SDK: when `disableExtensionDiscovery=true`, it still loads `additionalExtensionPaths` via `loadExtensions()`.\n- CLI path building (`main.ts`) currently clears CLI extension paths when `--no-extensions` is set, so explicit `-e/--hook` are not forwarded in that mode.\n\n### Disable specific extension modules\n\n`disabledExtensions` setting filters by extension id format:\n\n- `extension-module:<derivedName>`\n\n`derivedName` is based on entry path (`getExtensionNameFromPath`), for example:\n\n- `/x/foo.ts` -> `foo`\n- `/x/bar/index.ts` -> `bar`\n\nExample:\n\n```yaml\ndisabledExtensions:\n - extension-module:foo\n```\n\n---\n\n## Path and entry resolution\n\n### Path normalization\n\nFor configured paths:\n\n1. Normalize unicode spaces\n2. Expand `~`\n3. If relative, resolve against current `cwd`\n\n### If configured path is a file\n\nIt is used directly as a module entry candidate.\n\n### If configured path is a directory\n\nResolution order:\n\n1. `package.json` in that directory with `omp.extensions` (or legacy `pi.extensions`) -> use declared entries\n2. `index.ts`\n3. `index.js`\n4. Otherwise scan one level for extension entries:\n - direct `*.ts` / `*.js`\n - subdir `index.ts` / `index.js`\n - subdir `package.json` with `omp.extensions` / `pi.extensions`\n\nRules and constraints:\n\n- no recursive discovery beyond one subdirectory level\n- declared `extensions` manifest entries are resolved relative to that package directory\n- declared entries are included only if file exists/access is allowed\n- in `*/index.{ts,js}` pairs, TypeScript is preferred over JavaScript\n- symlinks are treated as eligible files/directories\n\n### Ignore behavior differs by source\n\n- Native auto-discovery (`discoverExtensionModulePaths` in discovery helpers) uses native glob with `gitignore: true` and `hidden: false`.\n- Explicit configured directory scanning in `loader.ts` uses `readdir` rules and does **not** apply gitignore filtering.\n\n---\n\n## Load order and precedence\n\n`discoverAndLoadExtensions()` builds one ordered list and then calls `loadExtensions()`.\n\nOrder:\n\n1. Native auto-discovered modules\n2. Installed plugin extension entries\n3. Explicit configured paths (in provided order)\n\nIn `sdk.ts`, configured order is:\n\n1. CLI additional paths\n2. Settings `extensions`\n\nDe-duplication:\n\n- absolute path based\n- first seen path wins\n- later duplicates are ignored\n\nImplication: if the same module path is both auto-discovered and explicitly configured, it is loaded once at the first position (auto-discovered stage).\n\n---\n\n## Module import and factory contract\n\nEach candidate path is loaded with dynamic import:\n\n- `await import(resolvedPath)`\n- factory is `module.default ?? module`\n- factory must be a function (`ExtensionFactory`)\n\nIf export is not a function, that path fails with a structured error and loading continues.\n\n---\n\n## Failure handling and isolation\n\n### During loading\n\nPer extension path, failures are captured as `{ path, error }` and do not stop other paths from loading.\n\nCommon cases:\n\n- import failure / missing file\n- invalid factory export (non-function)\n- exception thrown while executing factory\n\n### Runtime isolation model\n\n- Extensions are **not sandboxed** (same process/runtime).\n- They share one `EventBus` and one `ExtensionRuntime` instance.\n- During load, runtime action methods intentionally throw `ExtensionRuntimeNotInitializedError`; action wiring happens later in `ExtensionRunner.initialize()`.\n\n### After loading\n\nWhen events run through `ExtensionRunner`, handler exceptions are caught and emitted as extension errors instead of crashing the runner loop.\n\n---\n\n## Minimal user/project layout examples\n\n### User-level\n\n```text\n~/.omp/agent/\n config.yml\n extensions/\n guardrails.ts\n audit/\n index.ts\n```\n\n### Project-level\n\n```text\n<repo>/\n .omp/\n settings.json\n extensions/\n checks/\n package.json\n lint-gates.ts\n```\n\n`checks/package.json`:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/check-a.ts\", \"./src/check-b.js\"]\n }\n}\n```\n\nLegacy manifest key still accepted:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n",
|
|
18
|
+
"extensions.md": "# Extensions\n\nPrimary guide for authoring runtime extensions in `packages/coding-agent`.\n\nThis document covers the current extension runtime in:\n\n- `src/extensibility/extensions/types.ts`\n- `src/extensibility/extensions/runner.ts`\n- `src/extensibility/extensions/wrapper.ts`\n- `src/extensibility/extensions/index.ts`\n- `src/modes/controllers/extension-ui-controller.ts`\n\nFor discovery paths and filesystem loading rules, see [`extension-loading.md`](./extension-loading.md).\n\n## What an extension is\n\nAn extension is a TS/JS module exporting a default factory:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n // register handlers/tools/commands/renderers\n}\n```\n\nExtensions can combine all of the following in one module:\n\n- event handlers (`pi.on(...)`)\n- LLM-callable tools (`pi.registerTool(...)`)\n- slash commands (`pi.registerCommand(...)`)\n- keyboard shortcuts and flags\n- custom message rendering\n- session/message injection APIs (`sendMessage`, `sendUserMessage`, `appendEntry`)\n\n## Runtime model\n\n1. Extensions are imported and their factory functions run.\n2. During that load phase, registration methods are valid; runtime action methods are not yet initialized.\n3. `ExtensionRunner.initialize(...)` wires live actions/contexts for the active mode.\n4. Session/agent/tool lifecycle events are emitted to handlers.\n5. Every tool execution is wrapped with extension interception (`tool_call` / `tool_result`).\n\n```text\nExtension lifecycle (simplified)\n\nload paths\n │\n ▼\nimport module + run factory (registration only)\n │\n ▼\nExtensionRunner.initialize(mode/session/tool registry)\n │\n ├─ emit session/agent events to handlers\n ├─ wrap tool execution (tool_call/tool_result)\n └─ expose runtime actions (sendMessage, setActiveTools, ...)\n```\n\nImportant constraint from `loader.ts`:\n\n- calling action methods like `pi.sendMessage()` during extension load throws `ExtensionRuntimeNotInitializedError`\n- register first; perform runtime behavior from events/commands/tools\n\n## Quick start\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n const { z } = pi.zod;\n\n pi.setLabel(\"Safety + Utilities\");\n\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Extension loaded in ${ctx.cwd}`, \"info\");\n });\n\n pi.on(\"tool_call\", async (event) => {\n if (event.toolName === \"bash\" && event.input.command?.includes(\"rm -rf\")) {\n return { block: true, reason: \"Blocked by extension policy\" };\n }\n });\n\n pi.registerTool({\n name: \"hello_extension\",\n label: \"Hello Extension\",\n description: \"Return a greeting\",\n parameters: z.object({ name: z.string() }),\n async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {\n return {\n content: [{ type: \"text\", text: `Hello, ${params.name}` }],\n details: { greeted: params.name },\n };\n },\n });\n\n pi.registerCommand(\"hello-ext\", {\n description: \"Show queue state\",\n handler: async (_args, ctx) => {\n ctx.ui.notify(`pending=${ctx.hasPendingMessages()}`, \"info\");\n },\n });\n}\n```\n\n## Extension API surfaces\n\n## 1) Registration and actions (`ExtensionAPI`)\n\nCore methods:\n\n- `on(event, handler)`\n- `registerTool`, `registerCommand`, `registerShortcut`, `registerFlag`\n- `registerMessageRenderer`, `registerAssistantThinkingRenderer`\n- `setLabel`, `getFlag`\n- `sendMessage`, `sendUserMessage`, `appendEntry`, `exec`\n- `getActiveTools`, `getAllTools`, `setActiveTools`\n- `getCommands`\n- `getSessionName`, `setSessionName`\n- `setModel`, `getThinkingLevel`, `setThinkingLevel`\n- `registerProvider`\n- `events` (shared event bus)\n\nIn interactive mode, `input` handlers run before the built-in first-message auto-title check. Extensions that call `await pi.setSessionName(...)` from `input` can set the persisted session name and prevent the default auto-generated title from running for that session.\n\nAlso exposed:\n\n- `pi.logger`\n- `pi.typebox` (zod-backed compatibility shim for legacy TypeBox-style schemas)\n- `pi.zod` (injected `zod/v4` module — canonical for tool parameter schemas)\n- `pi.pi` (package exports)\n\n### Message delivery semantics\n\n`pi.sendMessage(message, options)` supports:\n\n- `deliverAs: \"steer\"` (default) — interrupts current run\n- `deliverAs: \"followUp\"` — queued to run after current run\n- `deliverAs: \"nextTurn\"` — stored and injected on the next user prompt\n- `triggerTurn: true` — starts a turn when idle (`nextTurn` ignores this)\n\n`pi.sendUserMessage(content, { deliverAs })` always goes through prompt flow; while streaming it queues as steer/follow-up.\n\n## 2) Handler context (`ExtensionContext`)\n\nHandlers and tool `execute` receive `ctx` with:\n\n- `ui`\n- `hasUI`\n- `cwd`\n- `sessionManager` (read-only)\n- `modelRegistry`, `model`\n- `getContextUsage()`\n- `compact(...)`\n- `isIdle()`, `hasPendingMessages()`, `abort()`\n- `shutdown()`\n- `getSystemPrompt()`\n\n## 3) Command context (`ExtensionCommandContext`)\n\nCommand handlers additionally get:\n\n- `waitForIdle()`\n- `newSession(...)`\n- `switchSession(...)`\n- `branch(entryId)`\n- `navigateTree(targetId, { summarize })`\n- `reload()`\n\nUse command context for session-control flows; these methods are intentionally separated from general event handlers.\n\n## Event surface (current names and behavior)\n\nCanonical event unions and payload types are in `types.ts`.\n\n### Session lifecycle\n\n- `session_start`\n- `session_before_switch` / `session_switch`\n- `session_before_branch` / `session_branch`\n- `session_before_compact` / `session.compacting` / `session_compact`\n- `session_before_tree` / `session_tree`\n- `session_shutdown`\n\nCancelable pre-events:\n\n- `session_before_switch` → `{ cancel?: boolean }`\n- `session_before_branch` → `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_before_compact` → `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session_before_tree` → `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n\n### Prompt and turn lifecycle\n\n- `input`\n- `before_agent_start`\n- `before_provider_request` (may replace provider request payload)\n- `after_provider_response`\n- `context`\n- `agent_start` / `agent_end`\n- `turn_start` / `turn_end`\n- `message_start` / `message_update` / `message_end`\n\n### Tool lifecycle\n\n- `tool_call` (pre-exec, may block)\n- `tool_result` (post-exec, may patch content/details/isError)\n- `tool_execution_start` / `tool_execution_update` / `tool_execution_end` (observability)\n\n`tool_result` is middleware-style: handlers run in extension order and each sees prior modifications.\n\n### Reliability/runtime signals\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `goal_updated`\n- `credential_disabled`\n\n### User command interception\n\n- `user_bash` (override with `{ result }`)\n- `user_python` (override with `{ result }`)\n\n### `resources_discover`\n\n`resources_discover` exists in extension types and `ExtensionRunner`.\nCurrent runtime note: `ExtensionRunner.emitResourcesDiscover(...)` is implemented, but there are no `AgentSession` callsites invoking it in the current codebase.\n\n## Tool authoring details\n\n`registerTool` uses `ToolDefinition` from `types.ts`.\n\nCurrent `execute` signature:\n\n```ts\nexecute(\n\ttoolCallId,\n\tparams,\n\tsignal,\n\tonUpdate,\n\tctx,\n): Promise<AgentToolResult>\n```\n\nTemplate:\n\n```ts\nconst { z } = pi.zod;\n\npi.registerTool({\n name: \"my_tool\",\n label: \"My Tool\",\n description: \"...\",\n parameters: z.object({}),\n hidden: false,\n defaultInactive: false,\n deferrable: false,\n async execute(_id, _params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Working...\" }] });\n return { content: [{ type: \"text\", text: \"Done\" }], details: {} };\n },\n onSession(event, ctx) {\n // reason: start|switch|branch|tree|shutdown\n },\n renderCall(args, options, theme) {\n // optional TUI render\n },\n renderResult(result, options, theme, args) {\n // optional TUI render\n },\n});\n```\n\n`tool_call`/`tool_result` intercept all tools once the registry is wrapped in `sdk.ts`, including built-ins and extension/custom tools. `ToolDefinition` also supports optional `hidden`, `defaultInactive`, `deferrable`, `mcpServerName`, `mcpToolName`, `renderCall`, and `renderResult` fields.\n\n## UI integration points\n\n`ctx.ui` implements the `ExtensionUIContext` interface. Support differs by mode.\n\n### Interactive mode (`extension-ui-controller.ts`)\n\nSupported:\n\n- dialogs: `select`, `confirm`, `input`, `editor`\n- input editing: `setEditorText`, `getEditorText`, `pasteToEditor`, `editor`\n- terminal title and working message (`setTitle`, `setWorkingMessage`)\n- notifications/status/editor text/terminal input/custom overlays\n- theme listing/loading by name (`setTheme` supports string names)\n- tools expanded toggle\n\nCurrent no-op methods in this controller:\n\n- `setFooter`\n- `setHeader`\n- `setEditorComponent`\n\nAlso note: `setWidget` currently routes to status-line text via `setHookWidget(...)`.\n\n### RPC mode (`rpc-mode.ts`)\n\n`ctx.ui` is backed by RPC `extension_ui_request` events:\n\n- dialog methods (`select`, `confirm`, `input`, `editor`) round-trip to client responses\n- fire-and-forget methods emit requests (`notify`, `setStatus`, `setWidget` for string arrays, `setTitle`, `setEditorText`)\n\nUnsupported/no-op in RPC implementation:\n\n- `onTerminalInput`\n- `custom`\n- `setFooter`, `setHeader`, `setEditorComponent`\n- `setWorkingMessage`\n- theme switching/loading (`setTheme` returns failure)\n- tool expansion controls are inert\n\n### Print/headless/subagent paths\n\nWhen no UI context is supplied to runner init, `ctx.hasUI` is `false` and methods are no-op/default-returning.\n\n### Background interactive mode\n\nBackground mode installs a non-interactive UI context object. In current implementation, `ctx.hasUI` may still be `true` while interactive dialogs return defaults/no-op behavior.\n\n## Session and state patterns\n\nFor durable extension state:\n\n1. Persist with `pi.appendEntry(customType, data)`.\n2. Rebuild state from `ctx.sessionManager.getBranch()` on `session_start`, `session_branch`, `session_tree`.\n3. Keep tool result `details` structured when state should be visible/reconstructible from tool result history.\n\nExample reconstruction pattern:\n\n```ts\npi.on(\"session_start\", async (_event, ctx) => {\n let latest;\n for (const entry of ctx.sessionManager.getBranch()) {\n if (entry.type === \"custom\" && entry.customType === \"my-state\") {\n latest = entry.data;\n }\n }\n // restore from latest\n});\n```\n\n## Rendering extension points\n\n## Custom message renderer\n\n```ts\npi.registerMessageRenderer(\"my-type\", (message, { expanded }, theme) => {\n // return pi-tui Component\n});\n```\n\nUsed by interactive rendering when custom messages are displayed.\n\n## Assistant thinking renderer\n\n```ts\nimport { Container, Text } from \"@oh-my-pi/pi-tui\";\n\npi.registerAssistantThinkingRenderer((context, theme) => {\n const container = new Container();\n container.addChild(new Text(theme.fg(\"dim\", `thinking chars: ${context.text.length}`), 1, 0));\n return container;\n});\n```\n\nUsed by interactive rendering to add display-only supplemental UI below each visible assistant thinking block. The renderer receives the already-visible thinking text, content/thinking indexes, theme, and a `requestRender()` callback for async renderers. All registered renderers that return a component are appended in registration order. Renderers must not mutate messages; the original thinking block remains the provider/session source of truth.\n\n## Tool call/result renderer\n\nProvide `renderCall` / `renderResult` on `registerTool` definitions for custom tool visualization in TUI.\n\n## Constraints and pitfalls\n\n- Runtime actions are unavailable during extension load.\n- `tool_call` errors block execution (fail-closed).\n- Command name conflicts with built-ins are skipped with diagnostics.\n- Reserved shortcuts are ignored (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n- Treat `ctx.reload()` as terminal for the current command handler frame.\n\n## Extensions vs hooks vs custom-tools\n\nUse the right surface:\n\n- **Extensions** (`src/extensibility/extensions/*`): unified system (events + tools + commands + renderers + provider registration).\n- **Hooks** (`src/extensibility/hooks/*`): separate legacy event API.\n- **Custom-tools** (`src/extensibility/custom-tools/*`): tool-focused modules; when loaded alongside extensions they are adapted and still pass through extension interception wrappers.\n\nIf you need one package that owns policy, tools, command UX, and rendering together, use extensions.\n",
|
|
19
|
+
"fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope, traversal policy, and requested metadata detail. Higher-level operations (`glob` filtering, `fuzzyFind` scoring, and cached `grep` candidate selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across native discovery/search flows when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs` (cached directory mode only)\n- JS binding/export:\n - `packages/natives/native/index.d.ts` (`invalidateFsScanCache`)\n - `packages/natives/native/index.js`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n- `detail` (`ScanDetail::Minimal` or `ScanDetail::Full`)\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- Minimal scans (path + file type only) do **not** share entries with full scans (mtime + regular-file size metadata).\n- `follow_links` is part of `ScanOptions` used to build the walker, but is not currently part of `CacheKey`; calls that differ only by `follow_links` can share a cache entry.\n\nConsumers must pass stable semantics for hidden/gitignore/node_modules/detail behavior; changing any keyed flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache population uses `ignore::WalkBuilder` configured by `include_hidden`, `use_gitignore`, `skip_node_modules`, and `follow_links`:\n\n- sorted by file path\n- `.git` is always pruned\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- cancellation is checked before the walk and every 128 visited entries per parallel visitor\n- `ScanDetail::Minimal` records normalized relative path and file type only\n- `ScanDetail::Full` also records mtime and regular-file size\n\nSearch roots for cache scans are resolved by `fs_cache::resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cloned cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- `force_rescan(..., store=false)`: remove any matching key, scan fresh, and do not repopulate cache\n- `force_rescan(..., store=true)`: remove any matching key, scan fresh, then store the new entry\n- max entry enforcement is oldest-first eviction by `created_at` after insert\n\n## Empty-result fast recheck (separate from normal hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(..., store=true)` and retries\n- intended to reduce stale-negative results when files were added while the cache is still inside TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age exceeds threshold\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when cached directory candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on exposed scan/search APIs (`cache?: boolean`, default `false`).\n\nCurrent defaults in native APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`; `node_modules` is included only when `includeNodeModules=true` or the pattern mentions `node_modules`; full detail is used only when `sortByMtime=true`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, `node_modules` is skipped, `follow_links=true`, minimal detail\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`; cached directory mode skips `node_modules` unless the glob mentions `node_modules`; minimal detail\n\nCoding-agent callers today:\n\n- High-volume mention candidate discovery enables cache:\n - `packages/coding-agent/src/utils/file-mentions.ts`\n- Mutation flows invalidate through `packages/coding-agent/src/tools/fs-cache-invalidation.ts`.\n- Tool-level search integration (`packages/coding-agent/src/tools/search.ts`) currently calls native `grep` with `cache: false`.\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of the target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (for example after delete), fallback canonicalizes the parent and reattaches the filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation callsites include:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/edit/hashline/filesystem.ts`\n- `packages/coding-agent/src/edit/modes/patch.ts`\n- `packages/coding-agent/src/edit/modes/replace.ts`\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. **Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules/detail semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. **Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)` or use an uncached streaming walker\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/cached `grep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`, `detail`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
|
|
20
|
+
"gemini-manifest-extensions.md": "# Gemini Manifest Extensions (`gemini-extension.json`)\n\nThis document covers how the coding-agent discovers and parses Gemini-style manifest extensions (`gemini-extension.json`) into the `extensions` capability.\n\nIt does **not** cover TypeScript/JavaScript extension module loading (`extensions/*.ts`, `index.ts`, `package.json omp.extensions`), which is documented in `extension-loading.md`.\n\n## Implementation files\n\n- [`packages/coding-agent/src/discovery/gemini.ts`](../packages/coding-agent/src/discovery/gemini.ts)\n- [`packages/coding-agent/src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`packages/coding-agent/src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`packages/coding-agent/src/capability/extension.ts`](../packages/coding-agent/src/capability/extension.ts)\n- [`packages/coding-agent/src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`packages/coding-agent/src/extensibility/extensions/loader.ts`](../packages/coding-agent/src/extensibility/extensions/loader.ts)\n\n---\n\n## What gets discovered\n\nThe Gemini provider (`id: gemini`, priority `60`) registers an `extensions` loader that scans two fixed roots:\n\n- User: `~/.gemini/extensions`\n- Project: `<cwd>/.gemini/extensions`\n\nPath resolution is direct from `ctx.home` and `ctx.cwd` via `getUserPath()` / `getProjectPath()`.\n\nImportant scope rule: project lookup is **cwd-only**. It does not walk parent directories.\n\n---\n\n## Directory scan rules\n\nFor each root (`~/.gemini/extensions` and `<cwd>/.gemini/extensions`), discovery does:\n\n1. `readDirEntries(root)`\n2. keep only direct child directories (`entry.isDirectory()`)\n3. for each child `<name>`, attempt to read exactly:\n - `<root>/<name>/gemini-extension.json`\n\nThere is no recursive scan beyond one directory level.\n\n### Hidden directories\n\nGemini manifest discovery does **not** filter out dot-prefixed directory names. If a hidden child directory exists and contains `gemini-extension.json`, it is considered.\n\n### Missing/unreadable files\n\nIf `gemini-extension.json` is missing or unreadable, that directory is skipped silently (no warning).\n\n---\n\n## Manifest shape (as implemented)\n\nThe capability type defines this manifest shape:\n\n```ts\ninterface ExtensionManifest {\n name?: string;\n description?: string;\n mcpServers?: Record<string, Omit<MCPServer, \"name\" | \"_source\">>;\n tools?: unknown[];\n context?: unknown;\n}\n```\n\nDiscovery-time behavior is intentionally loose:\n\n- JSON parse success is required.\n- There is no runtime schema validation for field types/content beyond JSON syntax.\n- The parsed object is stored as `manifest` on the capability item.\n\n### Name normalization\n\n`Extension.name` is set to:\n\n1. `manifest.name` if it is not `null`/`undefined`\n2. otherwise the extension directory name\n\nNo string-type enforcement is applied here.\n\n---\n\n## Materialization into capability items\n\nA valid parsed manifest creates one `Extension` capability item:\n\n```ts\n{\n\tname: manifest.name ?? <directory-name>,\n\tpath: <extension-directory>,\n\tmanifest: <parsed-json>,\n\tlevel: \"user\" | \"project\",\n\t_source: {\n\t\tprovider: \"gemini\",\n\t\tproviderName: \"Gemini CLI\" // attached by capability registry\n\t\tpath: <absolute-manifest-path>,\n\t\tlevel: \"user\" | \"project\"\n\t}\n}\n```\n\nNotes:\n\n- `_source.path` is normalized to an absolute path by `createSourceMeta()`.\n- Registry-level capability validation for `extensions` only checks presence of `name` and `path`.\n- Manifest internals (`mcpServers`, `tools`, `context`) are not validated during discovery.\n\n---\n\n## Error handling and warning semantics\n\n### Warned\n\n- Invalid JSON in a manifest file:\n - warning format: `Invalid JSON in <manifestPath>`\n\n### Not warned (silent skip)\n\n- `extensions` directory missing\n- child directory has no `gemini-extension.json`\n- unreadable manifest file\n- manifest JSON is syntactically valid but semantically odd/incomplete\n\nThis means partial validity is accepted: only syntactic JSON failure emits a warning.\n\n---\n\n## Precedence and deduplication with other sources\n\n`extensions` capability is aggregated across providers by the capability registry.\n\nCurrent providers for this capability:\n\n- `native` (`packages/coding-agent/src/discovery/builtin.ts`) priority `100`\n- `gemini` (`packages/coding-agent/src/discovery/gemini.ts`) priority `60`\n\nDedup key is `ext.name` (`extensionCapability.key = ext => ext.name`).\n\n### Cross-provider precedence\n\nHigher-priority provider wins on duplicate extension names.\n\n- If `native` and `gemini` both emit extension name `foo`, the native item is kept.\n- Lower-priority duplicate is retained only in `result.all` with `_shadowed = true`.\n\n### Intra-provider order effects\n\nBecause dedup is “first seen wins”, provider-local item order matters.\n\n- Gemini loader appends **user first**, then **project**.\n- Therefore, duplicate names between `~/.gemini/extensions` and `<cwd>/.gemini/extensions` keep the user entry and shadow the project entry.\n\nBy contrast, native provider builds config dir order differently (`project` then `user` in `getConfigDirs()`), so native intra-provider shadowing is the opposite direction.\n\n---\n\n## User vs project behavior summary\n\nFor Gemini manifests specifically:\n\n- Both user and project roots are scanned every load.\n- Project root is fixed to `<cwd>/.gemini/extensions` (no ancestor walk).\n- Duplicate names inside Gemini source resolve to user-first.\n- Duplicate names against higher-priority providers (notably native) lose by priority.\n\n---\n\n## Boundary: discovery metadata vs runtime extension loading\n\n`gemini-extension.json` discovery currently feeds capability metadata (`Extension` items). It does **not** directly load runnable TS/JS extension modules.\n\nRuntime module loading (`discoverAndLoadExtensions()` / `loadExtensions()`) uses the `extension-module` capability and explicit paths, and currently filters auto-discovered modules to provider `native` only.\n\nPractical implication:\n\n- Gemini manifest extensions are discoverable as capability records.\n- They are not, by themselves, executed as runtime extension modules by the extension loader pipeline.\n\nThis boundary is intentional in current implementation and explains why manifest discovery and executable module loading can diverge.\n",
|
|
21
|
+
"handoff-generation-pipeline.md": "# `/handoff` generation pipeline\n\nThis document describes how the coding-agent implements `/handoff`: trigger path, oneshot generation, session switch, context reinjection, persistence, and UI behavior.\n\n## Scope\n\nCovers:\n\n- Interactive `/handoff` command dispatch\n- `AgentSession.handoff()` lifecycle and state transitions\n- `generateHandoff(...)` request shape\n- How old/new sessions persist handoff data differently\n- UI behavior for success, cancel, and failure\n\nDoes not cover:\n\n- Generic tree navigation/branch internals\n- Non-handoff session commands (`/new`, `/fork`, `/resume`)\n\n## Implementation files\n\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`packages/agent/src/compaction/compaction.ts`](../packages/agent/src/compaction/compaction.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n\n## Trigger path\n\n1. `/handoff` is declared in builtin slash command metadata (`slash-commands.ts`) with optional inline hint: `[focus instructions]`.\n2. In interactive input handling (`InputController`), submit text matching `/handoff` or `/handoff ...` is intercepted before normal prompt submission.\n3. The editor is cleared and `handleHandoffCommand(customInstructions?)` is called.\n4. `CommandController.handleHandoffCommand` performs a preflight guard using current entries:\n - Counts `type === \"message\"` entries.\n - If `< 2`, it warns: `Nothing to hand off (no messages yet)` and returns.\n\nThe same minimum-content guard exists again inside `AgentSession.handoff()` and throws if violated. This duplicates safety at both UI and session layers.\n\n## End-to-end lifecycle\n\n### 1) Start handoff generation\n\n`AgentSession.handoff(customInstructions?)`:\n\n- Reads current branch entries (`sessionManager.getBranch()`).\n- Validates minimum message count (`>= 2`).\n- Creates `#handoffAbortController` and links any caller-provided abort signal to it.\n- Resolves the current model API key through `ModelRegistry`.\n- Calls `generateHandoff(...)` with:\n - live agent messages (`agent.state.messages`),\n - the current model and API key,\n - the base system prompt (`#baseSystemPrompt`),\n - the live tool array (`agent.state.tools`),\n - optional focus instructions,\n - coding-agent message conversion (`convertToLlm`),\n - provider metadata, current thinking level, and `initiatorOverride: \"agent\"`.\n\n`generateHandoff(...)` lives in `packages/agent/src/compaction/compaction.ts` next to summarization. It renders `packages/agent/src/compaction/prompts/handoff-document.md` via `renderHandoffPrompt(...)` with optional `additionalFocus`.\n\n### 2) Generate and capture output\n\n`generateHandoff(...)` converts the existing `AgentMessage[]` history to real LLM `Message[]` history, then appends one trailing agent-attributed `user` message containing the rendered handoff prompt.\n\nThe request uses `completeSimple(...)` directly:\n\n```ts\nawait completeSimple(\n model,\n {\n systemPrompt,\n messages: requestMessages,\n tools,\n },\n {\n apiKey,\n signal,\n reasoning: resolveCompactionEffort(model, options.thinkingLevel),\n toolChoice: \"none\",\n initiatorOverride,\n metadata,\n },\n);\n```\n\nImportant generation properties:\n\n- The request preserves the live provider cache prefix by reusing the same system prompt, tool definitions, and real message history shape as the active agent.\n- The handoff instruction is a trailing `user` message, not a developer message, so the cached prefix remains aligned with the prior turn.\n- `toolChoice: \"none\"` prevents intentional tool dispatch.\n- The returned assistant content is filtered to text blocks and joined with `\\n`; stray tool-call blocks are ignored if a provider does not honor `toolChoice: \"none\"`.\n- `stopReason === \"error\"` throws a generation error.\n\nNo agent-loop events are used for capture. The handoff path no longer waits for `agent_end` and no longer scans the latest assistant message.\n\n### 3) Cancellation checks\n\nCancellation throws `Error(\"Handoff cancelled\")`; a completed generation with no text returns `undefined`.\n\n- caller signal aborts `#handoffAbortController`\n- `completeSimple(...)` receives the abort signal\n- aborted handoff signal or provider `AbortError` is normalized to `Error(\"Handoff cancelled\")`\n- empty generated text returns `undefined`\n\n`AgentSession.handoff()` always clears `#handoffAbortController` in `finally`.\n\n### 4) New session creation\n\nIf text was generated and not aborted:\n\n1. Flush current session writer (`sessionManager.flush()`).\n2. Cancel session-owned async jobs.\n3. Start a brand-new session with `parentSession` pointing at the previous session file when one exists.\n4. Reset in-memory agent state (`agent.reset()`).\n5. Rebind `agent.sessionId` to the new session id.\n6. Rekey/reset Hindsight and Mnemopi memory session tracking for the new session.\n7. Clear queued context arrays (`#steeringMessages`, `#followUpMessages`, `#pendingNextTurnMessages`) and any scheduled hidden next-turn generation.\n8. Reset todo reminder counter.\n\n### 5) Handoff-context injection\n\nThe generated handoff document is wrapped by coding-agent session glue and appended to the new session as a `custom_message` entry:\n\n```text\n<handoff-context>\n...handoff text...\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.\n```\n\nInsertion call:\n\n```ts\nthis.sessionManager.appendCustomMessageEntry(\n \"handoff\",\n handoffContent,\n true,\n undefined,\n \"agent\",\n);\n```\n\nSemantics:\n\n- `customType`: `\"handoff\"`\n- `display`: `true` (visible in TUI rebuild)\n- attribution: `\"agent\"`\n- Entry type: `custom_message` (participates in LLM context)\n\n### 6) Rebuild active agent context\n\nAfter injection:\n\n1. `buildDisplaySessionContext()` resolves message list for current leaf.\n2. `agent.replaceMessages(sessionContext.messages)` makes the injected handoff message active context.\n3. Todo phases are synchronized from the new branch.\n4. Method returns `{ document: handoffText, savedPath? }`.\n\nAt this point, the active LLM context in the new session contains the injected handoff message, not the old transcript.\n\n## Persistence model: old session vs new session\n\n### Old session\n\nHandoff generation is a oneshot request, not a visible agent turn. The generated handoff text is not appended to the old session as an assistant message.\n\nResult: the original session keeps its prior transcript unchanged except for data already persisted before handoff began.\n\n### New session\n\nAfter session reset, handoff is persisted as `custom_message` with `customType: \"handoff\"`.\n\n`buildSessionContext()` converts this entry into a runtime custom/user-context message via `createCustomMessage(...)`, so it is included in future prompts from the new session.\n\nAuto-triggered handoffs can additionally write a timestamped `handoff-*.md` artifact under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled. Manual `/handoff` does not write that artifact.\n\n## Controller/UI behavior\n\n`CommandController.handleHandoffCommand` behavior:\n\n- Shows a status loader: `Generating handoff… (esc to cancel)`.\n- Calls `await session.handoff(customInstructions)`.\n- If result is `undefined`: `showError(\"Handoff cancelled\")`.\n- On success:\n - `rebuildChatFromMessages()` (loads new session context, including injected handoff)\n - invalidates status line and editor top border\n - reloads todos\n - appends success chat line: `New session started with handoff context`\n- On exception:\n - if message is `\"Handoff cancelled\"` or error name is `AbortError`: `showError(\"Handoff cancelled\")`\n - otherwise: `showError(\"Handoff failed: <message>\")`\n- Stops the loader, restores the previous Escape handler, and requests render at end.\n\nManual `/handoff` no longer streams the generated document into chat. A cancellable loader remains visible while the oneshot request runs, and the chat is rebuilt after generation completes.\n\n## Cancellation semantics\n\n### Session-level cancellation primitive\n\n`AgentSession` exposes:\n\n- `abortHandoff()` → aborts `#handoffAbortController`\n- `isGeneratingHandoff` → true while controller exists\n\nWhen this abort path is used, the abort signal is passed to `completeSimple(...)`; `handoff()` normalizes the cancellation to `Error(\"Handoff cancelled\")`, and command controller maps it to cancellation UI.\n\n### Interactive `/handoff` path\n\nThe command controller installs a temporary Escape handler for `/handoff` while the loader is visible. Pressing Escape calls `session.abortHandoff()`, which aborts the `completeSimple(...)` request through `#handoffAbortController`.\n\n## Aborted vs failed handoff\n\nCurrent UI classification:\n\n- **Aborted/cancelled**\n - `abortHandoff()` path triggers `\"Handoff cancelled\"`, or\n - thrown `AbortError`\n - UI shows `Handoff cancelled`\n- **Failed**\n - any other thrown error from `handoff()` / `generateHandoff()` / provider request path\n - UI shows `Handoff failed: ...`\n\nAdditional nuance: if generation completes but no text is returned, `handoff()` returns `undefined` and controller currently reports **cancelled**, not **failed**.\n\n## Short-session and minimum-content guardrails\n\nTwo guards prevent low-signal handoffs:\n\n- UI layer (`handleHandoffCommand`): warns and returns early for `< 2` message entries\n- Session layer (`handoff()`): throws the same condition as an error\n\nThis avoids creating a new session with empty/near-empty handoff context.\n\n## State transition summary\n\nHigh-level state flow:\n\n1. Interactive slash command intercepted.\n2. Preflight message-count guard.\n3. `#handoffAbortController` created (`isGeneratingHandoff = true`).\n4. `generateHandoff(...)` issues one `completeSimple(...)` request with live system prompt, tools, message history, current thinking level, and trailing handoff prompt.\n5. Assistant response text blocks are joined; tool-call blocks are discarded.\n6. If missing text → return `undefined`; if aborted → cancellation error path.\n7. If present:\n - flush old session\n - cancel async jobs\n - create new empty session with previous session as parent\n - reset runtime queues/counters\n - append `custom_message(handoff)`\n - optionally save an auto-triggered handoff document under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled\n8. Controller rebuilds chat UI and announces success.\n9. `#handoffAbortController` cleared (`isGeneratingHandoff = false`).\n\n## Known assumptions and limitations\n\n- No structural validation checks that generated markdown follows the requested section format.\n- Missing generated text is reported as cancellation in controller UX.\n- Manual handoff has no streaming visibility; a cancellable loader is shown until the UI updates after generation completes.\n- Auto-triggered handoffs can write a timestamped `handoff-*.md` artifact when `compaction.handoffSaveToDisk` is enabled; write failure is logged and does not fail the handoff.\n",
|
|
22
|
+
"hooks.md": "# Hooks\n\nThis document describes the **current hook subsystem code** in `src/extensibility/hooks/*`.\n\n## Current status in runtime\n\nThe hook package (`src/extensibility/hooks/`) is still exported and usable as an API surface, but the default CLI runtime now initializes the **extension runner** path. In current startup flow:\n\n- `--hook` is treated as an alias for `--extension` (CLI paths are merged into `additionalExtensionPaths`)\n- tools are wrapped by `ExtensionToolWrapper`, not `HookToolWrapper`\n- context transforms and lifecycle emissions go through `ExtensionRunner`\n\nSo this file documents the hook subsystem implementation itself (types/loader/runner/wrapper), including legacy behavior and constraints.\n\n## Key files\n\n- `src/extensibility/hooks/types.ts` — hook context, event types, and result contracts\n- `src/extensibility/hooks/loader.ts` — module loading and hook discovery bridge\n- `src/extensibility/hooks/runner.ts` — event dispatch, command lookup, error signaling\n- `src/extensibility/hooks/tool-wrapper.ts` — pre/post tool interception wrapper\n- `src/extensibility/hooks/index.ts` — exports/re-exports\n\n## What a hook module is\n\nA hook module must default-export a factory:\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function hook(pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (\n event.toolName === \"bash\" &&\n String(event.input.command ?? \"\").includes(\"rm -rf\")\n ) {\n return { block: true, reason: \"blocked by policy\" };\n }\n });\n}\n```\n\nThe factory can:\n\n- register event handlers with `pi.on(...)`\n- send persistent custom messages with `pi.sendMessage(...)`\n- persist non-LLM state with `pi.appendEntry(...)`\n- register slash commands via `pi.registerCommand(...)`\n- register custom message renderers via `pi.registerMessageRenderer(...)`\n- run shell commands via `pi.exec(...)`\n- author schemas/helpers with injected `pi.zod`, `pi.typebox`, and package exports via `pi.pi`\n\n## Discovery and loading\n\n`discoverAndLoadHooks(configuredPaths, cwd)` does:\n\n1. Load discovered hooks from capability registry (`loadCapability(\"hooks\")`)\n2. Append explicitly configured paths (deduped by absolute path)\n3. Call `loadHooks(allPaths, cwd)`\n\n`loadHooks` then imports each path and expects a `default` function.\n\n### Path resolution\n\n`loader.ts` resolves hook paths as:\n\n- absolute path: used as-is\n- `~` path: expanded\n- relative path: resolved against `cwd`\n\n### Important legacy mismatch\n\nDiscovery providers for `hookCapability` still model pre/post shell-style hook files (for example `.claude/hooks/pre/*`, `.omp/.../hooks/pre/*`).\n\nThe hook loader here uses dynamic module import and requires a default JS/TS hook factory. If a discovered hook path is not importable as a module, load fails and is reported in `LoadHooksResult.errors`.\n\n## Event surfaces\n\nHook events are strongly typed in `types.ts`.\n\n### Session events\n\n- `session_start`\n- `session_before_switch` → can return `{ cancel?: boolean }`\n- `session_switch`\n- `session_before_branch` → can return `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_branch`\n- `session_before_compact` → can return `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session.compacting` → can return `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }`\n- `session_compact`\n- `session_before_tree` → can return `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n- `session_tree`\n- `session_shutdown`\n\n### Agent/context events\n\n- `context` → can return `{ messages?: Message[] }`\n- `before_agent_start` → can return `{ message?: { customType; content; display; details } }`\n- `agent_start`\n- `agent_end`\n- `turn_start`\n- `turn_end`\n- `auto_compaction_start`\n- `auto_compaction_end`\n- `auto_retry_start`\n- `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n\n### Tool events (pre/post model)\n\n- `tool_call` (pre-execution) → can return `{ block?: boolean; reason?: string }`\n- `tool_result` (post-execution) → can return `{ content?; details?; isError? }`\n\nThis is the hook subsystem’s core pre/post interception model.\n\n```text\nHook tool interception flow\n\ntool_call handlers\n │\n ├─ any { block: true }? ── yes ──> throw (tool blocked)\n │\n └─ no\n │\n ▼\n execute underlying tool\n │\n ├─ success ──> tool_result handlers can override { content, details }\n │\n └─ error ──> emit tool_result(isError=true) then rethrow original error\n```\n\n## Execution model and mutation semantics\n\n### 1) Pre-execution: `tool_call`\n\n`HookToolWrapper.execute()` emits `tool_call` before tool execution.\n\n- if any handler returns `{ block: true }`, execution stops\n- if handler throws, wrapper fails closed and blocks execution\n- returned `reason` becomes the thrown error text\n\n### 2) Tool execution\n\nUnderlying tool executes normally if not blocked.\n\n### 3) Post-execution: `tool_result`\n\nAfter success, wrapper emits `tool_result` with:\n\n- `toolName`, `toolCallId`, `input`\n- `content`\n- `details`\n- `isError: false`\n\nIf handler returns overrides:\n\n- `content` can replace result content\n- `details` can replace result details\n\nOn tool failure, wrapper emits `tool_result` with `isError: true` and error text content, then rethrows original error.\n\n### What hooks can mutate\n\n- LLM context for a single call via `context` (`messages` replacement chain)\n- tool output content/details on successful tool calls (`tool_result` path)\n- pre-agent injected message via `before_agent_start`\n- cancellation/custom compaction/tree behavior via `session_before_*` and `session.compacting`\n\n### What hooks cannot mutate in this implementation\n\n- raw tool input parameters in-place (only block/allow on `tool_call`)\n- execution continuation after thrown tool errors (error path rethrows)\n- final success/error status in wrapper behavior (returned `isError` is typed but not applied by `HookToolWrapper`)\n\n## Ordering and conflict behavior\n\n### Discovery-level ordering\n\nCapability providers are priority-sorted (higher first). Dedupe is by capability key, first wins.\n\nFor `hooks`, capability key is `${type}:${tool}:${name}`. Shadowed duplicates from lower-priority providers are marked and excluded from effective discovered list.\n\n### Load order\n\n`discoverAndLoadHooks` builds a flat `allPaths` list, deduped by resolved absolute path, then `loadHooks` iterates in that order.\nFile order within each discovered directory depends on `readdir` output; the hook loader does not perform an additional sort.\n\n### Runtime handler order\n\nInside `HookRunner`, order is deterministic by registration sequence:\n\n1. hooks array order\n2. handler registration order per hook/event\n\nConflict behavior by event type:\n\n- `tool_call`: last returned result wins unless a handler blocks; first block short-circuits\n- `tool_result`: last returned override wins (no short-circuit)\n- `context`: chained; each handler receives prior handler’s message output\n- `before_agent_start`: first returned message is kept; later messages ignored\n- `session_before_*`: latest returned result is tracked; `cancel: true` short-circuits immediately\n- `session.compacting`: latest returned result wins\n\nCommand/renderer conflicts:\n\n- `getCommand(name)` returns first match across hooks (first loaded wins)\n- `getMessageRenderer(customType)` returns first match\n- `getRegisteredCommands()` returns all commands (no dedupe)\n\n## UI interactions (`HookContext.ui`)\n\n`HookUIContext` includes:\n\n- `select`, `confirm`, `input`, `editor`\n- `notify`\n- `setStatus`\n- `custom`\n- `setEditorText`, `getEditorText`\n- `theme` getter\n\n`ctx` includes `hasUI`, `cwd`, `sessionManager`, `modelRegistry`, current `model`, `isIdle()`, `abort()`, and `hasQueuedMessages()`.\n\nWhen running with no UI, the default no-op context behavior is:\n\n- `select/input/editor` return `undefined`\n- `confirm` returns `false`\n- `notify`, `setStatus`, `setEditorText` are no-ops\n- `getEditorText` returns `\"\"`\n\n### Status line behavior\n\nHook status text set via `ctx.ui.setStatus(key, text)` is:\n\n- stored per key\n- sorted by key name\n- sanitized (`\\r`, `\\n`, `\\t` → spaces; repeated spaces collapsed)\n- joined and width-truncated for display\n\n## Error propagation and fallback\n\n### Load-time\n\n- invalid module or missing default export → captured in `LoadHooksResult.errors`\n- loading continues for other hooks\n\n### Event-time\n\n`HookRunner.emit(...)` catches handler errors for most events and emits `HookError` to listeners (`hookPath`, `event`, `error`), then continues.\n\n`emitToolCall(...)` is stricter: handler errors are not swallowed there; they propagate to caller. In `HookToolWrapper`, this blocks the tool call (fail-safe).\n\n## Realistic API examples\n\n### Block unsafe bash commands\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n const cmd = String(event.input.command ?? \"\");\n if (!cmd.includes(\"rm -rf\")) return;\n\n if (!ctx.hasUI) return { block: true, reason: \"rm -rf blocked (no UI)\" };\n const ok = await ctx.ui.confirm(\"Dangerous command\", `Allow: ${cmd}`);\n if (!ok) return { block: true, reason: \"user denied command\" };\n });\n}\n```\n\n### Redact tool output on post-execution\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_result\", async (event) => {\n if (event.toolName !== \"read\" || event.isError) return;\n\n const redacted = event.content.map((chunk) => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replaceAll(/API_KEY=\\S+/g, \"API_KEY=[REDACTED]\"),\n };\n });\n\n return { content: redacted };\n });\n}\n```\n\n### Modify model context per LLM call\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"context\", async (event) => {\n const filtered = event.messages.filter(\n (msg) => !(msg.role === \"custom\" && msg.customType === \"debug-only\"),\n );\n return { messages: filtered };\n });\n}\n```\n\n### Register slash command with command-safe context methods\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.registerCommand(\"handoff\", {\n description: \"Create a new session with setup message\",\n handler: async (_args, ctx) => {\n await ctx.waitForIdle();\n await ctx.newSession({\n parentSession: ctx.sessionManager.getSessionFile(),\n setup: async (sm) => {\n sm.appendMessage({\n role: \"user\",\n content: [\n { type: \"text\", text: \"Continue from prior session summary.\" },\n ],\n timestamp: Date.now(),\n });\n },\n });\n },\n });\n}\n```\n\n## Export surface\n\n`src/extensibility/hooks/index.ts` and the package subpath `@oh-my-pi/pi-coding-agent/extensibility/hooks` export:\n\n- loading APIs (`discoverAndLoadHooks`, `loadHooks`)\n- runner and wrapper (`HookRunner`, `HookToolWrapper`)\n- all hook types\n- `execCommand` re-export\n\nThe package root (`@oh-my-pi/pi-coding-agent`) does not re-export `HookAPI`; import legacy hook types from the hooks subpath.\n",
|
|
23
|
+
"install-id.md": "# Install ID\n\nA persistent per-install UUID that identifies a single oh-my-pi installation across sessions. Used as a stable correlation key for server-side dedup of telemetry-style pushes (currently the auto-QA grievance flush from `report_tool_issue`).\n\n## API\n\nExported from `@oh-my-pi/pi-utils` (`packages/utils/src/dirs.ts`):\n\n| Symbol | Purpose |\n| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |\n| `getInstallId(): string` | Returns the install ID, generating and persisting one on first call. Result is cached in-process for the lifetime of the runtime. |\n| `__resetInstallIdCacheForTests(): void` | Clears the in-process cache. Test-only — MUST NOT be called from production code. |\n\nGenerated IDs are lowercase RFC 4122 UUIDs. Existing persisted values are accepted case-insensitively when they match `^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$` with the regex `i` flag, and are returned exactly as stored.\n\n## Storage\n\n- Path: `<config-root>/install-id` — i.e. `~/.omp/install-id` by default, respecting `PI_CONFIG_DIR` via `getConfigRootDir()`.\n- Format: a single UUID line (trailing `\\n`).\n- Permissions: file is created with mode `0o600`.\n- Lifecycle: independent of `~/.omp/agent/`. Wiping agent state (sessions, settings, DB) does NOT regenerate the install ID; only deleting the `install-id` file itself does.\n\n## Generation and lifecycle\n\n1. First call to `getInstallId()` reads the file. If contents parse as a valid UUID, that value is cached and returned.\n2. Otherwise the helper calls `crypto.randomUUID()` (Node's CSPRNG-backed UUID v4) to mint a new ID.\n3. The new value is written via `open(O_WRONLY | O_CREAT | O_EXCL, 0o600)`. The exclusive-create guard means two processes hitting first-call simultaneously cannot both succeed — the loser sees `EEXIST`, re-reads the winner's file, and adopts that ID.\n4. If the existing file contained non-empty garbage (failed UUID regex), it is `unlink`ed before the exclusive create so `O_EXCL` does not trip on stale data.\n5. Any other write failure (read-only FS, permission error) is swallowed: the freshly generated UUID is still cached in-memory so the rest of the process sees a stable value, and subsequent process launches will retry persistence.\n6. Subsequent in-process calls return the cached value without touching disk. Mutating the file on disk after the first call has no effect until the process restarts (or tests call `__resetInstallIdCacheForTests`).\n\n## Consumers\n\n- `packages/coding-agent/src/tools/report-tool-issue.ts` — included as `installId` in the auto-QA grievance push body so the backend can deduplicate repeated reports from the same install. See `dev.autoqaPush.*` settings and `PI_AUTO_QA_PUSH_*` env vars.\n\nNew consumers MUST treat the value as opaque and MUST NOT derive PII from it; the helper does not mix in hostname, username, or any other host-identifying entropy.\n\n## See also\n\n- [environment-variables.md](environment-variables.md) — `PI_CONFIG_DIR` controls where `install-id` lives.\n- [config-usage.md](config-usage.md) — broader config-root layout.\n",
|
|
24
|
+
"keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `omp` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.omp/agent/keybindings.yml`. The file is a YAML mapping whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.omp/agent/config.yml`, and there is no nested `keybindings` object.\n\n```yaml\napp.model.cycleForward: Ctrl+P\napp.model.selectTemporary: Alt+P\napp.plan.toggle: Alt+Shift+P\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```yaml\napp.stt.toggle: []\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --------------------------- | -------------------------------------- | --------------------------------------------- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models in temporary mode |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Alt+M` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Q`, `Ctrl+Enter` | Queue a follow-up message |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.display.reset` | `Ctrl+L` | Reset terminal display |\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.clipboard.pasteImage` | `Ctrl+V` (`Alt+V` fallback on Windows) | Paste an image from the clipboard |\n| `app.stt.toggle` | `Alt+H` | Toggle speech-to-text recording |\n\nOn Windows Terminal, `Ctrl+V` may be handled by the terminal paste command before `omp` sees it; use the `Alt+V` fallback when clipboard image paste appears to do nothing. Windows Terminal also swallows `Ctrl+Enter`, so the follow-up shortcut also binds `Ctrl+Q` — the same chord GitHub Copilot CLI uses. If your existing `keybindings.yml` already assigns `Ctrl+Q` to another action, that user remap wins and follow-up keeps `Ctrl+Enter` unless you explicitly bind `app.message.followUp`.\n\nTerminals that implement OSC 5522 enhanced paste can send clipboard MIME data directly to `omp`; image pastes are attached as `[Image #N]`, while text/plain paste events keep normal paste behavior. When OSC 5522 is unavailable, bracketed paste still handles text, and a pasted single image-file path is loaded as an image when the file is readable from the `omp` host.\n\nOlder unqualified action names are migrated when `keybindings.yml` is loaded, but new docs and new configs should use the namespaced action IDs above. Existing `keybindings.json` files are still accepted and migrated to `keybindings.yml`; `keybindings.yaml` is also accepted.\n",
|
|
25
|
+
"local-models.md": "# Embedded Local Tiny-Model Experiments\n\nThis document summarizes the experiments behind the optional **local** tiny-model paths for\nsession-title generation (`providers.tinyModel`), Mnemopi memory extraction/consolidation\n(`providers.memoryModel`), and the `auto` thinking-level difficulty classifier\n(`providers.autoThinkingModel`, which reuses the memory-model registry). It is a factual engineering\nrecord for maintainers: what we measured, which recipes won, and which models we shipped. All three\nsettings default to `online`, so existing users incur no downloads or on-device inference cost unless\nthey opt in.\n\n## Runtime / environment findings\n\n- **Stack**: `@huggingface/transformers` (transformers.js) v4 running under Bun. In Bun the library\n loads the **native `onnxruntime-node` backend** (not the WASM build).\n- **Device policy**: local tiny models default to CPU-only inference and retry once on CPU if an\n explicit accelerated provider cannot initialize.\n - Pick a provider persistently with the `providers.tinyModelDevice` setting (`default` keeps CPU),\n or per-run with the `PI_TINY_DEVICE` env var (which overrides the setting).\n - Accepted values are `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`,\n `webnn`, `webnn-gpu`, `webnn-cpu`, and `webnn-npu`.\n - Direct `coreml` remains opt-in via `PI_TINY_DEVICE=coreml`; it is not part of the default because\n cached decoder-LLM ONNX loads can fail during session initialization.\n - WebGPU/Metal works for the single-process eval harness, but the production worker forces\n Darwin `gpu`/`webgpu`/`auto` requests back to CPU because ONNX Runtime/Bun currently\n hard-crashes on worker teardown after WebGPU inference.\n - Use `providers.tinyModelDevice` or `PI_TINY_DEVICE` only when explicitly opting out of the CPU\n default.\n- **Quantization: q4 is the sweet spot** — smaller on disk, faster to load, and fast at inference.\n q8/int8 loads slower _and_ infers slower on CPU. Every shipped model defaults to `q4`; override the\n precision persistently with the `providers.tinyModelDtype` setting (`default` keeps `q4`, e.g. `fp16`\n for higher fidelity), or per-run with `PI_TINY_DTYPE` (which overrides the setting). Accepts `auto`,\n `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`; an\n unrecognized value fails loudly at worker startup.\n- **Load-time correction (important).** An earlier belief that \"q4 >=1B models take minutes to load\"\n was a **measurement artifact** caused by running ~5 multi-GB HuggingFace downloads in parallel\n (I/O saturation). Clean, isolated **warm** loads are all sub-3s:\n - TinyLlama-1.1B q4: ~0.5s\n - Llama-3.2-1B q4: ~2.8s (`graphOpt=all`) / ~0.5s (`disabled`)\n - LFM2-1.2B q4: ~0.36s\n - Qwen2.5-1.5B q4: ~1.5s\n - Qwen3-1.7B q4: ~1.6s\n - gemma-3-1b q4: ~1.1s\n - Conclusion: **1B–1.7B models are viable on CPU.**\n- **`session_options.graphOptimizationLevel`** trades load vs inference speed: `disabled` = fastest\n load, slightly slower inference; `all` = default.\n- **First run** downloads weights from the HF Hub to a cache dir (q4 weights ~200MB–1.1GB depending\n on model); subsequent **warm** loads are sub-second to ~3s. Inference is async and\n background-friendly for memory tasks; titles are semi-interactive.\n\n## Task 1: Session title generation (`providers.tinyModel`)\n\n**Task**: turn the first user message into a 3–6 word title. Tiny models (sub-1B) suffice.\n\n**Winning recipe**:\n\n- Plain system prompt (no few-shot).\n- **Prefill** the assistant turn with `<title>` and **stop at `</title>`**, then take the first line.\n- Greedy decoding (`do_sample:false`), `enable_thinking:false` in the chat template.\n\n**What we learned**:\n\n- **Few-shot examples HURT sub-0.6B models** for titles; the tag-prefill rescues even 270M models.\n- **Token biasing (`bad_words_ids`) is a confirmed no-op** here — the prefill already controls the\n opener.\n\n**Leaderboard** (tag trick, CPU, warm):\n\n| Model | Verdict |\n| ------------- | ----------------------------------- |\n| LFM2-350M | Best speed/quality balance (~212MB) |\n| Qwen3-0.6B | Most robust |\n| gemma-3-270m | Smallest viable |\n| Qwen2.5-0.5B | Acceptable |\n| SmolLM2-135M | Too small |\n| flan-t5-small | Rejected — just echoes the input |\n\n**Shipped local options**: `lfm2-350m`, `qwen3-0.6b`, `gemma-270m`, `qwen2.5-0.5b`, `lfm2-700m`.\n**Default**: `online` (pi/smol).\n\n## Task 2: Mnemopi memory (`providers.memoryModel`)\n\nMnemopi runs two small-LLM tasks:\n\n1. **Extraction** — pull durable, structured items from a single message.\n2. **Consolidation** — summarize a list of memories into 1–3 faithful sentences.\n\nThese need **bigger models than titles: 1B–1.7B**. We tested LFM2-1.2B, Qwen2.5-1.5B, Qwen3-1.7B,\nand gemma-3-1b (q4, CPU) via four parallel agents each running 27–31 experiments.\n\n### Extraction findings\n\nThe stock 5-category JSON prompt fails on small models in two ways:\n\n1. The all-empty example `{\"facts\":[],...}` gets **copied verbatim** → 0 facts extracted.\n2. Capable models emit **JSON objects inside arrays**, which Mnemopi's `String(item)` coerces into\n the literal string `[object Object]`.\n\nThe robust fix is a **one-item-per-line output format** (consumed by Mnemopi's parser line-fallback)\nor a **flat JSON array of strings**. Every model also over-extracts pure small talk; an explicit\nchit-chat → NONE example is the best mitigation.\n\n### Technique polarity flips vs titles\n\n- At 1B+, **few-shot is the dominant quality lever**: e.g. Qwen2.5-1.5B extraction F1 0.52 → 0.83\n going 1 → 3 shots; gemma recall 0.65 → 0.92 with 2 shots.\n- **Prefill HURTS extraction** — it forces output on small talk, producing false positives.\n- **System-split** (instructions in the system role) helps models that have a system role.\n- **Greedy >= temperature** for both tasks.\n- **Token biasing** is again a no-op.\n\n### Per-model verdicts (head-to-head, 16-fixture set)\n\n- **Qwen3-1.7B** — most disciplined extraction: returns empty on small talk, no buried-fact leak,\n preserves language, clean flat JSON. Weaknesses: coarse granularity, missed a multi-turn value\n update.\n- **Qwen2.5-1.5B** — best extraction granularity (atomic facts), caught the value update, zero\n small-talk leakage. Weaknesses: weakest consolidation (run-on, no dedup) and one degenerate\n buried-fact output.\n- **gemma-3-1b** — best consolidation (dedup works, faithful, clean single-memory). Weaknesses: leaks\n small talk and translated German.\n- **LFM2-1.2B** — solid and fastest to load. Weaknesses: `Label: value` noise, small-talk + buried\n leaks, a fluffy single-memory summary.\n\n### Recommendation\n\nExtraction favors **precision** (do not pollute long-term memory) → **Qwen3-1.7B is the best single\npick** (its consolidation is good enough). If running a second model for consolidation, **gemma-3-1b**\nwins that task.\n\n**Shipped local options**: `qwen3-1.7b` (recommended), `gemma-3-1b`, `qwen2.5-1.5b`, `lfm2-1.2b`.\n**Default**: `online` (the configured smol model).\n\n### Known Mnemopi parser bugs (surfaced by these experiments)\n\n- `String(item)` produces `[object Object]` on object array items.\n- The line-fallback drops items `<=10` chars, so a correct short fact like `Name: Can` is discarded.\n\n\n## Integration notes\n\n- `providers.tinyModel`, `providers.memoryModel`, and `providers.autoThinkingModel` default to\n `online`, so existing users get **no downloads or on-device inference cost** unless they opt in.\n- Local inference runs **in a worker** (off the main thread); models are cached on disk and\n downloaded on first use.\n- The memory local path applies the refined recipes (line-format + small-talk-guarded extraction\n prompt, hardened consolidation prompt) via Mnemopi prompt overrides; the **online path is\n unchanged**.\n- `providers.autoThinkingModel` uses the same shipped local options as `providers.memoryModel`.\n",
|
|
26
|
+
"lsp-config.md": "# LSP configuration in OMP\n\nThis guide explains how to configure language servers for the OMP coding agent.\n\nSource of truth in code:\n\n- Server config type: `packages/coding-agent/src/lsp/types.ts` (`ServerConfig`)\n- Config loader: `packages/coding-agent/src/lsp/config.ts`\n- Built-in server definitions: `packages/coding-agent/src/lsp/defaults.json`\n\n## Auto-detection\n\nWhen no LSP config file is present, OMP auto-detects servers by intersecting two conditions:\n\n1. The project directory contains at least one of the server's `rootMarkers`.\n2. The server binary is available — checked in project-local bin directories first (e.g., `node_modules/.bin/`, `.venv/bin/`), then `$PATH`.\n\nNo configuration is required for common setups. The built-in server list covers most popular languages; see [`defaults.json`](../packages/coding-agent/src/lsp/defaults.json) for the full set.\n\n## Config file locations\n\nOMP merges LSP config from multiple files, lowest to highest priority:\n\n| Priority | Location |\n| ----------- | --------------------------------------------------------------------------------------------------------------------------- |\n| 5 (lowest) | `~/lsp.json`, `~/.lsp.json`, `~/lsp.yaml`, `~/.lsp.yaml`, `~/lsp.yml`, `~/.lsp.yml` |\n| 4 | Plugin LSP configs (marketplace / `--plugin-dir` roots) |\n| 3 | User config dirs: `~/.omp/agent/lsp.*`, `~/.claude/lsp.*`, `~/.codex/lsp.*`, `~/.gemini/lsp.*` |\n| 2 | Project config dirs: `<project>/.omp/lsp.*`, `<project>/.claude/lsp.*`, `<project>/.codex/lsp.*`, `<project>/.gemini/lsp.*` |\n| 1 (highest) | Project root: `<project>/lsp.*` and `<project>/.lsp.*` |\n\nEach location accepts `.json`, `.yaml`, and `.yml` variants, including hidden-file versions (`.lsp.json`, `.lsp.yaml`, `.lsp.yml`). Files are merged in order: higher-priority files override lower-priority fields for the same server. Servers not mentioned in any override file remain at their built-in defaults.\n\n**Recommended locations:**\n\n- User-wide preferences → `~/.omp/agent/lsp.json`\n- Project-specific overrides → `<project>/.omp/lsp.json`\n\n> **Note:** Auto-detection is skipped only when at least one config file contributes server overrides. A config file that only sets `idleTimeoutMs` still lets OMP auto-detect built-in servers. When server overrides exist, OMP merges them with defaults and then loads servers that have matching `rootMarkers`, an available binary, and are not explicitly `disabled`.\n\n## File shape\n\nBoth JSON and YAML are accepted. The top-level object can use either a `servers` wrapper key or a flat map directly:\n\n```json\n{\n \"servers\": {\n \"server-name\": { ... }\n },\n \"idleTimeoutMs\": 300000\n}\n```\n\nor (flat, without the `servers` wrapper):\n\n```json\n{\n \"server-name\": { ... },\n \"idleTimeoutMs\": 300000\n}\n```\n\nTop-level keys:\n\n- `servers` — map of server name to `ServerConfig` (optional wrapper; flat form is equivalent)\n- `idleTimeoutMs` — shut down idle language servers after this many milliseconds; disabled by default\n\n## ServerConfig fields\n\n| Field | Type | Required | Description |\n| ----------------- | ---------- | -------- | ---------------------------------------------------------------------------------------------------------------- |\n| `command` | `string` | yes | Binary name (resolved via PATH/local bins) or absolute path |\n| `args` | `string[]` | no | Arguments passed to the binary |\n| `fileTypes` | `string[]` | yes | File extensions this server handles, e.g. `[\".ts\", \".tsx\"]` |\n| `rootMarkers` | `string[]` | yes | Files/dirs that indicate a project root; glob patterns (e.g. `*.cabal`) are supported |\n| `initOptions` | `object` | no | Sent as `initializationOptions` during LSP handshake |\n| `settings` | `object` | no | Workspace settings pushed via `workspace/didChangeConfiguration` |\n| `disabled` | `boolean` | no | Set to `true` to disable this server entirely |\n| `warmupTimeoutMs` | `number` | no | Startup timeout in ms for this server (overrides the global default) |\n| `isLinter` | `boolean` | no | Mark server as linter/formatter only; excluded from type-intelligence operations (hover, go-to-definition, etc.) |\n| `capabilities` | `object` | no | Opt-in server-specific features; see [Capabilities](#capabilities) |\n\n`resolvedCommand` is populated automatically at runtime — do not set it manually.\n\n### Capabilities\n\nThe `capabilities` object enables optional server-specific features that OMP supports on a per-server basis:\n\n```json\n{\n \"capabilities\": {\n \"flycheck\": true,\n \"ssr\": true,\n \"expandMacro\": true,\n \"runnables\": true,\n \"relatedTests\": true\n }\n}\n```\n\nAll fields are boolean and optional. They are currently used by `rust-analyzer`.\n\n## Common recipes\n\n### Override a built-in server's settings\n\nPartial overrides are merged onto the built-in defaults. You only need to specify the fields you want to change.\n\n```json\n{\n \"servers\": {\n \"typescript-language-server\": {\n \"args\": [\"--stdio\", \"--log-level\", \"4\"]\n }\n }\n}\n```\n\n```yaml\nservers:\n gopls:\n settings:\n gopls:\n gofumpt: false\n staticcheck: false\n```\n\n### Disable a built-in server\n\n```json\n{\n \"servers\": {\n \"eslint\": {\n \"disabled\": true\n }\n }\n}\n```\n\n### Register a custom server\n\nNew servers require `command`, `fileTypes`, and `rootMarkers`. All other fields are optional.\n\n```json\n{\n \"servers\": {\n \"my-lsp\": {\n \"command\": \"my-lsp-server\",\n \"args\": [\"--stdio\"],\n \"fileTypes\": [\".xyz\"],\n \"rootMarkers\": [\".xyz-project\", \".git\"]\n }\n }\n}\n```\n\n### Set a global idle timeout\n\nShut down language servers that have been inactive for more than five minutes:\n\n```json\n{\n \"idleTimeoutMs\": 300000\n}\n```\n\n### Disable a server for one project, keep it globally\n\nPlace the override in `<project>/.omp/lsp.json`:\n\n```json\n{\n \"servers\": {\n \"pylsp\": {\n \"disabled\": true\n }\n }\n}\n```\n\nThe user-level config in `~/.omp/agent/lsp.json` is unaffected; pylsp is only suppressed in this project.\n\n## Built-in server list\n\nThe following servers ship in `defaults.json` and are eligible for auto-detection:\n\n| Server key | Language(s) | Binary |\n| ----------------------------- | ----------------------------- | --------------------------------- |\n| `rust-analyzer` | Rust | `rust-analyzer` |\n| `clangd` | C, C++, ObjC | `clangd` |\n| `zls` | Zig | `zls` |\n| `gopls` | Go | `gopls` |\n| `typescript-language-server` | TypeScript, JavaScript | `typescript-language-server` |\n| `denols` | TypeScript, JavaScript (Deno) | `deno` |\n| `biome` | TS/JS/JSON (linter) | `biome` |\n| `eslint` | TS/JS/Vue/Svelte (linter) | `vscode-eslint-language-server` |\n| `vscode-html-language-server` | HTML | `vscode-html-language-server` |\n| `vscode-css-language-server` | CSS, SCSS, Less | `vscode-css-language-server` |\n| `vscode-json-language-server` | JSON | `vscode-json-language-server` |\n| `tailwindcss` | HTML, CSS, TS/JS | `tailwindcss-language-server` |\n| `svelte` | Svelte | `svelteserver` |\n| `vue-language-server` | Vue | `vue-language-server` |\n| `astro` | Astro | `astro-ls` |\n| `pyright` | Python | `pyright-langserver` |\n| `basedpyright` | Python | `basedpyright-langserver` |\n| `pylsp` | Python | `pylsp` |\n| `ruff` | Python (linter) | `ruff` |\n| `jdtls` | Java | `jdtls` |\n| `kotlin-lsp` | Kotlin | `kotlin-lsp` |\n| `metals` | Scala | `metals` |\n| `hls` | Haskell | `haskell-language-server-wrapper` |\n| `ocamllsp` | OCaml | `ocamllsp` |\n| `elixirls` | Elixir | `elixir-ls` |\n| `erlangls` | Erlang | `erlang_ls` |\n| `gleam` | Gleam | `gleam` |\n| `solargraph` | Ruby | `solargraph` |\n| `ruby-lsp` | Ruby | `ruby-lsp` |\n| `rubocop` | Ruby (linter) | `rubocop` |\n| `bashls` | Bash, Zsh | `bash-language-server` |\n| `lua-language-server` | Lua | `lua-language-server` |\n| `intelephense` | PHP | `intelephense` |\n| `phpactor` | PHP | `phpactor` |\n| `omnisharp` | C# | `omnisharp` |\n| `yamlls` | YAML | `yaml-language-server` |\n| `terraformls` | Terraform | `terraform-ls` |\n| `dockerls` | Dockerfile | `docker-langserver` |\n| `helm-ls` | Helm | `helm_ls` |\n| `nixd` | Nix | `nixd` |\n| `nil` | Nix | `nil` |\n| `ols` | Odin | `ols` |\n| `dartls` | Dart | `dart` |\n| `marksman` | Markdown | `marksman` |\n| `texlab` | LaTeX | `texlab` |\n| `graphql` | GraphQL | `graphql-lsp` |\n| `prismals` | Prisma | `prisma-language-server` |\n| `vimls` | Vim script | `vim-language-server` |\n| `emmet-language-server` | HTML, CSS, JSX | `emmet-language-server` |\n| `sourcekit-lsp` | Swift | `sourcekit-lsp` |\n| `swiftlint` | Swift (linter) | `swiftlint` |\n| `tlaplus` | TLA+ | `tlapm_lsp` |\n",
|
|
27
|
+
"macos-signing-notarization.md": "# macOS signing & notarization\n\nThe compiled macOS `omp` binaries shipped on GitHub Releases are signed with a\n**Developer ID Application** certificate and **notarized** by Apple. This makes\nthem Gatekeeper-acceptable and is the prerequisite for an official Homebrew\nsubmission (see [#776](https://github.com/can1357/oh-my-pi/issues/776)).\n\nSigning happens in CI, in the `release_binary` job's darwin matrix legs\n(`.github/workflows/ci.yml`), via `scripts/ci-macos-sign.sh`. It **auto-skips**\nuntil the `APPLE_*` repository secrets below are configured, so releases keep\nworking (ad-hoc signed, as before) in the meantime.\n\n## How it works\n\n1. `ci:release:build-binaries` builds and **ad-hoc** signs the binary (so it can\n run on the build runner).\n2. `scripts/ci-macos-sign.sh` then:\n - imports the Developer ID cert into a throwaway keychain;\n - re-signs with `--options runtime --timestamp` (hardened runtime + secure\n timestamp) and `--entitlements scripts/macos-entitlements.plist`;\n - runs `--version` and `--smoke-test` under the new signature to fail fast;\n - notarizes the binary via `notarytool submit --wait`.\n3. `release_github_verify` re-downloads the published arm64 asset and asserts it\n is **not** ad-hoc, passes `codesign --verify --strict`, and boots cleanly.\n\n### Why the entitlements are mandatory\n\nThe binary is a Bun single-file executable, so the hardened runtime needs:\n\n| Entitlement | Reason |\n| --- | --- |\n| `com.apple.security.cs.allow-jit` | JavaScriptCore JITs at runtime. |\n| `com.apple.security.cs.allow-unsigned-executable-memory` | JSC executable memory pages. |\n| `com.apple.security.cs.disable-library-validation` | omp extracts its native addon (`pi_natives.<triple>.node`) and other optional dylibs to a runtime cache and `dlopen()`s them. They do not share the main binary's Team ID, so without this the hardened runtime aborts with *\"mapping process and mapped file have different Team IDs\"* — breaking effectively every command. |\n\nWithout `disable-library-validation`, a signed+notarized binary signs and\nnotarizes fine but **fails at first real use**. `scripts/ci-macos-sign.sh` runs\n`--smoke-test` after signing specifically to catch this before notarizing.\n\n### Stapling limitation (important)\n\nA bare Mach-O executable **cannot be stapled** (`stapler` only supports\n`.app`/`.pkg`/`.dmg`). The binary is genuinely notarized — `notarytool` returns\n`Accepted` and the ticket exists on Apple's servers keyed to its cdhash — but\nbecause there is no *stapled* ticket, a direct `spctl -a -t exec` assessment\nreports `rejected / source=Unnotarized Developer ID`. This is expected and is\n**not** a signing or credential failure.\n\nWhat this means in practice:\n\n- `curl https://omp.sh/install | sh` — `curl` sets no quarantine bit, so\n Gatekeeper is never consulted; the binary just runs. ✅\n- Homebrew **formula** installs — Homebrew does not quarantine formula files, so\n Gatekeeper is never consulted. ✅\n- Anything that **quarantines** the binary (a browser download, or a Homebrew\n **cask**) and is assessed offline will be blocked, because there is no stapled\n ticket. For that route, wrap the binary in a stapleable, notarized **`.pkg` or\n `.dmg`** (`xcrun stapler staple` works on those). That is a follow-up and is\n **not** required for the `curl`/formula paths.\n\n## Required GitHub secrets\n\nAdd these under **Settings → Secrets and variables → Actions** (repo secrets).\nBoth the cert (`APPLE_CERTIFICATE_P12`) **and** the API key (`APPLE_API_KEY`)\nmust be present for signing to engage.\n\n| Secret | What it is |\n| --- | --- |\n| `APPLE_CERTIFICATE_P12` | base64 of the exported Developer ID Application `.p12` (cert + private key). |\n| `APPLE_CERTIFICATE_PASSWORD` | password you set when exporting the `.p12`. |\n| `APPLE_API_KEY_ID` | App Store Connect API **Key ID**. |\n| `APPLE_API_ISSUER_ID` | App Store Connect API **Issuer ID** (UUID). |\n| `APPLE_API_KEY` | base64 of the App Store Connect `.p8` private key. |\n\n### Producing the credential files\n\nDrop these into a working directory (default `~/omp-signing`):\n\n| File | How |\n| --- | --- |\n| `*.p12` | **Keychain Access** → right-click your *Developer ID Application: …* identity (the entry that expands to a cert **with** a private key) → **Export…** → save as `.p12` and set a password. |\n| `p12-password.txt` | the password you just set on the `.p12`. |\n| `AuthKey_<KEYID>.p8` | App Store Connect → **Users and Access → Integrations → App Store Connect API** → create a key (**Account Holder** role also allows API cert creation; **Developer** is enough for notarization) → **download once** (non-recoverable). |\n| `issuer-id.txt` | the **Issuer ID** (UUID) shown above the keys table. |\n| `key-id.txt` | *optional* — the Key ID; otherwise read from the `.p8` filename. |\n\nThe App Store Connect API key is the one credential that **cannot** be minted\nfrom a CLI — it is the bootstrap credential for the API itself, and the `.p8`\ndownloads exactly once. Everything else is local.\n\n### Uploading (no value leaves disk)\n\n`scripts/ci-macos-upload-secrets.sh` validates the files (opens the `.p12` with\nyour password, sanity-checks the `.p8`) and pipes each value to `gh secret set`\nover stdin — no secret is ever printed to the terminal, argv, or shell history:\n\n```sh\nscripts/ci-macos-upload-secrets.sh ~/omp-signing --dry-run # validate first\nscripts/ci-macos-upload-secrets.sh ~/omp-signing # upload all five\ngh secret list --repo can1357/oh-my-pi # confirm\n```\n\nRe-run it whenever the certificate is renewed.\n\n### Finding your signing identity / Team ID (sanity check)\n\n```sh\nsecurity find-identity -v -p codesigning\n# e.g. \"Developer ID Application: Your Name (TEAMID1234)\"\n```\n\nThe script selects the first `Developer ID Application` identity automatically;\nyou do not need to store the identity string or Team ID as a secret.\n\n## Local dry run\n\nYou can exercise the full sign+notarize path locally (real cert + API key) by\nexporting the five env vars and running:\n\n```sh\nRELEASE_TARGETS=darwin-arm64 bun run ci:release:build-binaries\nAPPLE_CERTIFICATE_P12=… APPLE_CERTIFICATE_PASSWORD=… \\\nAPPLE_API_KEY_ID=… APPLE_API_ISSUER_ID=… APPLE_API_KEY=… \\\n bash scripts/ci-macos-sign.sh packages/coding-agent/binaries/omp-darwin-arm64\n```\n",
|
|
28
|
+
"marketplace.md": "# Marketplace plugin system\n\nThe marketplace system lets you discover, install, and manage plugins from Git, local, or direct-catalog sources. It is compatible with the Claude Code plugin registry format.\n\n## Quick start\n\n```\n/marketplace add anthropics/claude-plugins-official\n/marketplace install wordpress.com@claude-plugins-official\n```\n\nIn the TUI, `/marketplace` with no arguments opens the interactive plugin browser. In non-TUI command handling, `/marketplace` lists configured marketplaces; use `/marketplace discover` to browse.\n\n## Concepts\n\nA **marketplace** is a Git repository (or local directory) containing a catalog file at `.claude-plugin/marketplace.json`. The catalog lists available plugins with their sources, descriptions, and metadata.\n\nA **plugin** is a directory containing Claude/OMP plugin content such as skills, commands, hooks, tools, MCP servers, LSP servers, rules, prompts, or extension modules. Plugins are identified by `name@marketplace` (e.g. `code-review@claude-plugins-official`).\n\n**Scopes**: marketplace plugins can be installed at two scopes:\n\n- **user** (default) -- available in all projects, stored in `~/.omp/plugins/installed_plugins.json`\n- **project** -- available only in the active project, stored in the nearest project `.omp/plugins/installed_plugins.json`\n\nEnabled project-scoped installs shadow enabled user-scoped installs of the same plugin. A disabled project install does not shadow the user install.\n\n## Commands\n\n### Interactive mode\n\n| Command | Effect |\n| -------------- | ----------------------------------------- |\n| `/marketplace` | Open interactive plugin browser (install) |\n\n### Marketplace management\n\n| Command | Effect |\n| ---------------------------- | -------------------------------------------- |\n| `/marketplace add <source>` | Add a marketplace source |\n| `/marketplace remove <name>` | Remove a marketplace |\n| `/marketplace update [name]` | Re-fetch catalog(s); omit name to update all |\n| `/marketplace list` | List configured marketplaces |\n\n### Plugin operations\n\n| Command | Effect |\n| ------------------------------------------------------------------------- | -------------------------------------------------- |\n| `/marketplace discover [marketplace]` | Browse available plugins |\n| `/marketplace install [--force] [--scope user\\|project] name@marketplace` | Install a plugin |\n| `/marketplace uninstall [--scope user\\|project] name@marketplace` | Uninstall a plugin; no args opens the TUI selector |\n| `/marketplace installed` | List installed marketplace plugins |\n| `/marketplace upgrade [--scope user\\|project] [name@marketplace]` | Upgrade one or all plugins |\n| `/plugins list` | List npm/link and marketplace plugins |\n| `/plugins enable [--scope user\\|project] name@marketplace` | Enable a marketplace plugin |\n| `/plugins disable [--scope user\\|project] name@marketplace` | Disable a marketplace plugin |\n\n### CLI equivalents\n\nThe same operations are available from the command line:\n\n```\nomp plugin marketplace add <source>\nomp plugin marketplace remove <name>\nomp plugin marketplace update [name]\nomp plugin marketplace list\nomp plugin discover [marketplace]\nomp plugin install [--force] [--scope user|project] name@marketplace\nomp plugin uninstall [--scope user|project] name@marketplace\nomp plugin upgrade [--scope user|project] [name@marketplace]\nomp plugin enable [--scope user|project] name@marketplace\nomp plugin disable [--scope user|project] name@marketplace\n```\n\n## Marketplace sources\n\nWhen you run `/marketplace add <source>`, the system classifies the source:\n\n| Source format | Type | Example |\n| ------------------------------- | -------------------------------------------------- | -------------------------------------- |\n| `owner/repo` | GitHub shorthand | `anthropics/claude-plugins-official` |\n| `https://...*.json` | Direct catalog URL | `https://example.com/marketplace.json` |\n| `https://...` / `http://...` | Git repository unless the URL path ends in `.json` | `https://github.com/org/repo` |\n| `git@...` / `ssh://...` | Git repository | `git@github.com:org/repo.git` |\n| `./path` or `~/path` or `/path` | Local directory | `./my-marketplace` |\n\nGit and local sources must contain `.claude-plugin/marketplace.json`. Direct catalog URLs cache only the JSON catalog; plugins in URL-sourced catalogs cannot use relative string sources like `\"./plugins/foo\"`.\n\n## Catalog format (marketplace.json)\n\nA marketplace catalog lives at `.claude-plugin/marketplace.json` in the repository root:\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"my-marketplace\",\n \"owner\": {\n \"name\": \"Your Name\",\n \"email\": \"you@example.com\"\n },\n \"metadata\": {\n \"description\": \"A collection of plugins\",\n \"version\": \"1.0.0\",\n \"pluginRoot\": \"plugins\"\n },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What this plugin does\",\n \"source\": \"./my-plugin\",\n \"category\": \"development\",\n \"homepage\": \"https://github.com/you/my-plugin\"\n }\n ]\n}\n```\n\n### Required fields\n\n| Field | Description |\n| ------------ | ---------------------------------------------------------------------------------------------------------------- |\n| `name` | Marketplace name. Lowercase alphanumeric, hyphens, and dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner.name` | Marketplace owner name |\n| `plugins` | Array of plugin entries |\n\nTop-level `metadata.description`, `metadata.version`, and `metadata.pluginRoot` are optional. When `metadata.pluginRoot` is set, it is prepended to relative plugin `source` paths.\n\n### Plugin entry fields\n\n| Field | Required | Description |\n| ------------- | -------- | --------------------------------------------------------------------------------------- |\n| `name` | yes | Plugin name (same rules as marketplace name) |\n| `source` | yes | Where to find the plugin (see below) |\n| `description` | no | Short description |\n| `version` | no | Version string; install version falls back to plugin manifest, source SHA, then `0.0.0` |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `repository` | no | Repository URL/string |\n| `license` | no | License string |\n| `keywords` | no | Array of string keywords |\n| `category` | no | Category string (e.g. `development`, `productivity`, `security`) |\n| `tags` | no | Array of string tags |\n| `strict` | no | Boolean |\n| `commands` | no | Slash commands provided |\n| `agents` | no | Agents provided |\n| `hooks` | no | Hook definitions |\n| `mcpServers` | no | MCP server definitions |\n| `lspServers` | no | LSP server definitions or path; copied to `.lsp.json` on install |\n\n### Plugin source formats\n\nThe `source` field supports these formats. String sources must start with `./` and are resolved inside the marketplace root, after optional `metadata.pluginRoot` is prepended:\n\n**Relative path** (within the marketplace repo):\n\n```json\n\"source\": \"./my-plugin\"\n```\n\n**Git repository URL**:\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/repo.git\",\n \"sha\": \"abc123...\"\n}\n```\n\n**GitHub shorthand**:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/repo\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**Git subdirectory** (monorepo):\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"plugins/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**npm package** (parsed but not installable yet):\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@scope/my-plugin\",\n \"version\": \"1.0.0\"\n}\n```\n\nCurrent installer behavior rejects npm marketplace sources with `npm plugin sources are not yet supported`; use relative, GitHub, URL, or git-subdir sources.\n\n## On-disk layout\n\n```\n~/.omp/\n marketplaces.json # Registry of added marketplaces\n plugins/\n installed_plugins.json # User-scoped marketplace plugins (version: 2)\n cache/\n marketplaces/<name>/ # Cached marketplace clone/catalog\n plugins/<marketplace>___<plugin>___<version>/ # Cached plugin directories\n\n<project>/.omp/\n plugins/\n installed_plugins.json # Project-scoped marketplace plugins (version: 2)\n```\n\n## Naming rules\n\nMarketplace and plugin names must:\n\n- Start and end with a lowercase letter or digit\n- Contain only lowercase letters, digits, hyphens, and dots\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid examples: `my-plugin`, `code-review`, `wordpress.com`, `ai-firstify`\nInvalid examples: `-bad`, `bad-`, `.bad`, `Bad`, `under_score`\n",
|
|
29
|
+
"mcp-config.md": "# MCP configuration in OMP\n\nThis guide explains how to add, edit, and validate MCP servers for the OMP coding agent.\n\nSource of truth in code:\n\n- Runtime config types: `packages/coding-agent/src/mcp/types.ts`\n- Config writer: `packages/coding-agent/src/mcp/config-writer.ts`\n- Loader + validation: `packages/coding-agent/src/mcp/config.ts`\n- Standalone `mcp.json` discovery: `packages/coding-agent/src/discovery/mcp-json.ts`\n- Schema: `packages/coding-agent/src/config/mcp-schema.json`\n\n## Preferred config locations\n\nOMP can discover MCP servers from multiple tools (`.claude/`, `.cursor/`, `.vscode/`, `opencode.json`, and more), but for OMP-native configuration you should usually use one of these primary files:\n\n- Project: `.omp/mcp.json`\n- User: `~/.omp/agent/mcp.json`\n\nThe native provider also reads `.omp/.mcp.json` and `~/.omp/agent/.mcp.json` for compatibility, but OMP writes to the primary `mcp.json` paths above.\n\nOMP also accepts fallback standalone files in the project root:\n\n- `mcp.json`\n- `.mcp.json`\n\nUse `.omp/mcp.json` or `~/.omp/agent/mcp.json` when you want OMP to own the configuration. Use root `mcp.json` / `.mcp.json` only when you want a portable fallback file that other MCP clients may also read.\n\n## Add a schema reference\n\nAdd this line at the top of the file for editor autocomplete and validation:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {}\n}\n```\n\nOMP now writes this automatically when `/mcp add`, `/mcp enable`, `/mcp disable`, `/mcp reauth`, or other config-writing flows create or update an OMP-managed MCP file.\n\n## File shape\n\nOMP supports this top-level structure:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"server-name\": {\n \"type\": \"stdio\",\n \"command\": \"npx\",\n \"args\": [\"-y\", \"some-mcp-server\"]\n }\n },\n \"disabledServers\": [\"server-name\"]\n}\n```\n\nTop-level keys:\n\n- `$schema` — optional JSON Schema URL for tooling\n- `mcpServers` — map of server name to server config\n- `disabledServers` — user-level denylist used to turn off discovered servers by name; runtime loading reads this list from `~/.omp/agent/mcp.json`\n\nServer names must match `^[a-zA-Z0-9_.-]{1,100}$`.\n\n## Supported server fields\n\nShared fields for every transport:\n\n- `enabled?: boolean` — skip this server when `false`\n- `timeout?: number` — MCP request timeout in milliseconds; `0` disables client-side MCP timeouts\n- `auth?: { ... }` — auth metadata used by OMP for OAuth/API-key flows\n- `oauth?: { ... }` — explicit OAuth client settings used during auth/reauth\n\nSet `OMP_MCP_TIMEOUT_MS=0` to disable the client-side timeout for every MCP server in the current process. Set it to a positive millisecond value, such as `OMP_MCP_TIMEOUT_MS=120000`, to apply one global timeout without editing each server entry.\n\n### `stdio` transport\n\n`stdio` is the default when `type` is omitted.\n\nRequired:\n\n- `command: string`\n\nOptional:\n\n- `type?: \"stdio\"`\n- `args?: string[]`\n- `env?: Record<string, string>`\n- `cwd?: string`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"filesystem\": {\n \"command\": \"npx\",\n \"args\": [\n \"-y\",\n \"@modelcontextprotocol/server-filesystem\",\n \"/Users/alice/projects\",\n \"/Users/alice/Documents\"\n ]\n }\n }\n}\n```\n\nThis follows the official Filesystem MCP server package (`@modelcontextprotocol/server-filesystem`).\n\n### `http` transport\n\nRequired:\n\n- `type: \"http\"`\n- `url: string`\n\nOptional:\n\n- `headers?: Record<string, string>`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\"\n }\n }\n}\n```\n\nThis matches GitHub's hosted GitHub MCP server endpoint.\n\n### `sse` transport\n\nRequired:\n\n- `type: \"sse\"`\n- `url: string`\n\nOptional:\n\n- `headers?: Record<string, string>`\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"legacy-remote\": {\n \"type\": \"sse\",\n \"url\": \"https://example.com/mcp/sse\"\n }\n }\n}\n```\n\n`sse` is still supported for compatibility, but the MCP spec now prefers Streamable HTTP (`type: \"http\"`) for new servers.\n\n## Auth fields\n\nOMP understands two auth-related objects.\n\n### `auth`\n\n```json\n{\n \"type\": \"oauth\" | \"apikey\",\n \"credentialId\": \"optional-stored-credential-id\",\n \"tokenUrl\": \"optional-token-endpoint\",\n \"clientId\": \"optional-client-id\",\n \"clientSecret\": \"optional-client-secret\"\n}\n```\n\nUse this when OMP should remember how to rehydrate credentials for a server.\n\n### `oauth`\n\n```json\n{\n \"clientId\": \"...\",\n \"clientSecret\": \"...\",\n \"redirectUri\": \"...\",\n \"callbackPort\": 3334,\n \"callbackPath\": \"/oauth/callback\"\n}\n```\n\nUse this when the MCP server requires explicit OAuth client settings.\n\nSlack is the clearest current example. Slack's MCP server is hosted at `https://mcp.slack.com/mcp`, uses Streamable HTTP, and requires confidential OAuth with your Slack app's client credentials.\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"slack\": {\n \"type\": \"http\",\n \"url\": \"https://mcp.slack.com/mcp\",\n \"oauth\": {\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n },\n \"auth\": {\n \"type\": \"oauth\",\n \"tokenUrl\": \"https://slack.com/api/oauth.v2.user.access\",\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n }\n }\n }\n}\n```\n\nRelevant Slack endpoints from Slack's docs:\n\n- MCP endpoint: `https://mcp.slack.com/mcp`\n- Authorization endpoint: `https://slack.com/oauth/v2_user/authorize`\n- Token endpoint: `https://slack.com/api/oauth.v2.user.access`\n\n## Common copy-paste examples\n\n### Filesystem server via stdio\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"filesystem\": {\n \"command\": \"npx\",\n \"args\": [\n \"-y\",\n \"@modelcontextprotocol/server-filesystem\",\n \"/absolute/path/one\",\n \"/absolute/path/two\"\n ]\n }\n }\n}\n```\n\n### GitHub hosted server via HTTP\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\"\n }\n }\n}\n```\n\n### GitHub local server via Docker\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"github\": {\n \"command\": \"docker\",\n \"args\": [\n \"run\",\n \"-i\",\n \"--rm\",\n \"-e\",\n \"GITHUB_PERSONAL_ACCESS_TOKEN\",\n \"ghcr.io/github/github-mcp-server\"\n ],\n \"env\": {\n \"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"\n }\n }\n }\n}\n```\n\nThis matches GitHub's official local Docker image `ghcr.io/github/github-mcp-server`.\n\n### Slack hosted server via OAuth\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"mcpServers\": {\n \"slack\": {\n \"type\": \"http\",\n \"url\": \"https://mcp.slack.com/mcp\",\n \"oauth\": {\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n },\n \"auth\": {\n \"type\": \"oauth\",\n \"tokenUrl\": \"https://slack.com/api/oauth.v2.user.access\",\n \"clientId\": \"YOUR_SLACK_CLIENT_ID\",\n \"clientSecret\": \"YOUR_SLACK_CLIENT_SECRET\"\n }\n }\n }\n}\n```\n\n## Secrets and variable resolution\n\nThis is the part that usually trips people up.\n\n### Discovery-time `${...}` expansion\n\nOMP expands `${VAR}` and `${VAR:-default}` placeholders while discovering MCP configs from OMP-native files and standalone fallback files. Expansion applies recursively to string values in `command`, `args`, `env`, `cwd`, `url`, `headers`, `auth`, and `oauth`; unresolved placeholders remain literal strings.\n\nExample:\n\n```json\n{\n \"mcpServers\": {\n \"github\": {\n \"type\": \"http\",\n \"url\": \"https://api.githubcopilot.com/mcp/\",\n \"headers\": {\n \"Authorization\": \"Bearer ${GITHUB_TOKEN}\"\n }\n }\n }\n}\n```\n\n### Pre-connect env/header resolution\n\nBefore OMP launches a stdio server or makes an HTTP/SSE request, it resolves stdio `env` values and HTTP/SSE `headers` values like this:\n\n1. If a value starts with `!`, OMP runs the rest as a shell command with a 10s timeout and uses trimmed stdout.\n2. If the command fails, times out, or prints only whitespace, that `env`/`headers` entry is omitted.\n3. Otherwise OMP checks whether the value names an environment variable.\n4. If that environment variable is set to a non-empty value, OMP uses the environment value; otherwise it uses the string literally.\n\nExamples:\n\n```json\n{\n \"env\": {\n \"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"\n },\n \"headers\": {\n \"X-MCP-Insiders\": \"true\"\n }\n}\n```\n\nThat means this is valid and convenient for local secrets:\n\n- `\"GITHUB_PERSONAL_ACCESS_TOKEN\": \"GITHUB_PERSONAL_ACCESS_TOKEN\"` → copy from the current shell environment\n- `\"Authorization\": \"Bearer hardcoded-token\"` → use the literal value\n- `\"Authorization\": \"!printf 'Bearer %s' \\\"$GITHUB_TOKEN\\\"\"` → build the header from a command\n\n## `disabledServers`\n\n`disabledServers` is read from the user config file (`~/.omp/agent/mcp.json`) when a server is discovered from any source and you want OMP to ignore it without editing that other tool's config.\n\nExample:\n\n```json\n{\n \"$schema\": \"https://raw.githubusercontent.com/can1357/oh-my-pi/main/packages/coding-agent/src/config/mcp-schema.json\",\n \"disabledServers\": [\"github\", \"slack\"]\n}\n```\n\n## `/mcp add` vs editing JSON directly\n\nUse `/mcp add` when you want guided setup.\n\nUse direct JSON editing when:\n\n- you need a transport or auth option the wizard does not prompt for yet\n- you want to paste a server definition from another MCP client\n- you want schema-backed validation in your editor\n\nAfter editing, use:\n\n- `/mcp reload` to rediscover and reconnect servers in the current session\n- `/mcp list` to see which config file a server came from\n- `/mcp test <name>` to test a single server\n- `/mcp reconnect <name>` to reconnect one server without rediscovering all configs\n- `/mcp resources`, `/mcp prompts`, and `/mcp notifications` to inspect non-tool MCP capabilities\n\n## Validation rules OMP enforces\n\nFrom `validateServerConfig()` in `packages/coding-agent/src/mcp/config.ts`:\n\n- `stdio` requires `command`\n- `http` and `sse` require `url`\n- a server cannot set both `command` and `url`\n- unknown `type` values are rejected\n\nPractical implications:\n\n- Omitting `type` means `stdio`\n- If you paste a remote server config and forget `\"type\": \"http\"`, OMP will treat it as `stdio` and complain that `command` is missing\n- `sse` remains valid for compatibility, but new hosted servers should usually be configured as `http`\n\n## Discovery and precedence\n\nOMP does not merge duplicate server definitions across files. Discovery providers are prioritized, and the higher-priority definition wins. Separately, `disabledServers` from `~/.omp/agent/mcp.json` can suppress a discovered server by name.\n\nIn practice:\n\n- prefer `.omp/mcp.json` or `~/.omp/agent/mcp.json` when you want an OMP-specific override\n- keep server names unique across tools when possible\n- use `disabledServers` in the user config when a third-party config keeps reintroducing a server you do not want\n\n## Troubleshooting\n\n### `Server \"name\": stdio server requires \"command\" field`\n\nYou probably omitted `type: \"http\"` on a remote server.\n\n### `Server \"name\": both \"command\" and \"url\" are set`\n\nPick one transport. OMP treats `command` as stdio and `url` as http/sse.\n\n### `/mcp add` worked but the server still does not connect\n\nThe JSON is valid, but the server may still be unreachable. Use `/mcp test <name>` and check whether:\n\n- the binary or Docker image exists\n- required environment variables are set\n- the remote URL is reachable\n- the OAuth or API token is valid\n\n### The server exists in another tool's config but not in OMP\n\nRun `/mcp list`. OMP discovers many third-party MCP files, but project-level loading can also be disabled via the `mcp.enableProjectConfig` setting, and a user-level `disabledServers` entry can suppress a server by name.\n\n## References\n\n- MCP transport spec: https://modelcontextprotocol.io/specification/2025-03-26/basic/transports\n- Filesystem server package: https://www.npmjs.com/package/@modelcontextprotocol/server-filesystem\n- GitHub MCP server: https://github.com/github/github-mcp-server\n- Slack MCP server docs: https://docs.slack.dev/ai/slack-mcp-server/\n",
|
|
30
|
+
"mcp-protocol-transports.md": "# MCP Protocol and Transport Internals\n\nThis document describes how coding-agent implements MCP JSON-RPC messaging and how protocol concerns are split from transport concerns.\n\n## Scope\n\nCovers:\n\n- JSON-RPC request/response and notification flow\n- Server-to-client request handling (`ping`, `roots/list`)\n- Request correlation and lifecycle for stdio and HTTP/SSE transports\n- Timeout, cancellation, and auth-refresh behavior\n- Error propagation and malformed payload handling\n- Transport selection boundaries (`stdio` vs `http`/`sse`)\n- Which reconnect/retry responsibilities are transport-level vs manager/tool-bridge-level\n\nDoes not cover extension authoring UX or command UI.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/transports/stdio.ts`](../packages/coding-agent/src/mcp/transports/stdio.ts)\n- [`src/mcp/transports/http.ts`](../packages/coding-agent/src/mcp/transports/http.ts)\n- [`src/mcp/transports/index.ts`](../packages/coding-agent/src/mcp/transports/index.ts)\n- [`src/mcp/json-rpc.ts`](../packages/coding-agent/src/mcp/json-rpc.ts)\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n\n## Layer boundaries\n\n### Protocol layer (JSON-RPC + MCP methods)\n\n- Message shapes are defined in `types.ts` (`JsonRpcRequest`, `JsonRpcNotification`, `JsonRpcResponse`, `JsonRpcMessage`).\n- MCP client logic (`client.ts`) decides method order and session handshake:\n 1. `initialize` request\n 2. for HTTP/SSE transports, start the optional background SSE listener after the initialize response has established any session id\n 3. `notifications/initialized` notification\n 4. method calls like `tools/list`, `tools/call`\n\n### Transport layer (`MCPTransport`)\n\n`MCPTransport` abstracts delivery and lifecycle:\n\n- `request(method, params, options?) -> Promise<T>`\n- `notify(method, params?) -> Promise<void>`\n- `close()`\n- `connected`\n- optional callbacks: `onClose`, `onError`, `onNotification`, `onRequest`\n\nTransport implementations own framing and I/O details:\n\n- `StdioTransport`: newline-delimited JSON over subprocess stdio\n- `HttpTransport`: JSON-RPC over HTTP POST, with optional SSE responses/listening\n\n### Manager/client wiring\n\n`connectToServer()` always installs an `onRequest` handler for standard server-to-client requests. `MCPManager` installs notification handlers, OAuth refresh hooks for HTTP OAuth servers, and `onClose` reconnect handling for managed connections.\n\n## Transport selection\n\n`client.ts:createTransport()` chooses transport from config:\n\n- `type` omitted or `\"stdio\"` -> `createStdioTransport`\n- `\"http\"` or `\"sse\"` -> `createHttpTransport`\n\n`\"sse\"` is treated as an HTTP transport variant (same class), not a separate transport implementation.\n\n## JSON-RPC message flow and correlation\n\n## Request IDs\n\nEach transport generates per-request IDs with `Snowflake.next()`. IDs are transport-local correlation tokens.\n\n## Stdio correlation path\n\n- Outbound request is serialized as one JSON object + `\\n`.\n- `#pendingRequests: Map<id, {resolve,reject}>` stores in-flight requests.\n- Read loop parses JSONL from stdout and calls `#handleMessage`.\n- If inbound message has matching `id`, request resolves/rejects.\n- If inbound message has `method` and no `id`, treated as notification and sent to `onNotification`.\n- If inbound message has both `method` and `id`, treated as a server-to-client request and answered through `onRequest`; without a handler the transport replies with JSON-RPC `-32601 Method not found`.\n\nUnknown response IDs are ignored (no rejection, no error callback).\n\n## HTTP correlation path\n\n- Outbound request is HTTP `POST` with JSON body and generated `id`.\n- Non-SSE response path: parse one JSON-RPC response and return `result`/throw on `error`.\n- SSE response path (`Content-Type: text/event-stream`): stream events, return first message whose `id` matches expected request ID and has `result` or `error`.\n- SSE messages with `method` and no `id` are treated as notifications.\n- SSE messages with both `method` and `id` are treated as server-to-client requests and answered with a POSTed JSON-RPC response.\n\nIf SSE stream ends before matching response, request fails with `No response received for request ID ...`. After the matching response is captured, the transport drains remaining SSE messages in the background.\n\n## Notifications\n\nClient emits JSON-RPC notifications via `transport.notify(...)`.\n\n- Stdio: writes notification frame to stdin (`jsonrpc`, `method`, optional `params`) plus newline.\n- HTTP: sends POST body without `id`; success accepts `2xx` or `202 Accepted`.\n\nServer-initiated notifications are surfaced through transport `onNotification`; `MCPManager` consumes known MCP list/update notifications and can forward all notifications through its own callback.\n\n## Stdio transport internals\n\n## Lifecycle and state transitions\n\n- Initial: `connected=false`, `process=null`, pending map empty\n- `connect()`:\n - spawn subprocess with configured command/args/env/cwd\n - mark connected\n - start stdout read loop (`readJsonl`)\n - start stderr loop (read/discard; currently silent)\n- `close()`:\n - mark disconnected\n - reject all pending requests (`Transport closed`)\n - kill subprocess\n - await read loop shutdown\n - emit `onClose`\n\nIf read loop exits unexpectedly, `finally` triggers `#handleClose()` which performs the same pending-request rejection and close callback.\n\n## Timeout and cancellation\n\nPer request:\n\n- timeout defaults to `config.timeout ?? 30000`\n- optional `AbortSignal` from caller\n- abort and timeout both reject the pending promise and clean map entry\n\nCancellation is local only: transport does not send protocol-level cancellation notification to the server.\n\n## Malformed payload handling\n\nIn read loop:\n\n- each parsed JSONL line is passed to `#handleMessage` in `try/catch`\n- malformed/invalid message handling exceptions are dropped (`Skip malformed lines` comment)\n- loop continues, so one bad message does not kill the connection\n\nIf the underlying stream parser throws, `onError` is invoked (when still connected), then connection closes.\n\n## Disconnect/failure behavior\n\nWhen process exits or stream closes:\n\n- all in-flight requests are rejected with `Transport closed`\n- no automatic restart or reconnect\n- higher layers must reconnect by creating a new transport\n\n## Backpressure/streaming notes\n\n- Outbound writes use `stdin.write()` + `flush()` without awaiting drain semantics.\n- There is no explicit queue or high-watermark management in transport.\n- Inbound processing is stream-driven (`for await` over `readJsonl`), one parsed message at a time.\n\n## HTTP/SSE transport internals\n\n## Lifecycle and connection semantics\n\nHTTP transport has logical connection state, but request path is stateless per HTTP call:\n\n- `connect()` sets `connected=true` (no socket/session handshake)\n- optional server session tracking via `Mcp-Session-Id` header\n- `close()` optionally sends `DELETE` with `Mcp-Session-Id`, aborts SSE listener, emits `onClose`\n\nSo `connected` means \"transport usable\", not \"persistent stream established\".\n\n## Session header behavior\n\n- On POST response, if `Mcp-Session-Id` header is present, transport stores it.\n- Subsequent requests/notifications include `Mcp-Session-Id`.\n- `close()` tries to terminate server session with HTTP DELETE; termination failures are ignored.\n\n## Timeout, cancellation, and auth refresh\n\nFor `request()`:\n\n- timeout uses `AbortController` (`config.timeout ?? 30000`)\n- external signal, if provided, is merged via `AbortSignal.any([...])`\n- AbortError handling distinguishes caller abort vs timeout\n\nFor `notify()`:\n\n- timeout uses an internal `AbortController` (`config.timeout ?? 30000`)\n- there is no external abort option on the transport interface\n\nFor HTTP OAuth configs managed by `MCPManager`, outbound requests and best-effort server-request responses retry once on `HTTP 401`/`403` if token refresh returns replacement headers.\n\n## HTTP error propagation\n\nOn non-OK response:\n\n- response text is included in thrown error (`HTTP <status>: <text>`)\n- if present, auth hints from `WWW-Authenticate` and `Mcp-Auth-Server` are appended\n\nOn JSON-RPC error object:\n\n- throws `MCP error <code>: <message>`\n\nMalformed JSON body (`response.json()` failure) propagates as parse exception.\n\n## SSE behavior and modes\n\nTwo SSE paths exist:\n\n1. **Per-request SSE response** (`#parseSSEResponse`)\n - used when POST response content type is `text/event-stream`\n - consumes stream until matching response id found\n - can process interleaved notifications during same stream\n\n2. **Background SSE listener** (`startSSEListener()`)\n - optional GET listener for server-initiated notifications and server-to-client requests\n - `connectToServer()` starts it for HTTP/SSE transports after `initialize` and before `notifications/initialized`\n - listener startup waits up to one second, or less for very small request timeouts; `timeout: 0` / `OMP_MCP_TIMEOUT_MS=0` disables that startup deadline\n - if GET returns `405`, another non-OK status, no body, or times out, listener silently disables itself\n\n## Malformed payload and disconnect handling\n\nSSE JSON parsing errors bubble out of `readSseJson` and reject request/listener.\n\n- Request SSE parse errors reject the active request.\n- Background listener errors trigger `onError` (except AbortError), and an established listener ending while still connected triggers `onClose` so the manager can reconnect.\n- Transport does not restart the listener itself; managed connections may reconnect through manager `onClose` handling.\n\n## `json-rpc.ts` utility vs transport abstraction\n\n`src/mcp/json-rpc.ts` provides `callMCP()` and `parseSSE()` helpers for direct HTTP MCP calls (used by Exa integration), not the `MCPTransport` abstraction used by `MCPClient`/`MCPManager`.\n\nNotable differences from `HttpTransport`:\n\n- parses entire response text first, then extracts first `data: ` line (`parseSSE`), with JSON fallback\n- no request timeout management, no abort API, no session-id handling, no transport lifecycle\n- returns raw JSON-RPC envelope object\n\nThis path is lightweight but less robust than full transport implementation.\n\n## Retry/reconnect responsibilities\n\n## Transport-level\n\nCurrent transport implementations do **not**:\n\n- retry ordinary failed requests, except the HTTP transport's single OAuth-refresh retry when `onAuthError` is wired\n- reconnect after stdio process exit\n- reconnect SSE listeners by themselves\n- resend in-flight requests after disconnect\n\nThey fail fast and propagate errors.\n\n## Manager/tool-bridge level\n\n`MCPManager` wires `transport.onClose` for managed connections and runs `reconnectServer(name)` when a transport closes unexpectedly. Reconnect tears down the stale connection, re-resolves auth/config values, retries with backoff (`500`, `1000`, `2000`, `4000` ms), reloads tools, and preserves stale tools while reconnecting.\n\n`MCPTool` and `DeferredMCPTool` also attempt one reconnect + retry for retriable connection errors during a tool call. This is tool availability recovery, not transport-level retry.\n\n## Failure scenarios summary\n\n- **Malformed stdio message line**: dropped; stream continues.\n- **Stdio stream/process ends**: transport closes; pending requests rejected as `Transport closed`; manager-managed connections trigger reconnect.\n- **HTTP non-2xx**: request/notify throws HTTP error; managed OAuth requests can refresh auth and retry once on 401/403.\n- **Invalid JSON response**: parse exception propagated.\n- **SSE ends without matching id**: request fails with `No response received for request ID ...`.\n- **Timeout**: transport-specific timeout error.\n- **Caller abort**: AbortError/reason propagated from caller signal where the method accepts one.\n\n## Practical boundary rule\n\nIf the concern is message shape, id correlation, or MCP method ordering, it belongs to protocol/client logic.\n\nIf the concern is framing (JSONL vs HTTP/SSE), stream parsing, fetch/spawn lifecycle, timeout clocks, or connection teardown, it belongs to transport implementation.\n",
|
|
31
|
+
"mcp-runtime-lifecycle.md": "# MCP runtime lifecycle\n\nThis document describes how MCP servers are discovered, connected, exposed as tools, refreshed, and torn down in the coding-agent runtime.\n\n## Lifecycle at a glance\n\n1. **SDK startup** calls `discoverAndLoadMCPTools()` (unless MCP is disabled).\n2. **Discovery** (`loadAllMCPConfigs`) resolves MCP server configs from capability sources, filters disabled/project/Exa entries and browser MCP servers when the built-in browser tool is enabled, and preserves source metadata.\n3. **Manager connect phase** (`MCPManager.connectServers`) starts per-server connect + `tools/list` in parallel.\n4. **Fast startup gate** waits up to 250ms, then may return:\n - fully loaded `MCPTool`s,\n - failures per server,\n - or cached `DeferredMCPTool`s for still-pending servers.\n5. **SDK wiring** merges MCP tools into runtime tool registry for the session.\n6. **Post-connect enrichment** best-effort loads resources, resource templates, prompts, and optional resource subscriptions.\n7. **Live session** can refresh MCP tools via `/mcp` flows (`disconnectAll` + rediscover + `session.refreshMCPTools`) and can reconnect individual servers on transport close or `/mcp reconnect`.\n8. **Teardown** happens when callers invoke `disconnectServer`/`disconnectAll`; manager also clears MCP tool/resource/prompt registrations for disconnected servers.\n\n## Discovery and load phase\n\n### Entry path from SDK\n\n`createAgentSession()` in `src/sdk.ts` performs MCP startup when `enableMCP` is true (default):\n\n- calls `discoverAndLoadMCPTools(cwd, { ... })`,\n- passes `authStorage`, cache storage, `mcp.enableProjectConfig`, and browser-MCP filtering based on the `browser.enabled` setting,\n- always sets `filterExa: true`,\n- logs per-server load/connect errors,\n- stores returned manager in `toolSession.mcpManager` and session result.\n\nIf `enableMCP` is false, MCP discovery is skipped entirely.\n\n### Config discovery and filtering\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical MCP server items through capability discovery, then converts to legacy `MCPServerConfig`.\n\nFiltering behavior:\n\n- `enableProjectConfig: false` removes project-level entries (`_source.level === \"project\"`).\n- `enabled: false` servers are skipped before connect attempts.\n- Exa servers are filtered out by default and API keys are extracted for native Exa tool integration; browser automation MCP servers are filtered when `filterBrowser` is true.\n\nResult includes both `configs` and `sources` (metadata used later for provider labeling).\n\n### Discovery-level failure behavior\n\n`discoverAndLoadMCPTools()` distinguishes two failure classes:\n\n- **Discovery hard failure** (exception from `manager.discoverAndConnect`, typically from config discovery): returns an empty tool set and one synthetic error `{ path: \".mcp.json\", error }`.\n- **Per-server runtime/connect failure**: manager returns partial success with `errors` map; other servers continue.\n\nSo startup does not fail the whole agent session when individual MCP servers fail.\n\n## Manager state model\n\n`MCPManager` tracks runtime lifecycle with separate registries:\n\n- `#connections: Map<string, MCPServerConnection>` — fully connected servers.\n- `#pendingConnections: Map<string, Promise<MCPServerConnection>>` — handshake in progress.\n- `#pendingToolLoads: Map<string, Promise<{ connection, serverTools }>>` — connected but tools still loading.\n- `#tools: CustomTool[]` — current MCP tool view exposed to callers.\n- `#sources: Map<string, SourceMeta>` — provider/source metadata even before connect completes.\n- `#pendingReconnections: Map<string, Promise<MCPServerConnection | null>>` — reconnects in progress after a dropped transport or explicit reconnect.\n- `#serverConfigs: Map<string, MCPServerConfig>` — original unresolved configs preserved so reconnect can re-resolve credentials without leaking resolved tokens.\n\n`getConnectionStatus(name)` derives status from these maps:\n\n- `connected` if in `#connections`,\n- `connecting` if pending connect, pending tool load, or pending reconnect,\n- `disconnected` otherwise.\n\n## Connection establishment and startup timing\n\n## Per-server connect pipeline\n\nFor each discovered server in `connectServers()`:\n\n1. store/update source metadata,\n2. skip if already connected/pending/reconnecting,\n3. validate transport fields (`validateServerConfig`),\n4. resolve auth/shell substitutions (`#resolveAuthConfig`),\n5. call `connectToServer(name, resolvedConfig)` with manager notification/request handlers,\n6. wire HTTP OAuth refresh and transport `onClose` reconnect handling,\n7. call `listTools(connection)`,\n8. cache tool definitions (`MCPToolCache.set`) best-effort,\n9. best-effort load resources, resource templates, prompts, and subscriptions after tools load.\n\n`connectToServer()` behavior (`src/mcp/client.ts`):\n\n- creates stdio or HTTP/SSE transport,\n- performs MCP `initialize`,\n- for HTTP/SSE, starts the optional background SSE listener before `notifications/initialized`,\n- sends `notifications/initialized`,\n- uses timeout (`OMP_MCP_TIMEOUT_MS`, `config.timeout`, or 30s default; `0` disables the client-side timeout),\n- closes transport on init failure.\n\n### Fast startup gate + deferred fallback\n\n`connectServers()` waits on a race between:\n\n- all connect/tool-load tasks settled, and\n- `STARTUP_TIMEOUT_MS = 250`.\n\nAfter 250ms:\n\n- fulfilled tasks become live `MCPTool`s,\n- rejected tasks produce per-server errors,\n- still-pending tasks:\n - use cached tool definitions if available (`MCPToolCache.get`) to create `DeferredMCPTool`s,\n - otherwise block until those pending tasks settle.\n\nThis is a hybrid startup model: fast return when cache is available, correctness wait when cache is not.\n\n### Background completion behavior\n\nEach pending `toolsPromise` also has a background continuation that eventually:\n\n- replaces that server’s tool slice in manager state via `#replaceServerTools`,\n- writes cache,\n- logs late failures only after startup (`allowBackgroundLogging`).\n\n## Tool exposure and live-session availability\n\n### Startup registration\n\n`discoverAndLoadMCPTools()` converts manager tools into `LoadedCustomTool[]` and decorates paths (`mcp:<server> via <providerName>` when known).\n\n`createAgentSession()` then pushes these tools into `customTools`, which are wrapped and added to the runtime tool registry with names like `mcp__<server>_<tool>`.\n\n### Tool calls\n\n- `MCPTool` calls tools through an already connected `MCPServerConnection`.\n- `DeferredMCPTool` waits for `waitForConnection(server)` before calling; this allows cached tools to exist before connection is ready.\n- Both attempt a reconnect + single retry for retriable connection failures.\n\nBoth return structured tool output and convert remaining transport/tool errors into `MCP error: ...` tool content (abort remains abort).\n\n## Refresh/reload paths (startup vs live reload)\n\n### Initial startup path\n\n- one-time discovery/load in `sdk.ts`,\n- tools are registered in initial session tool registry.\n\n### Interactive reload path\n\n`/mcp reload` path (`src/modes/controllers/mcp-command-controller.ts`) does:\n\n1. `mcpManager.disconnectAll()`,\n2. `mcpManager.discoverAndConnect()`,\n3. `session.refreshMCPTools(mcpManager.getTools())`.\n\n`session.refreshMCPTools()` (`src/session/agent-session.ts`) removes all `mcp__` tools, re-wraps latest MCP tools, and re-activates tool set so MCP changes apply without restarting session.\n\nThere is also a follow-up path for late connections: after waiting for a specific server, if status becomes `connected`, it re-runs `session.refreshMCPTools(...)` so newly available tools are rebound in-session.\n\n## Health, reconnect, and partial failure behavior\n\nCurrent runtime behavior is connection-event driven:\n\n- **No autonomous polling health monitor** in manager/client.\n- **Automatic reconnect is wired to `transport.onClose`** for managed connections.\n- Reconnect retries with backoff (`500`, `1000`, `2000`, `4000` ms), reloads tools, and notifies consumers on success.\n- Tool calls that see retriable connection errors also attempt one reconnect + retry.\n- Reconnect is also explicit via `/mcp reconnect <name>` or broader `/mcp reload`.\n\nOperationally:\n\n- one server failing does not remove tools from healthy servers,\n- connect/list failures are isolated per server,\n- stale tools may remain visible while reconnect is attempted; calls report MCP errors if recovery fails,\n- tool cache, resource/prompt loading, subscriptions, and background updates are best-effort (warnings/errors logged, no hard stop).\n\n## Teardown semantics\n\n### Server-level teardown\n\n`disconnectServer(name)`:\n\n- removes pending entries, source metadata, saved config, resource refresh/subscription state,\n- detaches `onClose` so explicit close does not trigger reconnect,\n- closes transport if connected,\n- removes manager tool entries using the current raw-name prefix filter (`mcp__${name}_`); generated tool names are sanitized by `tool-bridge.ts`.\n\n### Global teardown\n\n`disconnectAll()`:\n\n- detaches `onClose` for all active transports, then closes them with `Promise.allSettled`,\n- clears pending maps, sources, saved configs, connections, subscriptions, resource refreshes, and manager tool list.\n\nIn current wiring, explicit teardown is used in MCP command flows (for reload/remove/disable). Startup stores the manager on the session; callers that need deterministic MCP shutdown should invoke manager disconnect methods.\n\n## Failure modes and guarantees\n\n| Scenario | Behavior | Hard fail vs best-effort |\n| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |\n| Discovery throws (capability/config load path) | Loader returns empty tools + synthetic `.mcp.json` error | Best-effort session startup |\n| Invalid server config | Server skipped with validation error entry | Best-effort per server |\n| Connect timeout/init failure | Server error recorded; others continue | Best-effort per server |\n| `tools/list` still pending at startup with cache hit | Deferred tools returned immediately | Best-effort fast startup |\n| `tools/list` still pending at startup without cache | Startup waits for pending to settle | Hard wait for correctness |\n| Late background tool-load failure | Logged after startup gate | Best-effort logging |\n| Runtime dropped transport | Manager attempts reconnect; stale tools remain while reconnecting and future calls may retry once or fail with MCP errors | Best-effort automatic recovery |\n\n## Public API surface\n\n`src/mcp/index.ts` re-exports loader/manager/client APIs for external callers. `src/sdk.ts` exposes `discoverMCPServers()` as a convenience wrapper returning the same loader result shape.\n\n## Implementation files\n\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts) — loader facade, discovery error normalization, `LoadedCustomTool` conversion.\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts) — lifecycle state registries, parallel connect/list flow, refresh/disconnect.\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts) — transport setup, initialize handshake, list/call/disconnect.\n- [`src/mcp/index.ts`](../packages/coding-agent/src/mcp/index.ts) — MCP module API exports.\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts) — startup wiring into session/tool registry.\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts) — config discovery/filtering/validation used by manager.\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts) — `MCPTool` and `DeferredMCPTool` runtime behavior.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — `refreshMCPTools` live rebinding.\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts) — interactive reload/reconnect flows.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent MCP proxying via parent manager connections.\n",
|
|
32
|
+
"mcp-server-tool-authoring.md": "# MCP server and tool authoring\n\nThis document explains how MCP server definitions become callable `mcp__*` tools in coding-agent, and what operators should expect when configs are invalid, duplicated, disabled, or auth-gated.\n\n## Architecture at a glance\n\n```text\nConfig sources (.omp/.claude/.cursor/.vscode/mcp.json, mcp.json, etc.)\n -> discovery providers normalize to canonical MCPServer\n -> capability loader dedupes by server name (higher provider priority wins)\n -> loadAllMCPConfigs converts to MCPServerConfig + skips enabled:false\n -> MCPManager connects/listTools (with auth/header/env resolution)\n -> manager best-effort loads resources/prompts and subscribes to resource updates when enabled\n -> MCPTool/DeferredMCPTool bridge exposes tools as mcp__<server>_<tool>\n -> AgentSession.refreshMCPTools replaces live MCP tools immediately\n```\n\n## 1) Server config model and validation\n\n`src/mcp/types.ts` defines the authoring shape used by MCP config writers and runtime:\n\n- `stdio` (default when `type` missing): requires `command`, optional `args`, `env`, `cwd`\n- `http`: requires `url`, optional `headers`\n- `sse`: requires `url`, optional `headers` (kept for compatibility)\n- shared fields: `enabled`, `timeout`, `auth`, `oauth`\n\n`validateServerConfig()` (`src/mcp/config.ts`) enforces transport basics:\n\n- rejects configs that set both `command` and `url`\n- requires `command` for stdio\n- requires `url` for http/sse\n- rejects unknown `type`\n\n`config-writer.ts` applies this validation for add/update operations and also validates server names:\n\n- non-empty\n- max 100 chars\n- only `[a-zA-Z0-9_.-]`\n\n### Transport pitfalls\n\n- `type` omitted means stdio. If you intended HTTP/SSE but omitted `type`, `command` becomes mandatory.\n- `sse` is still accepted but treated as HTTP transport internally (`createHttpTransport`).\n- Validation is structural, not reachability: a syntactically valid URL can still fail at connect time.\n\n## 2) Discovery, normalization, and precedence\n\n### Capability-based discovery\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical `MCPServer` items via `loadCapability(mcpCapability.id)`.\n\nThe capability layer (`src/capability/index.ts`) then:\n\n1. loads providers in priority order\n2. dedupes by `server.name` (first win = highest priority)\n3. validates deduped items\n\nResult: duplicate server names across sources are not merged. One definition wins; lower-priority duplicates are shadowed.\n\n### `.mcp.json` and related files\n\nThe dedicated fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json` (low priority).\n\nIn practice MCP servers also come from higher-priority providers (for example native `.omp/...` and tool-specific config dirs). Authoring guidance:\n\n- Prefer `.omp/mcp.json` (project) or `~/.omp/agent/mcp.json` (user) for explicit control.\n- Use root `mcp.json` / `.mcp.json` when you need fallback compatibility.\n- Reusing the same server name in multiple sources causes precedence shadowing, not merge.\n\n### Normalization behavior\n\n`convertToLegacyConfig()` (`src/mcp/config.ts`) maps canonical `MCPServer` to runtime `MCPServerConfig`.\n\nKey behavior:\n\n- transport inferred as `server.transport ?? (command ? \"stdio\" : url ? \"http\" : \"stdio\")`\n- disabled servers (`enabled === false`) and names in the user `disabledServers` list are dropped before connection\n- optional fields are preserved when present\n\n### Environment expansion during discovery\n\nOMP-native MCP config (`.omp/mcp.json`, `~/.omp/agent/mcp.json`, plus their `.mcp.json` variants) expands `${VAR}` and `${VAR:-default}` placeholders recursively before converting to runtime config. It also accepts boolean/string forms for `enabled` (`true`, `false`, `1`, `0`) and numeric strings for `timeout`.\n\nThe standalone fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json`, expands the same `${...}` placeholders, and type-checks `enabled`/`timeout` without coercing string values.\n\nInvalid `enabled`/`timeout` values are ignored with warnings rather than failing the whole file.\n\n## 3) Auth and runtime value resolution\n\n`MCPManager.prepareConfig()`/`#resolveAuthConfig()` (`src/mcp/manager.ts`) is the final pre-connect pass.\n\n### OAuth credential injection\n\nIf config has:\n\n```ts\nauth: { type: \"oauth\", credentialId: \"...\" }\n```\n\nand credential exists in auth storage:\n\n- `http`/`sse`: injects `Authorization: Bearer <access_token>` header\n- `stdio`: injects `OAUTH_ACCESS_TOKEN` env var\n\nIf credential lookup fails, manager logs a warning and continues with unresolved auth.\n\n### Header/env value resolution\n\nBefore connect, manager resolves stdio `env` values and HTTP/SSE `headers` values via `resolveConfigValue()` (`src/config/resolve-config-value.ts`):\n\n- value starting with `!` => execute shell command, use trimmed stdout (cached)\n- failed, timed-out, or whitespace-only commands produce `undefined`, so that entry is omitted\n- otherwise, treat value as environment variable name first (`process.env[name]`), fallback to literal value\n\nOperational caveat: a mistyped `!` secret command can silently remove that header/env entry, producing downstream 401/403 or server startup failures. A mistyped environment variable name is sent literally unless that literal happens to be meaningful to the server.\n\n## 4) Tool bridge: MCP -> agent-callable tools\n\n`src/mcp/tool-bridge.ts` converts MCP tool definitions into `CustomTool`s.\n\n### Naming and collision domain\n\nTool names are generated as:\n\n```text\nmcp__<sanitized_server_name>_<sanitized_tool_name>\n```\n\nRules:\n\n- lowercases\n- non-`[a-z_]` chars become `_`\n- repeated underscores collapse\n- redundant `<server>_` prefix in tool name is stripped once\n\nThis avoids many collisions, but not all. Different raw names can still sanitize to the same identifier (for example `my-server` and `my.server` both sanitize similarly), and registry insertion is last-write-wins.\n\n### Schema mapping\n\n`tool-bridge.ts` passes each MCP `inputSchema` through `normalizeSchemaForMCP()` before registering it as a `CustomTool` schema.\n\n### Execution mapping\n\n`MCPTool.execute()` / `DeferredMCPTool.execute()`:\n\n- calls MCP `tools/call`\n- flattens MCP content into displayable text\n- returns structured details (`serverName`, `mcpToolName`, provider metadata)\n- maps server-reported `isError` to `Error: ...` text result\n- attempts reconnect + one retry for retriable connection errors\n- maps remaining thrown transport/runtime failures to `MCP error: ...`\n- preserves abort semantics by translating AbortError into `ToolAbortError`\n\n## 5) Operator lifecycle: add/edit/remove and live updates\n\nInteractive mode exposes `/mcp` in `src/modes/controllers/mcp-command-controller.ts`.\n\nSupported operations:\n\n- `add` (wizard or quick-add)\n- `remove` / `rm`\n- `enable` / `disable`\n- `test`\n- `reauth` / `unauth`\n- `reconnect`\n- `reload`\n- `resources`, `prompts`, `notifications`\n- Smithery search/login/logout flows\n\nConfig writes are atomic (`writeMCPConfigFile`: temp file + rename).\n\nAfter changes, controller calls `#reloadMCP()`:\n\n1. `mcpManager.disconnectAll()`\n2. `mcpManager.discoverAndConnect()`\n3. `session.refreshMCPTools(mcpManager.getTools())`\n\n`refreshMCPTools()` replaces all `mcp__` registry entries and immediately re-activates the latest MCP tool set, so changes take effect without restarting the session.\n\n### Mode differences\n\n- **Interactive/TUI mode**: `/mcp` gives in-app UX (wizard, OAuth flow, connection status text, immediate runtime rebinding).\n- **SDK/headless integration**: `discoverAndLoadMCPTools()` (`src/mcp/loader.ts`) returns loaded tools + per-server errors; no `/mcp` command UX.\n\n## 6) User-visible error surfaces\n\nCommon error strings users/operators see:\n\n- add/update validation failures:\n - `Invalid server config: ...`\n - `Server \"<name>\" already exists in <path>`\n- quick-add argument issues:\n - `Use either --url or -- <command...>, not both.`\n - `--token requires --url (HTTP/SSE transport).`\n- connect/test failures:\n - `Failed to connect to \"<name>\": <message>`\n - timeout help text suggests increasing timeout\n - auth help text for `401/403`\n- auth/OAuth flows:\n - `Authentication required ... OAuth endpoints could not be discovered`\n - `OAuth flow timed out. Please try again.`\n - `OAuth authentication failed: ...`\n- disabled server usage:\n - `Server \"<name>\" is disabled. Run /mcp enable <name> first.`\n\nBad source JSON in discovery is generally handled as warnings/logs; config-writer paths throw explicit errors.\n\n## 7) Practical authoring guidance\n\nFor robust MCP authoring in this codebase:\n\n1. Keep server names globally unique across all MCP-capable config sources.\n2. Prefer names that remain distinct after MCP tool-name sanitization to avoid generated `mcp__` collisions.\n3. Use explicit `type` to avoid accidental stdio defaults.\n4. Treat `enabled: false` as hard-off: server is omitted from runtime connect set.\n5. For OAuth configs, store a valid `credentialId`; otherwise auth injection is skipped.\n6. If using command-based secret resolution (`!cmd`), verify command output is stable and non-empty.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts)\n- [`src/mcp/config-writer.ts`](../packages/coding-agent/src/mcp/config-writer.ts)\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts)\n- [`src/discovery/mcp-json.ts`](../packages/coding-agent/src/discovery/mcp-json.ts)\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/config/resolve-config-value.ts`](../packages/coding-agent/src/config/resolve-config-value.ts)\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts)\n",
|
|
33
|
+
"memory.md": "# Autonomous Memory\n\nWhen the local memory backend is enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into future sessions for the same project. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable the local summary pipeline via `/settings` or `config.yml`:\n\n```yaml\nmemory:\n backend: local\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Cite the memory artifact path when memory changes the plan, and pair it with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Reading memory artifacts\n\nThe agent can read memory files directly using `memory://` URLs with the `read` tool:\n\n| URL | Content |\n| -------------------------------------- | ----------------------------------- |\n| `memory://root` | Compact summary injected at startup |\n| `memory://root/MEMORY.md` | Full long-term memory document |\n| `memory://root/skills/<name>/SKILL.md` | A generated skill playbook |\n\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | --------------------------------------------------------- |\n| `view` | Show the current backend injection payload |\n| `stats` | Show backend-specific memory statistics, when supported |\n| `diagnose` | Show backend-specific diagnostics, when supported |\n| `clear` / `reset` | Delete active backend memory data/artifacts |\n| `enqueue` / `rebuild` | Force consolidation/retention work for the active backend |\n\n## How it works\n\nLocal summary memories are built by a background pipeline that runs at startup or when manually triggered via slash command. The pipeline is skipped for subagents and for sessions that are not persisted to a session file.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, currently active, or beyond the configured scan/age limits are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease and heartbeat to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nConsolidated output is redacted for common secret/token patterns before `MEMORY.md`, `memory_summary.md`, or generated skills are written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| --------------------- | ------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------- |\n| `memory.backend` | `off` | Select `local` for this pipeline; legacy `memories.enabled: true` is migrated to `memory.backend: local` when no explicit backend is set |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — `memory://` URL handler\n",
|
|
34
|
+
"mnemosyne-memory-backend.md": "# Mnemopi memory backend\n\nOh My Pi can use `@oh-my-pi/pi-mnemopi` as a local long-term memory backend.\n\nSet:\n\n```yaml\nmemory:\n backend: mnemopi\n```\n\nExample:\n\n```yaml\nmemory:\n backend: mnemopi\nmnemopi:\n scoping: per-project-tagged\n```\n\nWith this backend enabled, the coding agent:\n\n1. Opens one or more local Mnemopi SQLite databases according to the configured bank scoping.\n2. Recalls relevant memories into a `<memories>` block for the first model turn of a session and refreshes the base prompt if recall happens from the `agent_start` listener.\n3. Retains completed conversation turns into the retain bank after agent turns, no more often than `mnemopi.retainEveryNTurns`.\n4. Adds recalled memory as extra compaction context when compaction asks the memory backend for `preCompactionContext`.\n5. Uses the normal `/memory view`, `/memory stats`, `/memory diagnose`, `/memory clear`, and `/memory enqueue` commands through the shared memory backend interface.\n\nRecalled memory is background context, not instructions. Current user messages and tool output take precedence when they conflict.\n\n## Settings\n\n| Setting | Default | Description |\n| ------------------------------- | ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `memory.backend` | `off` | Set to `mnemopi` to enable this backend. |\n| `mnemopi.dbPath` | agent memories dir | Optional SQLite database path. |\n| `mnemopi.bank` | project directory name | Base bank name passed to `Mnemopi`; the coding-agent wrapper scopes from this base according to `mnemopi.scoping`. |\n| `mnemopi.scoping` | `per-project` | Memory visibility mode: `global` = one shared bank, `per-project` = isolated project memory, `per-project-tagged` = project-local writes plus global recall visibility. |\n| `mnemopi.autoRecall` | `true` | Recall memory on the first turn of a session. |\n| `mnemopi.autoRetain` | `true` | Retain completed turns automatically. |\n| `mnemopi.retainEveryNTurns` | `4` | Minimum user turns between automatic retain writes. |\n| `mnemopi.recallLimit` | `8` | Maximum recalled memories in the prompt block. |\n| `mnemopi.recallContextTurns` | `3` | Prior user-bounded turns included in recall queries. |\n| `mnemopi.recallMaxQueryChars` | `4000` | Maximum composed recall query length. |\n| `mnemopi.injectionTokenLimit` | `5000` | Approximate token budget for memory prompt injection. |\n| `mnemopi.debug` | `false` | Enable debug logging for backend failures. |\n| `mnemopi.noEmbeddings` | `false` | Pass `noEmbeddings` to `Mnemopi` and force FTS-only recall. |\n| `mnemopi.embeddingModel` | env/default | Embedding model passed to `Mnemopi`. |\n| `mnemopi.embeddingApiUrl` | env/default | OpenAI-compatible embedding endpoint passed to `Mnemopi`. |\n| `mnemopi.embeddingApiKey` | env/default | Embedding API key passed to `Mnemopi`. |\n| `mnemopi.llmMode` | `smol` | `smol` uses the configured pi-ai smol model, `remote` uses the settings below, and `none` disables LLM calls. |\n| `mnemopi.llmBaseUrl` | env/default | OpenAI-compatible LLM endpoint for `llmMode: remote`. |\n| `mnemopi.llmApiKey` | env/default | LLM API key for `llmMode: remote`. |\n| `mnemopi.llmModel` | env/default | LLM model id for `llmMode: remote`. |\n\n## Scoping\n\nThe coding-agent wrapper applies scoping on top of the underlying `Mnemopi` package:\n\n- `global` uses one shared bank for recall and writes.\n- `per-project` writes to and recalls from a bank derived from the current git repository root (or cwd) plus a stable hash.\n- `per-project-tagged` writes to the project-local bank and recalls from both the project-local bank and the shared global bank, with duplicate recall results merged.\n\nThe combined project-plus-global behavior lives in the wrapper. The `@oh-my-pi/pi-mnemopi` package itself still exposes banks and constructor options directly, including `bank` for selecting a bank name. Project-local banks other than the shared bank are stored as sibling bank databases managed by Mnemopi's `BankManager`.\n\n## LLM and embeddings\n\nThe backend passes these settings to the `Mnemopi` constructor; if a setting is omitted, Mnemopi falls back to its `MNEMOPI_*` environment defaults. The backend does not download or run a local GGUF LLM. LLM-dependent paths use a configured pi-ai model, a dynamic completion function, a remote OpenAI-compatible endpoint, or deterministic no-LLM fallbacks.\n\nFTS-only:\n\n```yaml\nmemory:\n backend: mnemopi\nmnemopi:\n noEmbeddings: true\n```\n\nEquivalent constructor shape:\n\n```ts\nnew Mnemopi({ noEmbeddings: true });\n```\n\nRemote embeddings:\n\n```yaml\nmnemopi:\n embeddingModel: text-embedding-3-small\n embeddingApiUrl: https://api.openai.com/v1\n embeddingApiKey: ${OPENAI_API_KEY}\n```\n\nEquivalent constructor shape:\n\n```ts\nnew Mnemopi({\n embeddingModel: \"text-embedding-3-small\",\n embeddingApiUrl: \"https://api.openai.com/v1\",\n embeddingApiKey,\n});\n```\n\nRemote LLM:\n\n```yaml\nmnemopi:\n llmMode: remote\n llmBaseUrl: https://api.openai.com/v1\n llmApiKey: ${OPENAI_API_KEY}\n llmModel: gpt-4.1-mini\n```\n\nEquivalent constructor shapes:\n\n```ts\nnew Mnemopi({ llm: { baseUrl, apiKey, model } });\nnew Mnemopi({ llmBaseUrl: baseUrl, llmApiKey: apiKey, llmModel: model });\n```\n\nDynamic function LLM for rotating OAuth tokens:\n\n```ts\nnew Mnemopi({\n llm: async (prompt, opts) => {\n const token = await getFreshOauthToken();\n return await completeWithPiAi(prompt, {\n token,\n maxTokens: opts?.maxTokens,\n temperature: opts?.temperature,\n });\n },\n});\n```\n\npi-ai smol model LLM:\n\n```yaml\nmnemopi:\n llmMode: smol\n```\n\nThe coding agent resolves its configured smol role and passes a dynamic completion function so every Mnemopi LLM call can fetch the current provider credentials at call time:\n\n```ts\nnew Mnemopi({\n llm: async (prompt, opts) => completeSmolWithCurrentAuth(prompt, opts),\n});\n```\n\n## Operational notes\n\n- The default shared database lives under the agent memories directory in `mnemopi/mnemopi.db`; project-scoped banks use sibling database paths under that Mnemopi directory.\n- `/memory clear` removes every scoped Mnemopi SQLite database and sidecar WAL/SHM files for the active configuration.\n- `/memory enqueue` forces retention of the current session, flushes pending fact extractions, and runs Mnemopi sleep/consolidation.\n- `/memory stats` and `/memory diagnose` render backend-specific bank statistics/diagnostics when the Mnemopi backend is active.\n- Subagents do not own separate Mnemopi retain loops; they alias the parent state when a parent Mnemopi state exists, and otherwise remain inert.\n",
|
|
35
|
+
"models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, `lm-studio`, `openai-models-list`, or `proxy`\n- `transport`: `pi-native` only. When set, every model under that provider is sent to an `omp auth-gateway` compatible `baseUrl` via `POST /v1/pi/stream`; `apiKey` is the gateway bearer.\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `apiKey`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n### Command-resolved secrets\n\nProvider `apiKey` values and provider/model `headers` values may start with `!` to read a secret from command stdout. The command is run with a 10 s timeout, stdout is trimmed, and empty/failing commands are omitted:\n\n```yaml\nproviders:\n openai:\n apiKey: \"!op read op://dev/openai/api-key\"\n headers:\n X-Team-Key: \"!bw get password omp-team-key\"\n```\n\nSuccessful command outputs are cached for the process lifetime so the command is not re-run for every model.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `thinking`, `input`, `cost`, `premiumMultiplier`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL`, or `OLLAMA_HOST`, or `http://127.0.0.1:11434`\n- context window: `OLLAMA_CONTEXT_LENGTH` if set, otherwise Ollama `/api/show` metadata, otherwise `128000`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n`OLLAMA_CONTEXT_LENGTH` does not configure Ollama's runtime `num_ctx`; set that in Ollama/model configuration separately.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\nThis path also works for local OpenAI-compatible servers that are not LM Studio. For example, if oMLX is bound to Ollama's usual port, set `LM_STUDIO_BASE_URL=http://127.0.0.1:11434/v1` to discover it through the existing `/v1/models` flow. Running oMLX and Ollama side by side requires assigning a different port to one of them. Do not configure oMLX as `ollama`: Ollama discovery uses native `/api/tags` and `/api/show` endpoints, not OpenAI `/v1/models`.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Proxy discovery (`discovery.type: proxy`)\n\nFor Anthropic+OpenAI-compatible proxies (new-api / one-api / similar)\nthat expose both `/v1/messages` and `/v1/chat/completions` behind the same\nhost. Discovery hits `GET /v1/models` (10s timeout, OpenAI-style payload) and\nderives each model's `api` from the entry's `supported_endpoint_types`:\n\n- contains `\"anthropic\"` -> `api: anthropic-messages` (routes via `/v1/messages`)\n- contains `\"openai\"` -> `api: openai-completions` (routes via `/v1/chat/completions`)\n- otherwise -> falls back to provider-level `api` if set, else dropped\n\nProvider-level `api` is **optional** with `discovery.type: proxy` because the\nper-model wire is auto-detected. The Anthropic SDK strips a trailing `/v1`\nfrom `baseUrl` before appending `/v1/messages`, so a single discovery `baseUrl`\n(ending in `/v1`) round-trips correctly to both wires.\n\n```yaml\nproviders:\n newapi-reseller:\n baseUrl: https://api.example.com/v1\n apiKey: xxxx\n authHeader: true # injects Authorization: Bearer for openai models\n disableStrictTools: true # most anthropic-fronted proxies reject `strict`\n discovery:\n type: proxy\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `OMP_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`OMP_AUTH_BROKER_URL`, `OMP_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for an explicit OpenAI fallback:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.5:\n contextPromotionTarget: openai-codex/gpt-5.4\n```\n\nThe built-in model policy currently links OpenAI `codex-spark` variants to `gpt-5.5`, and `gpt-5.5` to `gpt-5.4`, when that target exists on the same provider/API.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/models-config-schema.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsMultipleSystemMessages` — preserve separate leading system/developer messages instead of coalescing them. Default: auto (known OpenAI-compatible hosted APIs preserve; strict-template/local hosts coalesce).\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `disableReasoningOnToolChoice` — drop reasoning fields whenever any `tool_choice` is sent. Default: auto (DeepSeek reasoning models).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `allowsSyntheticReasoningContentForToolCalls` — allow a placeholder reasoning field when a prior assistant tool-call turn lacks provider reasoning content. Default: `true`; set `false` for providers that validate the exact reasoning value.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`, `supportsMidConversationSystem`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. OMP enables it by default for a small allowlist of high-frequency built-in `anthropic-messages` tools (`bash`, `python`, `edit`, and `find`) whose schemas fit Anthropic's strict grammar limits; other tools still send normalized schemas but omit `strict`.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out of strict mode for the allowlisted tools:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider. It disables the Anthropic `strict` marker only for tools that OMP would otherwise mark strict; it does not change runtime tool argument validation. OMP can automatically retry without strict tools after Anthropic reports a strict-grammar-too-large error before the first streamed token, but proxies that reject the `strict` field for other reasons should set this flag explicitly.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\nFor oMLX or another local OpenAI-compatible server with a discoverable `/v1/models` endpoint, prefer discovery instead of listing models by hand. Set `api` to the endpoint family your server actually exposes: `openai-completions` uses `/v1/chat/completions`; servers that expose `/v1/responses` need `openai-responses` instead.\n\n```yaml\nproviders:\n omlx:\n baseUrl: http://127.0.0.1:11434/v1\n auth: none\n api: openai-completions\n discovery:\n type: openai-models-list\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
|
|
36
|
+
"natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document covers the runtime loader shipped by `@oh-my-pi/pi-natives`: how `native/index.js` decides which `.node` file to require, how compiled-binary embedded payloads are extracted, and what startup failures report.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nThe loader is intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Treat an embedded-addon manifest as a compiled-binary signal when present.\n- Optionally materialize embedded addon archive contents into a versioned per-user cache directory.\n- On Windows `node_modules` installs, stage addon files into the versioned cache to avoid locked-DLL update failures.\n- Attempt candidates in deterministic order and return the first addon that `require(...)` loads and validates.\n\nFor install and compiled-binary paths, the loader verifies a release sentinel export named from `package.json#version` (for example `__piNativesV15_7_2`). Workspace-dev loads skip this validation so a local checkout can rebuild after a pull. The loader does not validate the full export surface; stale same-version or incomplete binaries still surface as missing members or native errors at use sites.\n\n## Runtime inputs and derived state\n\nAt module initialization, `native/index.js` computes:\n\n- **Platform tag**: `${process.platform}-${process.arch}` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json`.\n- **Core directories**:\n - `leafPackageDir`: directory of the platform leaf package, resolved via `require.resolve(\"@oh-my-pi/pi-natives-<tag>/package.json\")`; `null` when no leaf is installed (e.g. local dev).\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/omp` or `%USERPROFILE%/AppData/Local/omp`.\n - Non-Windows: `~/.local/bin`.\n- **Natives cache root** (`getNativesDir()`):\n - if `$XDG_DATA_HOME/omp` exists, `$XDG_DATA_HOME/omp/natives`;\n - otherwise `~/.omp/natives`.\n- **Compiled-binary mode** (`detectCompiledBinary`): true if any of:\n - embedded-addon manifest is non-null,\n - `PI_COMPILED` env var is set,\n - `import.meta.url` contains Bun embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Windows staging mode** (`shouldStageNodeModulesAddon`): true only on Windows, in non-compiled mode, when `nativeDir` is inside `node_modules`.\n- **Variant override**: `PI_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nUnsupported platforms are not rejected before probing. The loader first tries the computed candidate paths. If all fail and `platformTag` is unsupported, it throws an unsupported-platform error listing supported tags.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. `PI_NATIVE_VARIANT=modern|baseline` wins when valid.\n2. Otherwise AVX2 support is detected:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`.\n - Windows: PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. AVX2 selects `modern`; unavailable or undetectable AVX2 selects `baseline`.\n\n### Non-x64 behavior\n\nNo variant suffix is used; the filename is `pi_natives.<platform>-<arch>.node`.\n\n### Filename construction\n\n`loader-state.js#getAddonFilenames` returns:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`:\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node`\n 3. `pi_natives.<tag>.node`\n- x64 + `baseline`:\n 1. `pi_natives.<tag>-baseline.node`\n 2. `pi_natives.<tag>.node`\n\nThe default unsuffixed fallback remains part of the x64 candidate list.\n\n## Candidate path construction and fallback ordering\n\n`resolveLoaderCandidates(...)` expands every filename across directories, then de-duplicates while preserving first occurrence order.\n\n### Non-compiled runtime\n\nFor each filename, candidates are, in order:\n\n1. `<leafPackageDir>/<filename>` (omitted when `leafPackageDir` is `null`)\n2. `<nativeDir>/<filename>`\n3. `<execDir>/<filename>`\n\nThe leaf package dir comes first so the optional-dependency binary published with the release is preferred over any `.node` left in the core package's `native/` (e.g. a stale local-dev build).\n\nOn Windows installs where `nativeDir` is inside a `node_modules` segment (`shouldStageNodeModulesAddon`), `<versionedDir>/<filename>` staging candidates are prepended ahead of the leaf candidates so a locked `node_modules` binary can be sidestepped during `bun install -g` updates. The staged file is copied from `leafPackageDir ?? nativeDir` before probing.\n\n### Compiled runtime\n\nFor each filename, candidates are:\n\n1. `<versionedDir>/<filename>`\n2. `<userDataDir>/<filename>`\n3. `<nativeDir>/<filename>`\n4. `<execDir>/<filename>`\n\nAt load time, an extracted embedded candidate, or a staged Windows candidate when no embedded candidate exists, is prepended ahead of these de-duplicated candidates.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.js` is generated by `scripts/embed-native.ts`. The reset stub exports `embeddedAddon = null`. A populated manifest has:\n\n- `platformTag`\n- `version`\n- `archive`: `{ format: \"tar.gz\", filename, filePath }`\n- `files[]` entries with `variant`, `filename`, and `size`\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when:\n\n1. compiled-binary mode is true,\n2. `embeddedAddon` is non-null,\n3. manifest `platformTag` equals the runtime platform tag,\n4. manifest `version` equals the package version,\n5. a variant-appropriate embedded file exists.\n\nVariant file selection:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization:\n\n1. Ensure `<versionedDir>` exists.\n2. Select `<versionedDir>/<selected filename>`.\n3. If the current cached file exists and its size matches manifest metadata, reuse it.\n4. Otherwise extract `embeddedAddon.archive.filePath` into `<versionedDir>` using the manifest `files[]` allowlist.\n5. Verify the selected target by size and return it as the first candidate.\n\nArchive, directory, or write failures are appended to the loader error list; probing continues through normal candidates.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Load package metadata and embedded-addon manifest\n -> Compute platform/version/variant/filenames/candidate paths\n -> (compiled + embedded manifest matches?)\n yes -> extract archive to versionedDir when needed (record errors, continue)\n no -> skip extraction\n -> (Windows non-compiled node_modules install and no embedded candidate?)\n yes -> stage leaf/core addon to versionedDir (record errors, continue)\n no -> skip staging\n -> For each runtime candidate in order:\n require(candidate)\n -> sentinel validation passes or is workspace-dev: return addon exports (READY)\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (tried-path diagnostics + hints)\n```\n\n## Failure behavior and diagnostics\n\n### Unsupported platform\n\nIf all candidates fail and `platformTag` is not supported, the loader throws:\n\n- `Unsupported platform: <tag>`\n- supported platform list\n- issue-reporting guidance\n\n### No loadable candidate\n\nIf the platform is supported but no candidate can be loaded, the final error includes:\n\n- `Failed to load pi_natives native addon for <platformTag>` or `<platformTag> (<variant>)`\n- every attempted path with the corresponding `require(...)` or sentinel-validation error\n- mode-specific remediation hints\n\n### Compiled-binary startup failures\n\nCompiled mode diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete the versioned cache and rerun,\n- direct release download `curl` commands for each expected filename.\n- release sentinel mismatch details when a loadable `.node` belongs to another `@oh-my-pi/pi-natives` version.\n\n### Non-compiled startup failures\n\nNormal package/runtime diagnostics include:\n\n- reinstall hint (`bun install @oh-my-pi/pi-natives`),\n- local rebuild command (`bun --cwd=packages/natives run build`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern bun --cwd=packages/natives run build`).\n",
|
|
37
|
+
"natives-architecture.md": "# Natives Architecture\n\n`@oh-my-pi/pi-natives` is a two-layer package around an ESM loader:\n\n1. **ESM loader/package entrypoint** resolves and loads the correct `.node` addon with `createRequire`, validates the release sentinel outside workspace-dev loads, and re-exports generated classes/functions plus enum runtime objects as explicit named ESM exports.\n2. **Rust N-API module layer** implements the exported functions/classes and emits the generated TypeScript declarations.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Package entrypoint and public surface\n\n`packages/natives/package.json` points at generated native artifacts:\n\n- `main`: `./native/index.js`\n- `types`: `./native/index.d.ts`\n- `exports[\".\"].types`: `./native/index.d.ts`\n- `exports[\".\"].import`: `./native/index.js`\n\nThere is no current `packages/natives/src` TypeScript wrapper layer. Consumers import functions/classes/enums directly from `@oh-my-pi/pi-natives`; the type contract is the generated `native/index.d.ts` plus the explicit named exports generated into `native/index.js` by `scripts/gen-enums.ts`.\n\nCurrent capability groups in the generated API include:\n\n- **Search/text/code primitives**: `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `astGrep`, `astEdit`, `blockRangeAt`, `summarizeCode`, text width/slicing/wrapping/sanitization, syntax highlighting, token counting.\n- **Execution/process/terminal primitives**: `executeShell`, `Shell`, `PtySession`, `Process`, key parsing, bash fixups.\n- **System/media/isolation/conversion primitives**: clipboard, SIXEL encoding, HTML-to-Markdown, macOS appearance/power helpers, work profiling, workspace scanning, isolation backend helpers (`iso*`).\n\n## Loader layer\n\n`packages/natives/native/index.js` owns runtime addon selection and optional embedded extraction.\n\n### Candidate resolution model\n\n- Platform tag is `${process.platform}-${process.arch}`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-x64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename without a variant suffix.\n\nFilename strategy:\n\n- Default: `pi_natives.<platform>-<arch>.node`\n- x64 variant: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- x64 runtime fallback includes the unsuffixed default filename after variant candidates.\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- Linux: `/proc/cpuinfo`\n- macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`\n- Windows: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`PI_NATIVE_VARIANT` can force `modern` or `baseline`; invalid values are ignored.\n\n### Binary distribution and extraction model\n\nThe published `@oh-my-pi/pi-natives` package ships **only** the loader layer in `native/`: the ESM loader (`index.js`), generated declarations (`index.d.ts`), the `loader-state.js`/`.d.ts` helpers, and the embedded-addon manifest stub (`embedded-addon.js`). It carries no `.node` binaries.\n\nEach platform's prebuilt `.node` is published as a separate optional-dependency leaf package — `@oh-my-pi/pi-natives-<platform>-<arch>`, one per supported tag — which the core lists in `optionalDependencies` at the lockstep version during publish. npm/bun install only the leaf whose `os`/`cpu` match the host. The working-tree package keeps built `.node` files under `native/` for local dev; the release-publish rewrite (`prepareNativeCorePackage` in `scripts/ci-release-publish.ts`) strips them from the core tarball, and the leaves are generated by `packages/natives/scripts/gen-npm-packages.ts` (`LEAF_TARGETS`). Adding a build target therefore requires a matching `LEAF_TARGETS` entry, or the binary never reaches npm users.\n\nFor compiled binaries, loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`.\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/omp` (fallback `%USERPROFILE%/AppData/Local/omp`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates.\n\n`getNativesDir()` uses `$XDG_DATA_HOME/omp/natives` when `$XDG_DATA_HOME/omp` exists; otherwise it uses `~/.omp/natives`.\n\nIf a populated embedded addon manifest is present, it is also treated as a compiled-binary signal. Current embedded manifests point at a gzip-compressed tar archive (`embedded-addons.<tag>.tar.gz`) that contains one or more matching `.node` files. The loader extracts the archive into the versioned cache directory, validates the selected file by size, and prepends that cache path before normal candidate probing.\n\nFor npm/bun installs (non-compiled), `loader-state.js` resolves the platform leaf directory via `require.resolve(\"@oh-my-pi/pi-natives-<tag>/package.json\")` and probes its `.node` **before** the core package's `native/` directory and the executable directory. The optional-dependency binary is therefore preferred over any `.node` left in the core (e.g. a stale local-dev build). On Windows `node_modules` installs, the loader first stages the selected leaf/core addon into `<getNativesDir()>/<packageVersion>/...` and prepends that staged path so running processes do not lock the `node_modules` copy during global updates.\n\n### Failure modes\n\nLoader failures are explicit:\n\n- **Unsupported platform tag**: after failed probing, throws with supported platform list.\n- **No loadable candidate**: throws with all attempted paths and remediation hints.\n- **Embedded/staging errors**: directory/write/archive/staging failures are recorded and included in final load diagnostics if no candidate loads.\n- **Release mismatch**: outside workspace-dev loads, a candidate that loads but lacks the version sentinel export for `package.json#version` is rejected with a reinstall hint.\n\n## Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` declares exported module ownership:\n\n- `appearance`\n- `ast`\n- `block`\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `iso`\n- `keys`\n- `language` (re-exported from `pi_ast`)\n- `power`\n- `prof`\n- `ps`\n- `pty`\n- `shell`\n- `sixel`\n- `summary`\n- `task`\n- `text`\n- `tokens`\n- `utils` (crate-private helpers)\n- `workspace`\n\nN-API exports are generated from Rust `#[napi]` functions/classes/objects/enums. Snake_case Rust names are exposed as camelCase JavaScript names unless explicitly configured by napi-rs.\n\n## Ownership boundaries\n\n- **Loader/package ownership (`packages/natives/native`, `packages/natives/scripts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary embedded archive extraction\n - Windows `node_modules` addon staging\n - generated TypeScript declarations and explicit ESM export/enum patching\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation consumed directly by package callers\n- **Consumer ownership (`packages/coding-agent`, `packages/tui`)**\n - user-facing policy and fallbacks that are not built into the native API\n - higher-level rendering, artifact, shell-session, and command behavior\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@oh-my-pi/pi-natives`.\n2. `native/index.js` computes platform/arch/variant and candidate paths.\n3. Optional embedded archive extraction or Windows `node_modules` staging can prepend a versioned-cache candidate.\n4. Each candidate is `require(...)`d; install/compiled loads must expose the package-version sentinel.\n5. The loaded addon object is bound to explicit named ESM exports, including generated enum objects.\n6. Caller invokes generated N-API functions/classes directly.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Platform leaf package**: Per-platform npm package `@oh-my-pi/pi-natives-<tag>` that carries one platform's prebuilt `.node`. The core depends on every leaf via `optionalDependencies`; the package manager installs only the host-matching one (`os`/`cpu`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Generated binding declaration**: `native/index.d.ts` emitted by napi-rs during `build-native.ts`.\n- **Version sentinel**: Rust export named from the package version (for example `__piNativesV15_7_2`) that lets the loader reject a `.node` from a different release.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from embedded/cache paths before package-local paths.\n- **Embedded addon**: Build artifact metadata and archive reference generated into `native/embedded-addon.js` so compiled binaries can extract matching `.node` payloads.\n",
|
|
38
|
+
"natives-binding-contract.md": "# Natives Binding Contract (JavaScript/TypeScript Side)\n\nThis document defines the JS/TS contract between `@oh-my-pi/pi-natives` callers and the loaded N-API addon.\n\nCurrent package shape is direct-to-native: there is no `packages/natives/src/<module>` TypeScript wrapper layer. The public API is the generated `packages/natives/native/index.d.ts` declaration file, the ESM loader/export wrapper in `packages/natives/native/index.js`, and the Rust `#[napi]` exports in `crates/pi-natives/src`.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n- Rust modules under `crates/pi-natives/src/*.rs`\n\n## Contract model\n\nThe contract has three parts:\n\n1. **ESM runtime loader/export wrapper** (`native/index.js`)\n - calls `loadNative()` from `loader-state.js`, which `require(...)`s the `.node` addon;\n - binds generated classes/functions as explicit named ESM exports;\n - emits enum runtime objects generated by `scripts/gen-enums.ts`.\n2. **Generated TypeScript declarations** (`native/index.d.ts`)\n - generated by napi-rs during `scripts/build-native.ts`;\n - declares exported functions, classes, object interfaces, and native enums;\n - is the package `types` entry.\n3. **Rust N-API exports** (`crates/pi-natives/src`)\n - `#[napi]` functions/classes/objects/enums are the source of generated declarations and runtime symbols;\n - snake_case Rust names become camelCase JavaScript names by napi-rs convention.\n\nThere is no current `NativeBindings` declaration-merging lifecycle and no full required-export list in the loader. Install/compiled loads do validate the package-version sentinel export; workspace-dev loads skip that check.\n\n## Public export surface organization\n\n`packages/natives/package.json` exposes the package root only:\n\n```json\n{\n \"main\": \"./native/index.js\",\n \"types\": \"./native/index.d.ts\",\n \"exports\": {\n \".\": {\n \"types\": \"./native/index.d.ts\",\n \"import\": \"./native/index.js\"\n }\n }\n}\n```\n\nConsumers in `packages/coding-agent` and `packages/tui` import directly from `@oh-my-pi/pi-natives`.\n\n## JS API ↔ native export mapping (representative)\n\n| Category | Public JS API | Rust source | Return style |\n| ----------------- | --------------------------------------------------------------------------------------------------------- | ------------------------------------------------ | -------------------------- |\n| Grep | `grep(options, onMatch?)` | `grep.rs` | `Promise<GrepResult>` |\n| Grep | `search(content, options)` | `grep.rs` | `SearchResult` |\n| Grep | `hasMatch(content, pattern, ignoreCase?, multiline?)` | `grep.rs` | `boolean` |\n| Fuzzy path search | `fuzzyFind(options)` | `fd.rs` | `Promise<FuzzyFindResult>` |\n| Glob/workspace | `glob(options, onMatch?)`, `listWorkspace(options)` | `glob.rs`, `workspace.rs` | `Promise<...>` |\n| Glob cache | `invalidateFsScanCache(path?)` | `fs_cache.rs` | `void` |\n| AST/block/summary | `astGrep(options)`, `astEdit(options)`, `blockRangeAt(options)`, `summarizeCode(options)` | `ast.rs`, `block.rs`, `summary.rs` | mixed |\n| Shell | `executeShell(options, onChunk?)` | `shell.rs` | `Promise<ShellRunResult>` |\n| Shell | `new Shell(options?)`, `shell.run(...)`, `shell.abort()` | `shell.rs` | class / promises |\n| PTY | `new PtySession()`, `start/write/resize/kill` | `pty.rs` | class / promises |\n| Process | `Process.fromPid/fromPath`, `status/children/killTree/terminate/waitForExit` | `ps.rs` | class / mixed |\n| Keys | `parseKey`, `matchesKey`, Kitty/legacy helpers | `keys.rs` | sync |\n| Text | `wrapTextWithAnsi`, `truncateToWidth`, `sliceWithWidth`, `extractSegments`, `visibleWidth` | `text.rs` | sync |\n| Highlight | `highlightCode`, `supportsLanguage`, `getSupportedLanguages` | `highlight.rs` | sync |\n| HTML | `htmlToMarkdown(html, options?)` | `html.rs` | `Promise<string>` |\n| SIXEL | `encodeSixel` | `sixel.rs` | sync |\n| Clipboard | `copyToClipboard`, `readImageFromClipboard` | `clipboard.rs` | sync / promise |\n| Tokens | `countTokens(input, encoding?)` | `tokens.rs` | sync |\n| System/isolation | `detectMacOSAppearance`, `MacAppearanceObserver`, `MacOSPowerAssertion`, `getWorkProfile`, `iso*` helpers | `appearance.rs`, `power.rs`, `prof.rs`, `iso.rs` | mixed |\n\n## Sync vs async contract differences\n\nThe contract preserves Rust/N-API call style:\n\n- **Promise-returning exports** for worker-thread or async runtime work (`grep`, `glob`, `fuzzyFind`, `astGrep`, `astEdit`, `htmlToMarkdown`, shell/PTY runs, `isoStart`/`isoStop`/`isoDiff`, clipboard image read, workspace scan).\n- **Synchronous exports** for deterministic in-memory transforms/parsers or direct system calls (`search`, `hasMatch`, highlighting, text utilities, token counting, process construction/status, `copyToClipboard`, `encodeSixel`, isolation probe/resolve helpers).\n- **Constructor exports** for stateful runtime objects (`Shell`, `PtySession`, `Process`, macOS observer/power handles).\n\nChanging sync ↔ async for an existing export is a breaking public API change because consumers call these exports directly.\n\n## Object and enum typing patterns\n\n### Object patterns\n\n`#[napi(object)]` Rust structs become TS interfaces, for example:\n\n- `GrepResult`, `SearchResult`, `GlobResult`, `FuzzyFindResult`\n- `ShellRunResult`, `ShellExecuteResult`, `PtyRunResult`, `MinimizerResult`\n- `AstFindResult`, `AstReplaceResult`, `BlockRange`, `SummaryResult`\n- `System`/media/isolation payloads such as `ClipboardImage`, `WorkProfile`, `ParsedKittyResult`, `IsoResolveResult`\n\nRuntime shape correctness is owned by napi-rs and the Rust implementation.\n\n### Enum patterns\n\nNative enums are represented in generated declarations and also emitted as runtime objects by `scripts/gen-enums.ts`, because napi-rs string enums are TS-only without explicit JS exports. Current enum objects include:\n\n- `AstMatchStrictness`\n- `Ellipsis`\n- `Encoding`\n- `FileType`\n- `GrepOutputMode`\n- `IsoBackendKind`\n- `IsoChangeKind`\n- `KeyEventType`\n- `MacOSAppearance`\n- `ProcessStatus`\n\n## Error behavior and caveats\n\n- Addon load failure or unsupported platform throws during package import from `native/index.js`.\n- The loader rejects install/compiled candidates that lack the package-version sentinel export. It does not verify the full export set after `require(...)`; stale same-version or incomplete binaries surface as native load errors or missing members at use sites.\n- N-API conversion validates basic argument conversion, but TS optional fields do not guarantee semantic validity for untyped callers.\n- Numeric enum declarations do not prevent out-of-range numeric values from untyped callers unless the Rust function rejects them during conversion.\n- Callback exports use napi-rs `ThreadsafeFunction` shape: `(error: Error | null, value) => void`. Native code generally emits successful values; hard failures reject/throw through the owning call.\n\n## Maintainer checklist for binding changes\n\nWhen adding/changing an export, update all of:\n\n1. Rust `#[napi]` implementation in the owning `crates/pi-natives/src/<module>.rs`.\n2. `crates/pi-natives/src/lib.rs` if a new module is added.\n3. Any consumer imports/callsites in `packages/coding-agent` or `packages/tui`.\n4. Build output by running the natives build so `native/index.d.ts` and `native/index.js` stay in sync.\n5. `scripts/gen-enums.ts` if enum runtime export patching needs to change.\n\nDo not add a parallel TS wrapper convention unless the package design intentionally moves back to wrappers; current consumers depend on the direct generated API.\n",
|
|
39
|
+
"natives-build-release-debugging.md": "# Natives Build, Release, and Debugging Runbook\n\nThis runbook describes how `@oh-my-pi/pi-natives` produces `.node` addons, generated declarations, and compiled-binary embedded payloads, and how to debug loader/build failures.\n\nIt follows the architecture terms from `docs/natives-architecture.md`:\n\n- **build-time artifact production** (`scripts/build-native.ts`)\n- **embedded addon manifest generation** (`scripts/embed-native.ts`)\n- **runtime addon loading** (`native/index.js`, `native/loader-state.js`)\n\n## Implementation files\n\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `crates/pi-natives/Cargo.toml`\n\n## Build pipeline overview\n\n### 1) Build entrypoints\n\n`packages/natives/package.json` scripts:\n\n- `bun scripts/build-native.ts` (`build`) → N-API build, addon install, generated declarations install, explicit ESM export and enum runtime patch.\n- `bun scripts/embed-native.ts` (`embed:native`) → generate `native/embedded-addon.js` plus `native/embedded-addons.<tag>.tar.gz` from built files.\n\nRoot scripts include `build:native` as `bun --cwd=packages/natives run build`.\n\n### 2) N-API/Rust artifact build\n\n`build-native.ts` invokes the `@napi-rs/cli` binary directly from `node_modules/.bin` with:\n\n- `napi build`\n- `--manifest-path crates/pi-natives/Cargo.toml`\n- `--package-json-path packages/natives/package.json`\n- `--platform`\n- `--no-js`\n- `--dts index.d.ts`\n- `--profile local` for non-CI local native builds, otherwise `--profile ci`\n- optional `--target <CROSS_TARGET>`\n\n`crates/pi-natives/Cargo.toml` declares `crate-type = [\"cdylib\"]`; napi-rs emits `.node` artifacts plus generated `index.d.ts` in an isolated temporary output directory under `packages/natives/native/.build/`.\n\n### 3) Artifact install\n\nAfter napi-rs succeeds, `build-native.ts`:\n\n1. resolves the built addon in the isolated output directory;\n2. normalizes its name to `pi_natives.<platform>-<arch>(-variant).node` when needed;\n3. installs the addon into `packages/natives/native/` with temp-file + rename semantics;\n4. copies generated `index.d.ts` into `packages/natives/native/`;\n5. runs `generateEnumExports()` to render explicit named ESM exports for classes/functions and runtime enum objects in the checked-in `native/index.js`.\n\nWindows locked-DLL update failures are handled at runtime by staging install candidates into the versioned native cache; install/rename failures during local builds still include explicit file-operation diagnostics.\n\n## Target/variant model and naming conventions\n\n## Platform tag\n\nBoth build and runtime use platform tag:\n\n`<platform>-<arch>` (example: `darwin-arm64`, `linux-x64`).\n\n## Variant model (x64 only)\n\nx64 supports CPU variants:\n\n- `modern` (AVX2-capable path)\n- `baseline` (fallback)\n\nNon-x64 uses a single default artifact with no variant suffix.\n\n### Output filenames\n\n- x64: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- non-x64: `pi_natives.<platform>-<arch>.node`\n\nRuntime x64 candidate order also includes the unsuffixed default filename after the selected variant candidates.\n\n## Environment flags and build options\n\n## Runtime flags\n\n- `PI_NATIVE_VARIANT`: x64 runtime override; valid values are `modern` and `baseline`.\n- `PI_COMPILED`: legacy compiled-mode signal. A populated embedded-addon manifest is also a compiled-mode signal; compiled release builds additionally define `process.env.PI_COMPILED=\"true\"` during `bun build --compile`.\n\n## Build-time flags/options\n\n- `CROSS_TARGET`: passed to napi-rs as `--target <CROSS_TARGET>`.\n- `TARGET_PLATFORM`: override output platform tag naming.\n- `TARGET_ARCH`: override output arch naming.\n- `TARGET_VARIANT` (x64 only): force `modern` or `baseline` for output filename and RUSTFLAGS policy.\n- `CARGO_TARGET_DIR`: respected if set; otherwise the default `target/` dir is used so `Swatinem/rust-cache` can cache cleanly.\n- `RUSTFLAGS`:\n - if unset and not cross-compiling, script sets:\n - modern: `-C target-cpu=x86-64-v3`\n - baseline: `-C target-cpu=x86-64-v2`\n - non-x64 / no variant: `-C target-cpu=native`\n - if already set, script does not override.\n\n## Build state/lifecycle transitions\n\n### Build lifecycle (`build-native.ts`)\n\n1. **Init**: parse env, resolve target tuple, cross/local mode, profile label.\n2. **Variant resolve**:\n - non-x64 → no variant;\n - x64 + `TARGET_VARIANT` → explicit variant;\n - x64 cross-build without `TARGET_VARIANT` → hard error;\n - x64 local build without override → detect host AVX2.\n3. **CPU policy**: set `RUSTFLAGS` for the resolved variant unless the caller already provided one.\n4. **Compile**: run napi-rs against `crates/pi-natives` into an isolated output directory.\n5. **Locate artifact**: accept the canonical filename or a single napi-rs-generated `pi_natives.<platform>-<arch>*.node` candidate.\n6. **Install**: copy/rename addon into `packages/natives/native`.\n7. **Install generated declarations**: copy `index.d.ts`.\n8. **Patch exports/enums**: regenerate explicit ESM exports and enum runtime objects.\n9. **Cleanup**: remove the temporary build output directory.\n\nFailure exits have explicit error text for invalid variants, failed napi build, missing/multiple output artifacts, generated binding install failure, stripped CI ELF artifacts that still contain forbidden symbol/string-table sections, and install/rename failure.\n\n### Embed lifecycle (`embed-native.ts`)\n\n1. **Init**: compute platform tag from `TARGET_PLATFORM`/`TARGET_ARCH` or host values.\n2. **Candidate set**:\n - x64 looks for `modern` and `baseline` files;\n - non-x64 looks for one default file.\n3. **Validate availability**: at least one expected file must exist in `packages/natives/native`.\n4. **Generate archive + manifest**: write `native/embedded-addons.<platform>-<arch>.tar.gz` containing all available target addon files and `native/embedded-addon.js` with package version, archive metadata, and file sizes.\n5. **Runtime extraction ready** for compiled mode.\n\n`--reset` writes the null manifest stub (`embeddedAddon = null`) without validating addon availability.\n\n## Dev workflow vs shipped/compiled behavior\n\n## Local development workflow\n\nTypical local loop:\n\n1. Build addon: `bun --cwd=packages/natives run build`.\n2. Loader resolves package-local `native/` candidates, then executable-dir fallback candidates.\n3. Generated declarations in `native/index.d.ts` describe the public TS API.\n\n## Shipped/compiled binary workflow\n\nIn compiled mode (`PI_COMPILED`, Bun embedded URL markers, or populated embedded manifest):\n\n1. Loader computes versioned cache dir: `<getNativesDir()>/<packageVersion>`.\n2. If embedded manifest matches current platform+version, loader extracts the selected file from `embedded-addons.<tag>.tar.gz` into that versioned dir when the cached file is absent or has the wrong size.\n3. Runtime candidate order includes:\n - extracted versioned cache path, if available,\n - versioned cache dir,\n - legacy compiled-binary dir (`%LOCALAPPDATA%/omp` on Windows, `~/.local/bin` elsewhere),\n - package/executable directories.\n4. First successfully loaded addon with the expected version sentinel is returned.\n\nThis is why packaging + runtime loader expectations must align: filenames, platform tags, CPU variants, and embedded manifest version must match what `native/index.js` probes.\n\n## JS API ↔ Rust export mapping (build sanity subset)\n\nGenerated declarations currently include exports from these Rust modules:\n\n| Area | Representative JS exports | Rust source |\n| ---------------------- | ------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |\n| Search/workspace | `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `listWorkspace`, `invalidateFsScanCache` | `grep.rs`, `fd.rs`, `glob.rs`, `workspace.rs`, `fs_cache.rs` |\n| AST/block/summary | `astGrep`, `astEdit`, `blockRangeAt`, `summarizeCode` | `ast.rs`, `block.rs`, `summary.rs` |\n| Text/highlight/tokens | `visibleWidth`, `truncateToWidth`, `highlightCode`, `countTokens` | `text.rs`, `highlight.rs`, `tokens.rs` |\n| Shell/PTY/process/keys | `executeShell`, `Shell`, `PtySession`, `Process`, `parseKey`, `applyBashFixups` | `shell.rs`, `pty.rs`, `ps.rs`, `keys.rs` |\n| Media/system/iso | `encodeSixel`, clipboard, macOS appearance/power, `getWorkProfile`, `isoBackend`, `isoStart`, `isoDiff` | `sixel.rs`, `clipboard.rs`, `appearance.rs`, `power.rs`, `prof.rs`, `iso.rs` |\n\n## Failure behavior and diagnostics\n\n## Build-time failures\n\n- Invalid variant configuration:\n - `TARGET_VARIANT` set on non-x64 → immediate error.\n - unsupported `TARGET_VARIANT` value → immediate error.\n - x64 cross-build without explicit `TARGET_VARIANT` → immediate error.\n- napi-rs build failure: script surfaces non-zero exit and stderr.\n- Artifact not found or ambiguous: script prints expected/candidate filenames and output directory contents.\n- Install failure: explicit message; Windows includes locked-file hint.\n- Generated binding install failure: explicit source/destination message.\n\n## Runtime loader failures (`native/index.js`)\n\n- Unsupported platform tag: throws with supported platform list after probing fails.\n- No candidate could load: throws with full candidate error list and mode-specific remediation hints.\n- Embedded extraction and Windows staging problems: archive/mkdir/write/copy errors are recorded and included in final diagnostics if load fails.\n- Version mismatch: install/compiled loads that lack the package-version sentinel are rejected during candidate probing.\n\n## Troubleshooting matrix\n\n| Symptom | Likely cause | Verify | Fix |\n| ---------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | ----------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- |\n| `Cannot find module` or dynamic library load error for every candidate | Missing release artifact, wrong platform tag, or stale compiled cache | Inspect loader error list and `packages/natives/native` filenames | Build correct target/variant; delete stale cache for the package version |\n| Export is missing at runtime but present in TypeScript | Stale `.node` loaded, generated declarations newer than binary, or Rust export not compiled | Require the actual candidate and inspect `Object.keys(mod)` | Rebuild native package and remove stale candidate/cache paths |\n| x64 machine loads baseline when modern expected | `PI_NATIVE_VARIANT=baseline`, no AVX2 detected, or modern file unavailable | Check env and filenames in `native/` | Build modern variant (`TARGET_VARIANT=modern ... build`) and ship it |\n| Cross-build produces wrong-labeled binary | Mismatch between `CROSS_TARGET` and `TARGET_PLATFORM`/`TARGET_ARCH`, or missing x64 variant | Confirm env tuple and output filename | Re-run with consistent env values and explicit x64 `TARGET_VARIANT` |\n| Compiled binary fails after upgrade | Stale extracted cache, embedded archive mismatch, or embedded manifest version mismatch | Inspect `<getNativesDir()>/<version>` and loader error list | Delete versioned cache for the package version; regenerate embedded archive/manifest during packaging |\n| `embed:native` fails with `No native addons found` | Required platform artifact was not built before embedding | Check expected list in error text | Build at least one expected artifact for the target, then rerun `embed:native` |\n\n## Operational commands\n\n```bash\n# Release artifact for current host\nbun --cwd=packages/natives run build\n\n# Build explicit x64 variants\nTARGET_VARIANT=modern bun --cwd=packages/natives run build\nTARGET_VARIANT=baseline bun --cwd=packages/natives run build\n\n# Generate embedded addon manifest from built native files\nbun --cwd=packages/natives run embed:native\n# Output archive: packages/natives/native/embedded-addons.<platform>-<arch>.tar.gz\n\n# Reset embedded manifest to null stub\nbun --cwd=packages/natives run embed:native -- --reset\n```\n\n## Orchestrator-side content-addressed build cache (robomp)\n\nWhen `pi-natives` is built inside the robomp orchestrator (`python/robomp/`), workspaces share built artifacts through a content-addressed cache instead of rebuilding from scratch in every per-issue worktree. The cache is **orchestrator-side only** — `bun --cwd=packages/natives run build` itself is unchanged; the cache lives outside the build pipeline and is populated/captured around `ensure_workspace` and post-task success in `python/robomp/src/natives_cache.py`.\n\n### What is cached\n\nThe complete set of files in `packages/natives/native/` that are pure functions of the cache-key inputs:\n\n- `pi_natives.<platform>-<arch>[-variant].node` (glob `pi_natives.*.node`)\n- `index.d.ts`\n- `index.js`\n- `embedded-addon.js`\n- `manifest.json` (cache metadata: key, target triple, capture timestamp, source workspace, commit)\n\nAn entry is only considered a hit when the `.node` glob matches AND every companion plus the manifest is present. Partial entries are evicted on GC.\n\n### Cache key\n\nThe key is `sha256` over `(path \\t git-tree-hash \\n)` pairs for the following inputs, in this order (order is significant), followed by the target triple:\n\n1. `crates` (whole subtree — pi-natives transitively depends on other workspace crates)\n2. `Cargo.lock`\n3. `Cargo.toml`\n4. `rust-toolchain.toml`\n5. `packages/natives` (whole subtree — build script, `scripts/*`, package.json with napi config)\n\nTree hashes come from one `git cat-file --batch-check` invocation against `HEAD`; paths missing from `HEAD` fold in as a fixed null hash so the key stays deterministic across repos that don't ship every input. The target-triple suffix matches the napi addon basename convention (`<platform>-<arch>` for non-x64, `<platform>-<arch>-<variant>` for x64). When `TARGET_VARIANT` is unset on an x64 host the variant component is `host` rather than autodetected — the key is stable on a given machine but a `modern`/`baseline` build with an explicit `TARGET_VARIANT` gets a different key.\n\nAnything outside this input set (Rust toolchain auto-installed delta, host glibc, env vars other than `TARGET_VARIANT`) is **not** in the key. If you need to invalidate after such a change, delete the cache directory by hand or bump one of the input files.\n\n### Layout and ownership\n\n- Root: `/data/cache/pi-natives` (provisioned by `entrypoint.sh` alongside the cargo caches, owned `root:omp`, mode `02770` setgid so cached files inherit `gid=omp` and stay readable by every slot user).\n- Per-repo subdirectory: `<root>/<repo-slug>/` where the slug is `owner__repo` (mirrors `SandboxManager.pool_path`).\n- Per-entry directory: `<root>/<repo-slug>/<sha256-key>/` containing the cached files plus `manifest.json`.\n- Per-repo lockfile: `<root>/<repo-slug>/.lock` (advisory `fcntl.flock`, exclusive on capture and GC).\n- Staging dirs (`.<key>.tmp.<pid>`) during capture; renamed atomically into the final entry path. Stale staging dirs from crashed captures are swept on GC.\n\n### Populate and capture semantics\n\n- **Populate** (workspace ← cache) runs inside `ensure_workspace`. On a key hit the `.node` is **hardlinked** into the workspace (zero-copy, shared inode); the companion `index.d.ts` / `index.js` / `embedded-addon.js` are **copied** (independent inodes) because the napi build's `installGeneratedBindings` and `gen-enums.ts` rewrite those files via `open(..., 'w')` — an in-place truncate that would otherwise propagate through a hardlink and corrupt the cache. Cross-device hardlink failures (`EXDEV`) fall back to copy.\n- **Capture** (cache ← workspace) runs from the post-task success path when the build produced a complete artifact set. Capture uses **copy**, not hardlink: hardlinking a slot-owned workspace file would preserve slot UID ownership on the cached inode and defeat the shared-group model. Copying creates a fresh root-owned, `gid=omp` inode via the setgid cache root. Capture is idempotent under the per-repo flock: a concurrent capture for the same key returns the existing entry.\n\n### Garbage collection\n\nA periodic GC loop runs in `WorkerPool` with two caps per repo. When either cap is exceeded, oldest entries (by `manifest.json.captured_at`) are dropped first:\n\n- entry count cap (`max_entries_per_repo`, default 8)\n- byte cap (`max_bytes`, default 4 GiB)\n\nWorkspaces that hardlinked a `.node` before GC retain access via the kernel inode refcount — `rmtree` of the cache entry does not delete the file from the workspace.\n\n### Configuration (settings on `robomp.config.Settings`)\n\n| Env var | Default | Effect |\n| ------------------------------------------- | ------------------------ | --------------------------------------------------------------------------------------------------- |\n| `ROBOMP_NATIVES_CACHE_ENABLED` | `true` | Master switch. When false the populate/capture hooks no-op and every workspace builds from scratch. |\n| `ROBOMP_NATIVES_CACHE_ROOT` | `/data/cache/pi-natives` | Cache root directory. Must be `root:omp 02770` for cross-slot reads. |\n| `ROBOMP_NATIVES_CACHE_MAX_ENTRIES_PER_REPO` | `8` | LRU entry-count cap, per repo slug. |\n| `ROBOMP_NATIVES_CACHE_MAX_BYTES` | `4294967296` (4 GiB) | LRU byte cap, per repo slug. |\n| `ROBOMP_NATIVES_CACHE_GC_INTERVAL_SECONDS` | `3600` | Period of the background GC loop in `WorkerPool`. |\n\n### Manual invalidation\n\n- One key: `rm -rf /data/cache/pi-natives/<repo-slug>/<sha256>`.\n- One repo: `rm -rf /data/cache/pi-natives/<repo-slug>`.\n- Everything: `rm -rf /data/cache/pi-natives/*` (preserve the root so its setgid mode survives).\n- Stuck lock: `rm /data/cache/pi-natives/<repo-slug>/.lock` (only when no orchestrator process is touching the repo).\n\nTrigger an automatic miss by editing any path in the key set: a single touched byte under `crates/`, `Cargo.lock`, `Cargo.toml`, `rust-toolchain.toml`, or `packages/natives/` shifts the tree hash and forces a fresh build at the next populate.\n",
|
|
40
|
+
"natives-media-system-utils.md": "# Natives media + system utilities\n\nThis document covers the media/system/conversion exports currently present in `@oh-my-pi/pi-natives`: terminal SIXEL image encoding, HTML conversion, clipboard access, token counting, macOS appearance/power helpers, and work profiling.\n\n## Implementation files\n\n- `crates/pi-natives/src/sixel.rs`\n- `crates/pi-natives/src/html.rs`\n- `crates/pi-natives/src/clipboard.rs`\n- `crates/pi-natives/src/tokens.rs`\n- `crates/pi-natives/src/appearance.rs`\n- `crates/pi-natives/src/power.rs`\n- `crates/pi-natives/src/prof.rs`\n- `crates/pi-natives/src/task.rs`\n- `packages/natives/native/index.d.ts`\n\nThere is no native `PhotonImage` class, `image.rs`, or ProjFS overlay helper module in the current `pi-natives` addon. General-purpose image decode/resize/encode is expected to live outside this native surface; the native image export here is only terminal SIXEL encoding.\n\n## JS API ↔ Rust export/module mapping\n\n| JS export | Rust N-API export | Rust module |\n| ------------------------------------- | ------------------------------ | --------------- |\n| `encodeSixel(bytes, width, height)` | `encode_sixel` | `sixel.rs` |\n| `htmlToMarkdown(html, options?)` | `html_to_markdown` | `html.rs` |\n| `copyToClipboard(text)` | `copy_to_clipboard` | `clipboard.rs` |\n| `readImageFromClipboard()` | `read_image_from_clipboard` | `clipboard.rs` |\n| `countTokens(input, encoding?)` | `count_tokens` | `tokens.rs` |\n| `detectMacOSAppearance()` | `detect_mac_os_appearance` | `appearance.rs` |\n| `MacAppearanceObserver.start(cb)` | `MacAppearanceObserver::start` | `appearance.rs` |\n| `MacOSPowerAssertion.start(options?)` | `MacOSPowerAssertion::start` | `power.rs` |\n| `getWorkProfile(lastSeconds)` | `get_work_profile` | `prof.rs` |\n\n## Data format boundaries and conversions\n\n### SIXEL image encoding (`sixel`)\n\n- **JS input boundary**: `Uint8Array` containing encoded image bytes.\n- **Rust decode boundary**: format is guessed with `ImageReader::with_guessed_format()`, then decoded to `DynamicImage`.\n- **Resize boundary**: image is resized with `resize_exact(..., FilterType::Lanczos3)` only when source dimensions differ from `targetWidthPx`/`targetHeightPx`.\n- **Output boundary**: `encodeSixel(...)` returns a SIXEL escape string synchronously.\n\nSupported decode formats are whatever the compiled `image` crate supports for `ImageReader` in this build (commonly PNG/JPEG/WebP/GIF). Invalid target dimensions (`0` width or height) fail with `Target SIXEL dimensions must be greater than zero`.\n\n### HTML conversion (`html`)\n\n- **JS input boundary**: HTML `string` + optional `{ cleanContent?: boolean; skipImages?: boolean }`.\n- **Rust conversion boundary**: conversion is scheduled through `task::blocking(\"html_to_markdown\", (), ...)`; there is no timeout/abort option on this export.\n- **Output boundary**: Markdown `string` promise.\n\nConversion behavior:\n\n- `cleanContent` defaults to `false`.\n- When `cleanContent=true`, preprocessing is enabled with `PreprocessingPreset::Aggressive`, `remove_navigation=true`, and `remove_forms=true`.\n- `skipImages` defaults to `false` and is passed to `html_to_markdown_rs::ConversionOptions`.\n\n### Clipboard (`clipboard`)\n\n- `copyToClipboard(text)` is a synchronous native call using `arboard::Clipboard::set_text`.\n- `readImageFromClipboard()` runs in `task::blocking(\"clipboard.read_image\", (), ...)`.\n- Image read returns `null`/`undefined` when `arboard` reports `ContentNotAvailable`.\n- Successful image read converts clipboard RGBA data into PNG bytes and returns `{ data: Uint8Array, mimeType: \"image/png\" }`.\n- Clipboard access or image encoding failures reject/throw as native errors.\n\nThere is no current `packages/natives` TS wrapper that emits OSC52, handles Termux, or suppresses native clipboard failures. Any best-effort clipboard policy must live in consumers.\n\n### Tokens (`tokens`)\n\n- `countTokens(input, encoding?)` accepts a single string or an array of strings.\n- Arrays return one aggregate token count; array elements are encoded in parallel via rayon.\n- Default encoding is `O200kBase`; `Cl100kBase` is also exported.\n- The implementation uses `encode_ordinary`, not special-token handling.\n- BPE tables are initialized once through `LazyLock` and reused.\n\n### macOS appearance and power helpers\n\n- `detectMacOSAppearance()` returns `\"dark\"`, `\"light\"`, or `null` on non-macOS.\n- `MacAppearanceObserver.start(callback)` returns a handle with `stop()`; on macOS it uses distributed notifications plus a 2-second polling fallback, and on non-macOS it is a no-op observer.\n- `MacOSPowerAssertion.start(options?)` returns a handle with `stop()`; on macOS it acquires one or more IOKit assertions, and on other platforms it is a no-op handle.\n- Power assertion options are `{ reason?, idle?, system?, user?, display? }`. If every boolean is unset or omitted, `idle` behavior is used by default.\n\n### Work profiling (`prof`)\n\n- **Collection boundary**: profiling samples are produced by `profile_region(tag)` guards in `task::blocking` and `task::future`.\n- **Storage format**: fixed-size circular buffer (`MAX_SAMPLES = 10_000`) storing stack path, duration, and timestamp.\n- **Output boundary**: `getWorkProfile(lastSeconds)` returns:\n - `folded`: folded-stack text (flamegraph input)\n - `summary`: markdown table summary\n - `svg`: optional flamegraph SVG\n - `totalMs`, `sampleCount`\n\n## Lifecycle and state transitions\n\n### SIXEL lifecycle\n\n1. `encodeSixel(bytes, targetWidthPx, targetHeightPx)` validates target dimensions.\n2. Rust guesses and decodes the encoded image.\n3. Image is resized exactly to the target dimensions when needed.\n4. Pixels are converted to RGBA8 and encoded with `icy_sixel::sixel_encode`.\n5. The SIXEL escape string is returned synchronously.\n\nFailure transitions:\n\n- Format detection/decode failure throws.\n- Invalid target dimensions throw.\n- SIXEL encoding failure throws with `Failed to encode SIXEL: ...`.\n\n### HTML lifecycle\n\n1. `htmlToMarkdown(html, options)` schedules a blocking conversion task.\n2. Conversion runs with defaulted options (`cleanContent=false`, `skipImages=false`) unless specified.\n3. Returns markdown string or rejects with `Conversion error: ...`.\n\n### Clipboard lifecycle\n\n- Text copy constructs an `arboard::Clipboard` and calls `set_text` synchronously.\n- Image read constructs an `arboard::Clipboard`, calls `get_image`, encodes PNG on success, maps `ContentNotAvailable` to `None`, and rejects other errors.\n\n### Work profiling lifecycle\n\n1. No explicit start: profiling is active when task helpers execute.\n2. Every instrumented task scope records one sample on guard drop.\n3. Samples overwrite oldest entries after buffer capacity is reached.\n4. `getWorkProfile(lastSeconds)` reads a time window and derives folded/summary/svg artifacts.\n\nFailure transitions:\n\n- SVG generation failure is soft (`svg` omitted/undefined), while folded and summary still return.\n- Empty sample windows return empty folded data and no SVG, not an error.\n\n## Unsupported operations and error propagation\n\n### SIXEL\n\n- Unsupported or corrupted image input is a strict failure.\n- Invalid SIXEL target dimensions are a strict failure.\n- No JS fallback path is exposed by the natives package.\n\n### HTML\n\n- Conversion errors are strict failures.\n- Option omission is defaulting, not failure.\n\n### Clipboard\n\n- Text copy is strict at the native API surface.\n- Image read distinguishes \"no image\" (`null`/`undefined`) from operational failure (rejection).\n\n### Work profiling\n\n- Retrieval is strict for the function call itself.\n- Flamegraph SVG generation is nullable/optional.\n- Buffer truncation is expected ring-buffer behavior.\n\n## Platform caveats\n\n- Clipboard access depends on OS/session support exposed through `arboard`.\n- macOS appearance and power helpers intentionally return no-op/null behavior on unsupported platforms.\n- ProjFS is not exposed by this media/system native utility surface. Isolation backend selection, including any ProjFS support, lives in the separate `iso` subsystem.\n",
|
|
41
|
+
"natives-rust-task-cancellation.md": "# Native Rust task execution and cancellation (`pi-natives`)\n\nThis document describes how `crates/pi-natives` schedules native work and how cancellation flows from JS options (`timeoutMs`, `AbortSignal`) into Rust execution.\n\n## Implementation files\n\n- `crates/pi-natives/src/task.rs`\n- `crates/pi-natives/src/grep.rs`\n- `crates/pi-natives/src/glob.rs`\n- `crates/pi-natives/src/fd.rs`\n- `crates/pi-natives/src/ast.rs`\n- `crates/pi-natives/src/shell.rs`\n- `crates/pi-natives/src/pty.rs`\n- `crates/pi-natives/src/html.rs`\n- `crates/pi-natives/src/sixel.rs`\n- `crates/pi-natives/src/clipboard.rs`\n- `crates/pi-natives/src/text.rs`\n- `crates/pi-natives/src/ps.rs`\n\n## Core primitives (`task.rs`)\n\n`task.rs` defines:\n\n1. `task::blocking(tag, cancel_token, work)`\n - Wraps `napi::AsyncTask` / `Task`.\n - `compute()` runs on libuv worker threads.\n - Returns a JS `Promise<T>` for exported functions.\n - Records a profiling sample through `profile_region(tag)`.\n\n2. `task::future(env, tag, work)`\n - Wraps `env.spawn_future(...)`.\n - Runs async work on Tokio's runtime.\n - Returns `PromiseRaw<'env, T>`.\n - Records a profiling sample through `profile_region(tag)`.\n\n3. `CancelToken` / `AbortToken` / `AbortReason`\n - `CancelToken::new(timeout_ms, signal)` combines an optional deadline and optional JS `AbortSignal` converted from `Unknown`.\n - `CancelToken::heartbeat()` is cooperative cancellation for blocking loops.\n - `CancelToken::wait()` asynchronously waits for signal or timeout.\n - `CancelToken::emplace_abort_token()` creates an abortable flag when `AbortSignal`, `Shell.abort()`, or an internal bridge needs one.\n - `AbortToken::abort(reason)` lets external code request abort.\n\n## `blocking` vs `future`: execution model and selection\n\n### Use `task::blocking`\n\nUse when work is CPU-heavy or fundamentally synchronous/blocking:\n\n- regex/file scanning (`grep`, `glob`, `fuzzyFind`)\n- ast-grep search/edit worker work\n- HTML conversion\n- clipboard image read\n\nBehavior:\n\n- Work closure receives a cloned `CancelToken`.\n- Cancellation is only observed where code checks `ct.heartbeat()?`.\n- Closure `Err(...)` rejects the JS promise.\n\n### Use `task::future`\n\nUse when work must `await` async operations:\n\n- shell session orchestration (`Shell.run`, `executeShell`)\n- PTY outer promise (`PtySession.start`) before it enters `spawn_blocking`\n- async task orchestration that must bridge completion and cancellation\n\nBehavior:\n\n- Future code can race normal completion against `ct.wait()`.\n- On cancel path, async implementations typically cancel subordinate machinery and may force-abort after a grace timeout.\n\n## JS API ↔ Rust export mapping (task/cancel relevant)\n\n| JS-facing API | Rust export | Scheduler | Cancellation hookup |\n| --------------------------------------- | --------------------------- | -------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ |\n| `grep(options, onMatch?)` | `grep` | `task::blocking(\"grep\", ct, ...)` | `CancelToken::new(options.timeoutMs, options.signal)` + heartbeat checks |\n| `glob(options, onMatch?)` | `glob` | `task::blocking(\"glob\", ct, ...)` | `CancelToken::new(...)` + heartbeat checks |\n| `fuzzyFind(options)` | `fuzzy_find` | `task::blocking(\"fuzzy_find\", ct, ...)` | `CancelToken::new(...)` + heartbeat checks |\n| `astGrep(options)` / `astEdit(options)` | ast exports | blocking worker path | timeout/signal fields are accepted by options and checked cooperatively in worker loops |\n| `Shell#run(options, onChunk?)` | `Shell::run` | `task::future(env, \"shell.run\", ...)` | JS `CancelToken` is converted into `pi_shell::cancel::CancelToken`; shell races it against command completion and descendant cleanup |\n| `executeShell(options, onChunk?)` | `execute_shell` | `task::future(env, \"shell.execute\", ...)` | same cancel race and 2s graceful window |\n| `PtySession#start(options, onChunk?)` | `PtySession::start` | `task::future(env, \"pty.start\", ...)` + inner `spawn_blocking` | `CancelToken` checked in sync PTY loop via `heartbeat()` |\n| `htmlToMarkdown(html, options?)` | `html_to_markdown` | `task::blocking(\"html_to_markdown\", (), ...)` | none (`()` token) |\n| `encodeSixel(...)` | `encode_sixel` | synchronous native function | none |\n| `readImageFromClipboard()` | `read_image_from_clipboard` | `task::blocking(\"clipboard.read_image\", (), ...)` | none (`()` token) |\n\n`text.rs`, `tokens.rs`, `keys.rs`, most `ps.rs` functions, SIXEL encoding, and synchronous utility exports do not use `task::blocking`/`task::future` cancellation and therefore do not participate in this cancellation path.\n\n## Cancellation lifecycle and state transitions\n\n### `CancelToken` lifecycle\n\n```text\nCreated\n ├─ no signal + no timeout -> passive token\n ├─ signal registered -> AbortSignal callback can set AbortReason::Signal\n └─ deadline set -> timeout check becomes active\n\nRunning\n ├─ heartbeat()/wait() sees signal -> AbortReason::Signal\n ├─ heartbeat()/wait() sees deadline -> AbortReason::Timeout\n └─ no abort -> continue\n\nAborted\n └─ flag stores first observed cause for waiters; heartbeat formats it as \"Aborted: <reason>\"\n```\n\n### Before-start vs mid-execution cancellation\n\n- **Before start / before first cancellation check**:\n - `task::future` users that race on `ct.wait()` can resolve cancellation once they enter `select!`.\n - `task::blocking` users only observe cancellation when closure code reaches `heartbeat()`.\n\n- **Mid-execution**:\n - `blocking`: next `heartbeat()` returns `Err(\"Aborted: ...\")`.\n - `future`: `ct.wait()` branch wins `select!`, then code cancels subordinate async machinery.\n - shell: cancellation triggers a Tokio cancellation token, sends descendant termination waves, waits up to 2 seconds for the command task, then aborts the task if needed.\n - PTY: heartbeat failure or `kill()` terminates PTY child/process targets and drains output briefly.\n\n## Heartbeat expectations for long-running loops\n\n`heartbeat()` must run at predictable cadence in loops with unbounded or large work sets.\n\nObserved patterns:\n\n- `glob` filtering checks entries during scan/filter work.\n- `fd` scoring checks scanned candidates.\n- `grep` checks before/during expensive search and passes tokens into shared scan/cache helpers.\n- `run_pty_sync` checks every loop tick with a maximum 16ms wait cadence.\n\nPractical rule: no loop over external-size input should exceed a short bounded interval without a heartbeat.\n\n## Failure behavior and error propagation to JS\n\n### Blocking tasks\n\nError path:\n\n1. Closure returns `Err(napi::Error)` (including `heartbeat()` abort).\n2. `Task::compute()` returns `Err`.\n3. `AsyncTask` rejects JS promise.\n\nTypical error strings:\n\n- `Aborted: Timeout`\n- `Aborted: Signal`\n- domain errors (`Failed to decode image: ...`, `Conversion error: ...`, etc.)\n\n### Future tasks\n\nError path:\n\n1. Async body returns `Err(napi::Error)` or join failure is mapped (`... task failed: {err}`).\n2. `task::future`-spawned promise rejects.\n3. Shell and PTY command APIs model cancellation as structured results instead of rejection when the cancellation path wins: `exitCode` omitted, `cancelled` or `timedOut` set.\n\n### Cancellation reporting split\n\n- **Abort as error**: blocking exports using `heartbeat()?`.\n- **Abort as typed result**: shell/PTY command APIs that model cancellation in result structs.\n\nChoose one model per API and document it explicitly.\n\n## Common pitfalls\n\n1. **Missing heartbeat in blocking loops**\n - Symptom: timeout/signal appears ignored until loop ends.\n - Fix: add `ct.heartbeat()?` at loop top and before expensive per-item steps.\n\n2. **Long uncancelable sections**\n - Symptom: cancellation latency spikes during single large call (decode, sort, compression, parser invocation, etc.).\n - Fix: split work into chunks with heartbeat boundaries; if impossible, document latency.\n\n3. **Blocking async executor**\n - Symptom: async API stalls when sync-heavy code runs directly in future.\n - Fix: move CPU/sync blocks to `task::blocking` or `tokio::task::spawn_blocking`.\n\n4. **Inconsistent cancel semantics**\n - Symptom: one API rejects on cancel, another resolves with flags, confusing callers.\n - Fix: standardize per domain and keep docs aligned.\n\n5. **Forgetting cancellation bridge in nested async tasks**\n - Symptom: outer token is cancelled but inner readers/subprocess tasks keep running.\n - Fix: bridge cancellation to inner token/signal and enforce grace timeout + forced abort fallback.\n\n## Checklist for new cancellable exports\n\n1. Classify work correctly:\n - CPU-bound or sync blocking -> `task::blocking`.\n - async I/O / `await` orchestration -> `task::future`.\n\n2. Expose cancel inputs when needed:\n - include `timeoutMs` and `signal` in `#[napi(object)]` options,\n - create `let ct = task::CancelToken::new(timeout_ms, signal);`.\n\n3. Wire cancellation through all layers:\n - blocking loops: `ct.heartbeat()?` at stable intervals,\n - async orchestration: race with `ct.wait()` and cancel sub-tasks/tokens.\n\n4. Decide cancellation contract:\n - reject promise with abort error, or\n - resolve typed `{ cancelled, timedOut, ... }`,\n - keep this contract consistent for the API family.\n\n5. Propagate failures with context:\n - map errors via `Error::from_reason(format!(\"...: {err}\"))`,\n - include stage-specific prefixes (`spawn`, `decode`, `wait`, etc.).\n\n6. Handle before-start and mid-flight cancellation:\n - cancellation check/await must happen before expensive body and during long execution.\n\n7. Validate no executor misuse:\n - no long sync work directly inside async futures without `spawn_blocking`/blocking task wrapper.\n",
|
|
42
|
+
"natives-shell-pty-process.md": "# Natives Shell, PTY, Process, and Key Internals\n\nThis document covers execution/process/terminal primitives in `@oh-my-pi/pi-natives`: `shell`, `pty`, `ps`, and `keys`, using the architecture terms from `docs/natives-architecture.md`.\n\n## Implementation files\n\n- `crates/pi-natives/src/shell.rs`\n- `crates/pi-shell/src/shell.rs`\n- `crates/pi-shell/src/fixup.rs`\n- `crates/pi-shell/src/windows.rs` (Windows-only PATH enrichment)\n- `crates/pi-shell/src/process.rs`\n- `crates/pi-natives/src/pty.rs`\n- `crates/pi-natives/src/ps.rs`\n- `crates/pi-natives/src/keys.rs`\n- `crates/pi-natives/src/task.rs`\n- `packages/natives/native/index.d.ts`\n\n## Layer ownership\n\n- **Package entrypoint** (`packages/natives/native/index.js`): loads the `.node` addon and exports generated N-API bindings.\n- **Rust N-API module layer** (`crates/pi-natives/src/*`): JS-facing shell/PTY/process/key exports and callback bridging.\n- **Runtime core** (`crates/pi-shell/src/*`): brush shell execution, cancellation cleanup, minimizer integration, command fixups, and cross-platform process references.\n- **Consumers** (`packages/coding-agent`, `packages/tui`): higher-level session policy, output artifact/minimizer handling, render policy, and UI key handling.\n\n## Shell subsystem (`shell`)\n\n### API model\n\nShell execution modes:\n\n1. **One-shot** via `executeShell(options, onChunk?)`.\n2. **Persistent session** via `new Shell(options?)` then `shell.run(...)` repeatedly.\n\nBoth stream merged stdout/stderr text through a threadsafe callback and return `{ exitCode?, cancelled, timedOut, minimized? }`.\n\nRelated synchronous helper:\n\n- `applyBashFixups(command)` strips safe trailing `| head`/`| tail` pipeline caps and redundant trailing `2>&1` according to `pi_shell::fixup` rules. It returns `{ command, stripped }` and does not execute anything.\n\n`ShellOptions` supports `sessionEnv`, `snapshotPath`, and optional output `minimizer`. `ShellExecuteOptions` supports command-scoped `env`, session-level `sessionEnv`, `snapshotPath`, timeout/signal, and optional minimizer. `ShellRunOptions` supports command, cwd, command-scoped env, timeout, and signal.\n\n### Session creation and environment model\n\nRust creates `brush_core::Shell` with:\n\n- inherited environment disabled (`do_not_inherit_env: true`), followed by explicit environment reconstruction from host env,\n- profile and rc loading skipped,\n- bash-mode builtins, with `exec` and `suspend` disabled,\n- native `sleep` and `timeout` builtins registered,\n- skip-list for shell-sensitive vars (`PS1`, `PWD`, `SHLVL`, bash function exports, etc.),\n- a non-exported `env=\"$env\"` fallback so PowerShell-style `$env:NAME` survives brush parameter expansion unless the user shadows `env`.\n\nSession env behavior:\n\n- `ShellOptions.sessionEnv` / one-shot `sessionEnv` is applied at session creation.\n- `ShellRunOptions.env` / one-shot `env` is command-scoped (`EnvironmentScope::Command`) and popped after the command.\n- `PATH` is merged specially on Windows with case-insensitive dedupe.\n- Windows-only path enrichment (`pi-shell/src/windows.rs`) appends discovered Git-for-Windows paths when present and not already included.\n- `snapshotPath`, when present, is sourced during session creation with stdout/stderr/stdin wired to null files.\n\n### Runtime lifecycle and state transitions\n\nPersistent shell (`Shell.run`) uses this state machine:\n\n- **Idle/Uninitialized**: `session: None`.\n- **Running**: first `run()` lazily creates a session, stores an abort token, executes command.\n- **Completed + keepalive**: if execution control flow is normal, abort state is cleared and session is reused.\n- **Completed + teardown**: if control flow is loop/script/shell-exit related, session is dropped.\n- **Cancelled/Timed out**: Tokio cancellation token is triggered, descendants started after the baseline snapshot receive termination waves, a 2-second graceful wait is allowed, the task may be aborted, and the persistent session is dropped if the lock can be acquired.\n- **Error**: session is dropped.\n\nOne-shot shell (`executeShell`) always creates and drops a fresh session per call.\n\n### Streaming/output and minimizer behavior\n\n- Stdout/stderr are routed into a shared pipe and read concurrently.\n- Reader decodes UTF-8 incrementally; invalid byte sequences emit `U+FFFD` replacement chunks.\n- The command runs with `ProcessGroupPolicy::NewProcessGroup`.\n- After the foreground command completes, the reader drains until EOF, 250ms of idle output, or 2s maximum; reader shutdown then gets a 250ms timeout.\n- Optional minimizer configuration can capture and rewrite output. When minimization occurs, the result includes `minimized` with filter name, replacement text, original text, and byte counts.\n- Consumers are responsible for persisting or displaying minimizer artifacts; the native result only carries the data.\n\n### Cancellation, timeout, and abort\n\n- `CancelToken` is constructed from `timeoutMs` and optional `AbortSignal`, then converted into the shared `pi_shell::cancel::CancelToken`.\n- On cancellation/timeout, shell cancellation token is triggered, descendant cleanup runs, then the task gets a 2-second graceful window before forced abort.\n- Structured result flags are used:\n - timeout -> `exitCode` omitted, `timedOut: true`.\n - abort signal / `Shell.abort()` -> `exitCode` omitted, `cancelled: true`.\n\n`Shell.abort()` behavior:\n\n- aborts the current running command for that `Shell` instance through the stored `AbortToken`,\n- resolves successfully even when nothing is running.\n\n### Failure behavior\n\nCommon surfaced errors include:\n\n- session init failures (`Failed to initialize shell`),\n- cwd errors (`Failed to set cwd`),\n- env set/pop failures,\n- snapshot source failures (`Failed to source snapshot`),\n- pipe creation/clone failures,\n- execution failure (`Shell execution failed: ...`),\n- task wrapper failures (`Shell execution task failed: ...`).\n\n## PTY subsystem (`pty`)\n\n### API model\n\n`new PtySession()` exposes:\n\n- `start(options, onChunk?) -> Promise<{ exitCode?, cancelled, timedOut }>`\n- `write(data)`\n- `resize(cols, rows)`\n- `kill()`\n\n`PtyStartOptions` supports `command`, optional `cwd`, optional `env`, `timeoutMs`, `signal`, `cols`, `rows`, and optional `shell`. The default shell is `sh`.\n\n### Runtime lifecycle and state transitions\n\n`PtySession` state machine:\n\n- **Idle**: `core: None`.\n- **Reserved**: `start()` installs control channel synchronously (`core: Some`) before async work begins, so `write/resize/kill` become immediately valid.\n- **Running**: blocking PTY loop handles child state, reader events, cancellation heartbeat, and control messages.\n- **Terminal closed / drain**: child exit or cancellation starts a short reader drain window.\n- **Finalized**: `core` is always reset to `None` after start task completion (success or error).\n\nConcurrency guard:\n\n- starting while already running returns `PTY session already running`.\n\n### Spawn/attach/write/read/terminate patterns\n\n- PTY opened via `portable_pty::native_pty_system().openpty(...)`.\n- On Windows, `openpty()` is run on a helper thread with a 5s startup timeout; timeout rejects with `PTY creation timed out (5s). ConPTY may be unavailable on this system.`\n- Command runs through the configured shell:\n - `cmd.exe`/`cmd` gets `/c`,\n - `powershell`/`pwsh` gets `-Command`,\n - other shells get `-lc`.\n- Default size is `120x40`; dimensions are clamped (`cols 20..400`, `rows 5..200`) on start and resize.\n- `write()` sends raw bytes to PTY stdin.\n- `resize()` sends a control message and clamps dimensions again.\n- `kill()` sends a control message that marks the run cancelled and terminates PTY process targets.\n\nOutput path:\n\n- dedicated reader thread reads master stream,\n- incremental UTF-8 decode emits `U+FFFD` for invalid bytes,\n- chunks forwarded through N-API threadsafe callback.\n\nTermination path:\n\n- `terminate_pty_processes` targets the PTY process group when available and the child pid when available.\n- It sends the platform `TERM_SIGNAL`, calls `child.kill()`, then sends the platform `KILL_SIGNAL`.\n- On Windows, ConPTY input is closed before dropping the master; master drop is offloaded to a background thread and waited for up to 2s to avoid deadlock.\n\n### Cancellation and timeout semantics\n\n- `timeoutMs` and `AbortSignal` feed a `CancelToken`.\n- Loop calls `ct.heartbeat()` periodically with a 16ms maximum wait cadence.\n- Timeout classification is based on the heartbeat error string containing `Timeout`.\n- Cancellation/kill starts a 300ms post-cancel drain window; normal child exit starts a 300ms post-exit drain window.\n- Final reader drain is 50ms on non-Windows and 500ms on Windows.\n\n### Failure behavior\n\nError surfaces include:\n\n- PTY allocation/open failure,\n- Windows PTY startup timeout,\n- PTY spawn failure,\n- writer/reader acquisition failure,\n- child status/wait failures,\n- lock poisoning,\n- control-channel disconnection (`PTY session is no longer available`).\n\nControl call failures when not running:\n\n- `write/resize/kill` return `PTY session is not running`.\n\n## Process subsystem (`ps`)\n\n### API model\n\nCurrent JS surface is the `Process` class:\n\n- `Process.fromPid(pid) -> Process | null`\n- `Process.fromPath(path) -> Process[]`\n- getters: `pid`, `ppid`\n- methods: `args()`, `killTree(signal?)`, `terminate(options?)`, `waitForExit(options?)`, `groupId()`, `children()`, `status()`\n\n`ProcessTerminateOptions` supports `{ group?, gracefulMs?, timeoutMs?, signal? }`. `ProcessWaitOptions` supports `{ timeoutMs?, signal? }`.\n\n### Behavior\n\n- `killTree(signal?)` sends the requested signal to the process and descendants, children first; on Windows the signal argument is ignored and processes are terminated via `TerminateProcess`.\n- `terminate(options?)` is async. By default it uses a 1000ms graceful phase and a 5000ms post-hard-kill wait. Passing `gracefulMs < 0` skips the graceful phase.\n- `waitForExit(options?)` resolves `true` when the process exits and `false` on timeout.\n- `status()` returns `\"running\"` or `\"exited\"`.\n\nThe platform-specific implementation lives in `pi_shell::process`; `crates/pi-natives/src/ps.rs` is a N-API shim plus re-exports used by PTY termination.\n\n## Key parsing subsystem (`keys`)\n\n### API model\n\nExposed helpers:\n\n- `parseKey(data, kittyProtocolActive)`\n- `matchesKey(data, keyId, kittyProtocolActive)`\n- `parseKittySequence(data)`\n- `matchesKittySequence(data, expectedCodepoint, expectedModifier)`\n- `matchesLegacySequence(data, keyName)`\n\n### Parsing model\n\nThe parser combines:\n\n- direct single-byte mappings (`enter`, `tab`, `ctrl+<letter>`, printable ASCII),\n- O(1) legacy escape-sequence lookup (PHF map),\n- xterm `modifyOtherKeys` parsing,\n- Kitty protocol parsing (`CSI u`, `CSI ~`, `CSI 1;...<letter>`),\n- normalization to key IDs (`ctrl+c`, `shift+tab`, `pageUp`, `f5`, etc.).\n\nModifier handling:\n\n- only shift/alt/ctrl bits are compared for key matching,\n- lock bits are masked out before comparisons.\n\nLayout behavior:\n\n- base-layout fallback is intentionally constrained so remapped layouts do not create false matches for ASCII letters/symbols.\n\n### Failure behavior\n\n- Unrecognized or invalid sequences produce `null` from parse functions.\n- Match functions return `false` on parse failure or mismatch.\n- No thrown error surface for malformed key input.\n\n## JS API ↔ Rust export mapping\n\n### Shell + PTY + Process\n\n| JS API | Rust N-API export | Notes |\n| --------------------------------- | --------------------------------------- | ----------------------------------------- |\n| `executeShell(options, onChunk?)` | `executeShell` (`execute_shell`) | One-shot shell execution |\n| `new Shell(options?)` | `Shell` class | Persistent shell session |\n| `shell.run(options, onChunk?)` | `Shell::run` | Reuses session on keepalive control flow |\n| `shell.abort()` | `Shell::abort` | Aborts active run for that shell instance |\n| `applyBashFixups(command)` | `applyBashFixups` (`apply_bash_fixups`) | Synchronous command rewrite helper |\n| `new PtySession()` | `PtySession` class | Stateful PTY session |\n| `pty.start(options, onChunk?)` | `PtySession::start` | Interactive PTY run |\n| `pty.write(data)` | `PtySession::write` | Raw stdin passthrough |\n| `pty.resize(cols, rows)` | `PtySession::resize` | Clamped terminal dimensions |\n| `pty.kill()` | `PtySession::kill` | Terminates active PTY child/targets |\n| `Process.fromPid(pid)` | `Process::from_pid` | Stable process reference lookup |\n| `Process.fromPath(path)` | `Process::from_path` | Executable-path process lookup |\n| `process.killTree(signal?)` | `Process::kill_tree` | Children-first process tree termination |\n| `process.terminate(options?)` | `Process::terminate` | Graceful then hard process termination |\n| `process.waitForExit(options?)` | `Process::wait_for_exit` | Async exit wait |\n| `process.children()` | `Process::children` | Direct children as `Process[]` |\n| `process.status()` | `Process::status` | `running` / `exited` |\n\n### Keys\n\n| JS API | Rust N-API export | Notes |\n| ---------------------------------------------- | --------------------------------------------------- | ------------------------------- |\n| `matchesKittySequence(data, cp, mod)` | `matchesKittySequence` (`matches_kitty_sequence`) | Kitty codepoint+modifier match |\n| `parseKey(data, kittyProtocolActive)` | `parseKey` (`parse_key`) | Normalized key-id parser |\n| `matchesLegacySequence(data, keyName)` | `matchesLegacySequence` (`matches_legacy_sequence`) | Exact legacy sequence map check |\n| `parseKittySequence(data)` | `parseKittySequence` (`parse_kitty_sequence`) | Structured Kitty parse result |\n| `matchesKey(data, keyId, kittyProtocolActive)` | `matchesKey` (`matches_key`) | High-level key matcher |\n\n## Abandoned session cleanup and finalization notes\n\n- **Shell persistent session**: if a run is cancelled/timed out/errors/non-keepalive control flow, Rust drops the internal session state. Successful normal runs keep the session for reuse.\n- **PTY session**: `core` is always cleared after `start()` finishes, including failure paths.\n- **No explicit JS finalizer-driven kill contract** is exposed by wrappers; cleanup is primarily tied to run completion/cancellation paths. Callers should use `timeoutMs`, `AbortSignal`, `shell.abort()`, or `pty.kill()` for deterministic teardown.\n",
|
|
43
|
+
"natives-text-search-pipeline.md": "# Natives Text/Search Pipeline\n\nThis document maps the `@oh-my-pi/pi-natives` text/search/code surface from generated JS/TS exports to Rust N-API modules and back to JS result objects.\n\nTerminology follows `docs/natives-architecture.md`:\n\n- **Generated binding**: public API in `packages/natives/native/index.d.ts`.\n- **Rust module layer**: N-API exports in `crates/pi-natives/src/*`.\n- **Shared scan cache**: `fs_cache`-backed directory-entry cache used by discovery/search flows.\n\n## Implementation files\n\n- `packages/natives/native/index.d.ts`\n- `crates/pi-natives/src/grep.rs`\n- `crates/pi-natives/src/glob.rs`\n- `crates/pi-natives/src/glob_util.rs`\n- `crates/pi-natives/src/fs_cache.rs`\n- `crates/pi-natives/src/fd.rs`\n- `crates/pi-natives/src/ast.rs`\n- `crates/pi-natives/src/text.rs`\n- `crates/pi-natives/src/highlight.rs`\n- `crates/pi-natives/src/tokens.rs`\n\n## JS API ↔ Rust export mapping\n\n| JS API | Rust export (`#[napi]`, snake_case -> camelCase) | Rust module |\n| ------------------------------------------------------------------------------- | ------------------------------------------------ | -------------- |\n| `grep(options, onMatch?)` | `grep` | `grep.rs` |\n| `search(content, options)` | `search` | `grep.rs` |\n| `hasMatch(content, pattern, ignoreCase?, multiline?)` | `hasMatch` | `grep.rs` |\n| `fuzzyFind(options)` | `fuzzyFind` | `fd.rs` |\n| `glob(options, onMatch?)` | `glob` | `glob.rs` |\n| `invalidateFsScanCache(path?)` | `invalidateFsScanCache` | `fs_cache.rs` |\n| `astGrep(options)` | `astGrep` | `ast.rs` |\n| `astEdit(options)` | `astEdit` | `ast.rs` |\n| `wrapTextWithAnsi(text, width, tabWidth)` | `wrapTextWithAnsi` | `text.rs` |\n| `truncateToWidth(text, maxWidth, ellipsis, pad, tabWidth)` | `truncateToWidth` | `text.rs` |\n| `sliceWithWidth(line, startCol, length, strict, tabWidth)` | `sliceWithWidth` | `text.rs` |\n| `extractSegments(line, beforeEnd, afterStart, afterLen, strictAfter, tabWidth)` | `extractSegments` | `text.rs` |\n| `visibleWidth(text, tabWidth)` | `visibleWidth` | `text.rs` |\n| `highlightCode(code, lang, colors)` | `highlightCode` | `highlight.rs` |\n| `supportsLanguage(lang)` | `supportsLanguage` | `highlight.rs` |\n| `getSupportedLanguages()` | `getSupportedLanguages` | `highlight.rs` |\n| `countTokens(input, encoding?)` | `countTokens` | `tokens.rs` |\n\n## Pipeline overview by subsystem\n\n## 1) Regex search (`grep`, `search`, `hasMatch`)\n\n### Input/options flow\n\n1. Callers invoke generated native exports directly; there is no package-local TS wrapper that renames `search` to `searchContent`.\n2. Rust option structs in `grep.rs` deserialize camelCase fields (`ignoreCase`, `maxCount`, `contextBefore`, `contextAfter`, `maxColumns`, `timeoutMs`).\n3. `grep` creates `CancelToken` from `timeoutMs` + `AbortSignal` and runs inside `task::blocking(\"grep\", ...)`.\n4. `search` and `hasMatch` operate on provided string/`Uint8Array` content and do not scan the filesystem.\n\n### Execution branches\n\n- **In-memory branch**\n - `search` -> `search_sync` / search helpers over provided content bytes.\n - `hasMatch` compiles/checks pattern against provided content and returns a boolean.\n - No filesystem scan, no `fs_cache`.\n- **Single-file branch**\n - `grep` resolves path, checks metadata is file, and searches that file.\n- **Directory branch**\n - Optional cache lookup via `fs_cache::get_or_scan` when `cache: true`.\n - Fresh scan via `fs_cache::force_rescan` when `cache: false`.\n - Optional empty-result recheck when cached results are older than the empty-result recheck threshold.\n - Entry filtering: file-only + optional glob filter (`glob_util`) + optional type filter mapping (`js`, `ts`, `rust`, etc.).\n\n### Search/collection semantics\n\n- Regex engine: `grep_regex::RegexMatcherBuilder` with `ignoreCase` and `multiline`.\n- Context resolution:\n - `contextBefore/contextAfter` override legacy `context`.\n - Non-content modes do not collect context.\n- Output modes:\n - `content` -> one `GrepMatch` per hit.\n - `count` and `filesWithMatches` map to count-style entries (`lineNumber=0`, `line=\"\"`, `matchCount` set).\n - `offset` and `maxCount` are applied during aggregation across sorted file results.\n - Directory searches use parallel filesystem walking/searching, then aggregate per-file results to preserve global offset/limit semantics in the returned result and callback stream.\n\n### Result shaping back to JS\n\n- Rust `SearchResult`/`GrepResult` fields map to TS interfaces via N-API object conversion.\n- Counters are clamped before crossing N-API where needed.\n- `GrepResult.limitReached` is optional and emitted when true.\n- Streaming callback receives each shaped `GrepMatch` for content or count-style entries.\n\n### Failure behavior\n\n- `search` returns `SearchResult.error` for regex/search failures instead of throwing.\n- `grep` rejects on hard errors such as invalid path, invalid glob/regex, or cancellation timeout/abort.\n- `hasMatch` returns a boolean on success and throws on invalid pattern/UTF-8 conversion errors.\n- File open/search errors in multi-file scans are skipped per-file; scan continues.\n\n### Malformed regex handling\n\n`grep.rs` sanitizes braces before regex compile:\n\n- Invalid repetition-like braces are escaped (`{`/`}` -> `\\{`/`\\}`) when they cannot form `{N}`, `{N,}`, `{N,M}`.\n- This prevents common literal-template fragments (for example `${platform}`) from failing as malformed repetition.\n- Remaining invalid regex syntax still returns a regex error.\n\n## 2) File discovery (`glob`) and fuzzy path search (`fuzzyFind`)\n\n`glob` and `fuzzyFind` share `fs_cache` scans; matching logic differs.\n\n### `glob` flow\n\n1. Caller passes `GlobOptions` directly. `pattern` and `path` are required in the generated type.\n2. Rust resolves the search path and compiles pattern via `glob_util::compile_glob`.\n3. Entry source:\n - `cache=true` -> `get_or_scan` + optional stale-empty `force_rescan`.\n - `cache=false` -> `force_rescan(..., store=false)` (fresh only).\n4. Filtering:\n - skip `.git` always;\n - skip `node_modules` unless requested (`includeNodeModules`) or pattern mentions `node_modules`;\n - apply glob match;\n - apply file-type filter; symlink `file`/`dir` filters resolve target metadata.\n5. Optional sort by mtime descending (`sortByMtime`) before truncating to `maxResults`.\n\n### `fuzzyFind` flow\n\n1. Rust implementation lives in `fd.rs`; generated export is `fuzzyFind`.\n2. Shared scan source from `fs_cache` with the same cache/no-cache split and stale-empty recheck policy.\n3. Scoring:\n - exact / starts-with / contains / subsequence-based fuzzy score;\n - separator/punctuation-normalized scoring path;\n - directory bonus and deterministic tie-break (`score desc`, then `path asc`).\n4. Symlink entries are excluded from fuzzy results.\n\n### Failure behavior\n\n- Invalid glob pattern returns an error from `glob_util::compile_glob`.\n- Search root must resolve to an existing directory for directory discovery flows.\n- Cancellation/timeouts propagate as abort errors via `CancelToken::heartbeat()` checks in loops.\n\n### Malformed glob handling\n\n`glob_util::build_glob_pattern` is tolerant:\n\n- normalizes `\\` to `/`,\n- auto-prefixes simple recursive patterns with `**/` when `recursive=true`,\n- auto-closes unbalanced `{...` alternation groups before compile.\n\n## 3) AST search/edit (`astGrep`, `astEdit`)\n\n`ast.rs` exposes syntax-aware code search and rewrite operations.\n\n- `astGrep(options)` returns matches with byte/line/column coordinates and optional metavariable bindings.\n- `astEdit(options)` returns replacement changes, per-file counts, searched/touched file counts, parse errors, and whether edits were applied.\n- `dryRun` defaults to true for edit options in the generated documentation.\n- Options include language override, path/glob/selector, strictness, limits, parse-error policy, `signal`, and `timeoutMs`.\n\nThese exports are direct native APIs used by tooling; they are not mediated by a TS wrapper in `packages/natives`.\n\n## 4) Shared scan/cache lifecycle (`fs_cache`)\n\n`fs_cache` stores scan results as normalized relative entries (`path`, `fileType`, optional `mtime` and regular-file `size`) keyed by:\n\n- canonical search root,\n- `include_hidden`,\n- `use_gitignore`,\n- `skip_node_modules`,\n- scan detail (`Minimal` vs `Full`).\n\n`follow_links` affects a fresh scan but is not currently part of the cache key.\n\n### Cache state transitions\n\n1. **Miss / disabled**\n - TTL is `0` or key absent/expired -> fresh collection.\n2. **Hit**\n - Entry age is within TTL -> return cached entries + `cache_age_ms`.\n3. **Stale-empty recheck**\n - If query yields zero matches and cache age exceeds the empty-result threshold, force one rescan.\n4. **Invalidation**\n - `invalidateFsScanCache(path?)`:\n - no arg: clear all keys;\n - path arg: remove keys for roots affected by that path.\n\n### Stale-result tradeoff\n\n- Cache favors low-latency repeated scans over immediate consistency.\n- TTL window can return stale positives/negatives.\n- Empty-result recheck reduces stale negatives for older cached scans at the cost of one extra scan.\n- Explicit invalidation is the intended correctness hook after file mutations.\n\n## 5) ANSI text utilities (`text`)\n\nThese are pure, in-memory utilities.\n\n### Boundaries and responsibilities\n\n- `text.rs` owns terminal-cell semantics:\n - ANSI sequence parsing,\n - grapheme-aware width and slicing,\n - wrap/truncate/slice behavior,\n - explicit tab-width parameter on width-sensitive APIs.\n- `grep.rs` line truncation (`maxColumns`) is separate:\n - simple character-boundary truncation of matched lines with `...`,\n - not ANSI-state-preserving and not terminal-cell width aware.\n\n### Key behaviors\n\n- `wrapTextWithAnsi`: wraps by visible width, carries active SGR codes across wrapped lines.\n- `truncateToWidth`: visible-cell truncation with ellipsis policy (`Unicode`, `Ascii`, `Omit`), optional right padding.\n- `sliceWithWidth`: column slicing with optional strict width enforcement.\n- `extractSegments`: extracts before/after segments around an overlay while restoring ANSI state for the `after` segment.\n- `sanitizeText` (ANSI/control/surrogate stripping with line-ending normalization) no longer lives in `text.rs`; it moved to `@oh-my-pi/pi-utils` as a pure-JS implementation in `packages/utils/src/sanitize-text.ts`. The native binding was removed in the same change because the JS version was competitive on the benchmarked workloads, and keeping a Rust copy forced every caller (including `pi-utils`) to pull in `@oh-my-pi/pi-natives`.\n- `visibleWidth`: counts visible terminal cells using caller-supplied tab width.\n\n### Failure behavior\n\nText functions generally return deterministic transformed output; errors are limited to N-API argument/string conversion boundaries.\n\n## 6) Syntax highlighting (`highlight`)\n\n`highlight.rs` is pure transformation; it does not use the filesystem scan cache.\n\n### Flow\n\n1. Caller passes `code`, optional `lang`, and ANSI color palette.\n2. Rust resolves syntax by token/name lookup, extension lookup, alias table fallback, then plain-text fallback.\n3. Each line is parsed with syntect `ParseState` and scope stack.\n4. Scopes map to semantic color categories and ANSI color codes are injected/reset.\n\n### Failure behavior\n\n- Per-line parse failure does not fail the call: that line is appended unhighlighted and processing continues.\n- Unknown/unsupported language falls back to plain text syntax.\n\n## 7) Token counting (`tokens`)\n\n`countTokens(input, encoding?)` is an in-memory utility.\n\n- `input` may be a single string or an array of strings.\n- Arrays return one aggregate count and are encoded in parallel in Rust.\n- Default encoding is `O200kBase`; `Cl100kBase` is also available.\n- The implementation uses ordinary tokenization, not special-token handling.\n\n## Pure utility vs filesystem-dependent flows\n\n| Flow | Filesystem access | Shared cache | Notes |\n| ---------------------------- | ----------------- | -------------------- | --------------------------------------------- |\n| `search` / `hasMatch` | No | No | regex on provided bytes/string only |\n| `text` module functions | No | No | ANSI/width utilities only |\n| `highlight` module functions | No | No | syntax + ANSI coloring only |\n| `countTokens` | No | No | tokenization only |\n| `astGrep` / `astEdit` | Yes | No | syntax-aware file search/edit |\n| `glob` | Yes | Optional | directory scans + glob filtering |\n| `fuzzyFind` | Yes | Optional | directory scans + fuzzy scoring |\n| `grep` (file/dir path) | Yes | Optional in dir mode | ripgrep over files, optional filters/callback |\n\n## End-to-end lifecycle summary\n\n1. Caller invokes generated native export with typed options.\n2. Rust validates/normalizes options and builds matcher/search config.\n3. For filesystem flows, entries are scanned (cache hit/miss/rescan where applicable) then filtered/scored/searched.\n4. Worker loops periodically call cancel heartbeat; timeout/abort can terminate execution.\n5. Rust shapes outputs into N-API objects (`lineNumber`, `matchCount`, `limitReached`, etc.).\n6. Generated bindings return typed JS objects and optional per-match callbacks for `grep`/`glob`.\n",
|
|
44
|
+
"non-compaction-retry-policy.md": "# Non-compaction auto-retry policy\n\nThis document describes the standard API-error retry path in `AgentSession`.\n\nIt explicitly excludes context-overflow recovery via auto-compaction. Overflow is handled by compaction logic and is documented separately in [`compaction.md`](../docs/compaction.md).\n\n## Implementation files\n\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/config/settings-schema.ts`](../packages/coding-agent/src/config/settings-schema.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`../src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts)\n- [`../src/modes/rpc/rpc-client.ts`](../packages/coding-agent/src/modes/rpc/rpc-client.ts)\n- [`../src/modes/rpc/rpc-types.ts`](../packages/coding-agent/src/modes/rpc/rpc-types.ts)\n\n## Scope boundary vs compaction\n\nRetry and compaction are checked from the same `agent_end` path, but they are intentionally separated:\n\n1. `agent_end` inspects the last assistant message.\n2. `#isRetryableError(...)` runs first.\n3. If retry is initiated, compaction checks are skipped for that turn.\n4. Context-overflow errors are hard-excluded from retry classification (`isContextOverflow(...)` short-circuits retry).\n5. Overflow therefore falls through to `#checkCompaction(...)` instead of standard retry.\n\nSo: overload/rate/server/network-style failures use this retry policy; context-window overflow uses compaction recovery.\n\n## Retry classification\n\n`#isRetryableError(...)` requires all of the following:\n\n- assistant `stopReason === \"error\"`\n- `errorMessage` exists\n- message is **not** context overflow\n- `errorMessage` matches transient transport/envelope patterns or `isUsageLimitError(...)`\n\nCurrent retryable inputs are regex/string-classified:\n\n- transient transport/envelope failures, including Anthropic stream-envelope failures before `message_start`\n- overloaded/provider-returned-error wording\n- rate limit / usage limit / too many requests\n- HTTP-like server classes: 429, 500, 502, 503, 504\n- service unavailable / server/internal error\n- provider-suggested retry wording, including OpenAI `retry your request` failures\n- network/connection/socket failures, refused/closed connections, upstream connect/reset-before-headers, socket hang up, timeout/timed out, fetch failed, terminated, retry delay wording, and unexpected socket close messages\n\nThis is string-pattern classification, not typed provider error codes.\n\n## Retry lifecycle and state transitions\n\nSession state used by retry:\n\n- `#retryAttempt: number` (`0` means idle)\n- `#retryPromise: Promise<void> | undefined` (tracks in-progress retry lifecycle)\n- `#retryResolve: (() => void) | undefined` (resolves `#retryPromise`)\n- `#retryAbortController: AbortController | undefined` (cancels backoff sleep)\n\nFlow (`#handleRetryableError`):\n\n1. Read `retry` settings group.\n2. If `retry.enabled === false`, stop immediately (`false`, no retry started).\n3. Increment `#retryAttempt`.\n4. Create `#retryPromise` once (first attempt in a chain).\n5. If attempt exceeded `retry.maxRetries`, emit final failure event and stop.\n6. Compute capped jittered local delay: `min(retry.baseDelayMs * 2^(attempt-1), 8000ms) * (75–100% jitter)`.\n7. For usage-limit errors, parse retry hints and call auth storage (`markUsageLimitReached(...)`); if credential switching succeeds, force delay to `0`. Otherwise wait for whichever comes first — the provider's retry-after/backoff hint, or the earliest moment a temporarily blocked sibling credential frees up (`retryAtMs` + 1s buffer) so the next attempt can pick it up.\n8. If no credential switch occurred, suppress the current model selector for cooldown, try configured retry model fallback chains, and force delay to `0` on model switch.\n9. If the final delay exceeds `retry.maxDelayMs` and no credential/model switch happened, emit final failure and do not sleep.\n10. Emit `auto_retry_start`.\n11. Remove the trailing assistant error message from agent runtime state (kept in persisted session history).\n12. Sleep with abort support.\n13. Schedule `agent.continue()` through the post-prompt task scheduler (`delayMs: 1`) for the same prompt generation.\n\n### What resets retry counters\n\n`#retryAttempt` resets to `0` in these cases:\n\n- first successful non-error, non-aborted assistant message after retries started (emits `auto_retry_end { success: true }`)\n- retry cancellation during backoff sleep\n- max retries exceeded path\n- max delay exceeded path\n\n`#retryPromise` resolves/clears when retry chain ends (success, cancellation, max-exceeded, or max-delay failure), via `#resolveRetry()`.\n\n## Backoff and max-attempt semantics\n\nSettings:\n\n- `retry.enabled` (default `true`)\n- `retry.maxRetries` (default `10`)\n- `retry.baseDelayMs` (default `500`)\n- `retry.maxDelayMs` (default `300000`, 5 minutes; `<= 0` disables the fail-fast cap)\n\nAttempt numbering:\n\n- attempt counter is incremented before max-check\n- start events use current attempt (1-based)\n- max-exceeded end event reports `attempt: this.#retryAttempt - 1` (last attempted retry count)\n\nBackoff sequence with default settings, before jitter:\n\n- attempt 1: 500 ms\n- attempt 2: 1000 ms\n- attempt 3: 2000 ms\n- attempt 4: 4000 ms\n- attempt 5+: 8000 ms\n\nThe actual local sleep is 75–100% of the nominal value, matching Anthropic-style retry jitter so concurrent sessions do not retry in lockstep.\n\nDelay override inputs can come from parsed retry headers (`retry-after-ms`, `retry-after`, `x-ratelimit-reset-ms`, `x-ratelimit-reset`) or usage-limit backoff. Credential/model fallback switches set delay to `0`; otherwise parsed hints can extend the capped local delay. If the computed delay is greater than `retry.maxDelayMs` and no switch succeeded, retry ends immediately with a final error instead of sleeping.\n\n## Abort mechanics\n\n### Explicit retry abort\n\n`abortRetry()`:\n\n- aborts `#retryAbortController` (if present)\n- resolves retry promise (`#resolveRetry()`) so awaiters are unblocked\n\nIf abort hits while sleeping, catch path emits:\n\n- `auto_retry_end { success: false, finalError: \"Retry cancelled\" }`\n- resets attempt/controller\n\n### Global operation abort interaction\n\n`abort()` calls `abortRetry()` before aborting the active agent stream. This guarantees retry backoff is cancelled when user issues a general abort.\n\n### TUI interaction\n\nOn `auto_retry_start`, EventController:\n\n- swaps `Esc` handler to `session.abortRetry()`\n- renders loader text: `Retrying (attempt/maxAttempts) in Ns… (esc to cancel)`\n\nOn `auto_retry_end`, it restores prior `Esc` handler and clears loader state.\n\n## Streaming and prompt completion behavior\n\n`prompt()` ultimately waits on `#waitForRetry()` after `agent.prompt(...)` returns.\n\nEffect:\n\n- a prompt call does not fully resolve until any started retry chain finishes (success/failure/cancel)\n- retry lifecycle is part of one logical prompt execution boundary\n\nThis prevents callers from treating a retrying turn as complete too early.\n\n## Controls: settings and RPC\n\n### Configuration knobs\n\nDefined in settings schema under retry group:\n\n- `retry.enabled`\n- `retry.maxRetries`\n- `retry.baseDelayMs`\n- `retry.maxDelayMs`\n- `retry.fallbackChains`\n- `retry.fallbackRevertPolicy` (`\"cooldown-expiry\"` by default; `\"never\"` disables automatic restoration)\n\nProgrammatic toggles in session:\n\n- `setAutoRetryEnabled(enabled)` writes `retry.enabled`\n- `autoRetryEnabled` reads `retry.enabled`\n- `isRetrying` reports whether retry lifecycle promise is active\n\n### RPC controls\n\nRPC command surface:\n\n- `set_auto_retry` → `session.setAutoRetryEnabled(command.enabled)`\n- `abort_retry` → `session.abortRetry()`\n\nClient helpers:\n\n- `RpcClient.setAutoRetry(enabled)`\n- `RpcClient.abortRetry()`\n\nBoth commands return success responses; retry progress/failure details come from streamed session events, not command response payloads.\n\n## Event emission and failure surfacing\n\nSession-level retry events:\n\n- `auto_retry_start { attempt, maxAttempts, delayMs, errorMessage }`\n- `auto_retry_end { success, attempt, finalError? }`\n- `retry_fallback_applied { from, to, role }`\n- `retry_fallback_succeeded { model, role }`\n\nPropagation:\n\n- emitted through `AgentSession.subscribe(...)`\n- forwarded to extension runner as extension events\n- in RPC mode, forwarded directly as JSON event objects (`session.subscribe(event => output(event))`)\n- in TUI, consumed by `EventController` for loader/error UI\n\nFinal failure surfacing:\n\n- On max-exceeded, max-delay failure, or cancellation, `auto_retry_end.success === false`\n- TUI shows: `Retry failed after N attempts: <finalError>`\n- Extensions/hooks receive `auto_retry_end` with same fields\n- RPC consumers receive same event object on stdout stream\n\n## Permanent stop conditions\n\nRetry stops and will not auto-continue when any of these occur:\n\n- `retry.enabled` is false\n- error is not retry-classified\n- error is context overflow (delegated to compaction path)\n- max retries exceeded\n- provider-requested delay exceeds `retry.maxDelayMs` and no credential/model switch is available\n- user cancels retry (`abort_retry` or `Esc` during retry loader)\n- global abort (`abort`) cancels retry first\n\nA new retry chain can still start later on a future retryable error after counters reset.\n\n## Operational caveats\n\n- Classification is regex text matching; provider-specific structured errors are not used here.\n- Retry strips the failing assistant error from **runtime context** before re-continue, but session history still keeps that error entry.\n- `RpcSessionState` currently exposes `autoCompactionEnabled` but not an `autoRetryEnabled` field; RPC callers must track their own toggle state or query settings through other APIs.\n- Model fallback changes append temporary `model_change` entries and may later restore the primary model when its cooldown expires, depending on `retry.fallbackRevertPolicy`.\n",
|
|
45
|
+
"notebook-tool-runtime.md": "# Notebook file runtime internals\n\nThis document describes current `.ipynb` handling in `coding-agent` and its relationship to the kernel-backed Python runtime.\n\nThe critical distinction: **notebook support is file conversion/editing, not notebook execution**. `.ipynb` files are exposed as editable cell-marked text through `read` and the edit pipeline; no notebook-specific tool starts or talks to a Python kernel.\n\n## Implementation files\n\n- [`src/edit/notebook.ts`](../packages/coding-agent/src/edit/notebook.ts)\n- [`src/edit/read-file.ts`](../packages/coding-agent/src/edit/read-file.ts)\n- [`src/tools/read.ts`](../packages/coding-agent/src/tools/read.ts)\n- [`src/tools/eval.ts`](../packages/coding-agent/src/tools/eval.ts)\n- [`src/eval/py/executor.ts`](../packages/coding-agent/src/eval/py/executor.ts)\n- [`src/eval/py/kernel.ts`](../packages/coding-agent/src/eval/py/kernel.ts)\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts)\n\n## 1) Runtime boundary: editing vs executing\n\n## `.ipynb` file conversion (`src/edit/notebook.ts`)\n\n- `read` treats `.ipynb` files as notebooks unless the selector is `:raw`.\n- The default notebook view is editable text with markers:\n - `# %% [code] cell:N`\n - `# %% [markdown] cell:N`\n - `# %% [raw] cell:N`\n- Line selectors and multi-range selectors operate on that virtual text.\n- Edit/write paths round-trip virtual text back to notebook JSON through `serializeEditedNotebookText(...)`.\n- Existing notebook metadata is preserved when a marker references an existing `cell:N`; new cells get fresh empty metadata.\n- Missing notebooks edited through this path start from an empty nbformat 4.5 notebook.\n\nNo kernel lifecycle exists in this path:\n\n- no kernel session ID\n- no code execution\n- no stream chunks from Python\n- no rich display capture\n- no output artifact pipeline from execution\n\n## Kernel-backed execution path (`src/tools/eval.ts` + `src/eval/py/*`)\n\nWhen the agent needs to run cell-style Python code (sequential cells, persistent state, rich displays), that goes through the **`eval` tool** with per-cell `language: \"py\"`, not through notebook file handling.\n\nThat path is where Python subprocess lifecycle, reset/cancel behavior, chunk streaming, rich displays, and output artifact truncation live.\n\n## 2) Notebook cell handling semantics\n\n## Source normalization\n\nNotebook JSON `source` is converted to virtual text by joining source arrays. When virtual text is serialized back, cell source is split with newline preservation:\n\n- each line ending in `\\n` stays as a separate source entry with the newline\n- a final non-newline-terminated line is stored without forcing a trailing newline\n- empty content becomes an empty `source` array\n\nThis mirrors notebook JSON conventions and avoids accidental line concatenation on later edits.\n\n## Marker parsing and cell preservation\n\n- The first representation line must be a marker; text before the first marker, including a blank line, is rejected.\n- Markers must match `# %% [code|markdown|raw]` with optional `cell:N`.\n- If `cell:N` points at an unused existing cell, that cell is cloned, its `cell_type` and `source` are updated, and unrelated metadata is preserved.\n- If no valid unused original index is present, a new cell is created.\n- Code cells ensure `execution_count` exists and `outputs` exists.\n- Markdown/raw cells remove `execution_count` and `outputs`.\n\n## Error surfaces\n\nHard failures are thrown for:\n\n- missing notebook on read\n- invalid JSON\n- missing/non-array `cells`\n- invalid cell objects or cell types\n- invalid editable representation (for example, text before the first cell marker)\n\nThese surface through the caller (`read`, edit, or `write`) as normal tool errors.\n\n## 3) Kernel session semantics (where they actually exist)\n\nKernel semantics are implemented in `executePython` / `PythonKernel` and apply to the Python backend of the `eval` tool.\n\n## Modes\n\n`PythonKernelMode`:\n\n- `session` (default)\n - kernels are cached by `(session id, cwd)`\n - multiple owners can share a retained kernel for the same key\n - execution is serialized by the tool's exclusive concurrency and backend execution path\n - dead kernels are replaced before execution\n- `per-call`\n - creates a subprocess for the request\n - executes\n - always shuts down the subprocess in `finally`\n\n## Reset behavior\n\nEach eval cell has its own optional `reset` flag. `reset: true` resets the selected Python session before that cell executes; it is not a top-level tool parameter.\n\n## Kernel death / restart / retry\n\nIn session mode:\n\n- if the retained subprocess is not alive before execution, it is replaced\n- if execution fails because the subprocess died, the kernel is replaced and the code is retried once\n- explicit `reset` is rejected while another reset for the same session key is already in progress\n\n## 4) Environment/session variable injection\n\nKernel startup and per-execution environment patching can receive:\n\n- `PI_SESSION_FILE`\n- `PI_ARTIFACTS_DIR`\n- `PI_TOOL_BRIDGE_URL`\n- `PI_TOOL_BRIDGE_TOKEN`\n- `PI_TOOL_BRIDGE_SESSION`\n\nThe runner initializes process state so code executes in the requested cwd, managed env entries are reflected in `os.environ`, and cwd is available on `sys.path`.\n\n## 5) Streaming/chunk and display handling (kernel-backed path)\n\nThe Python backend uses an NDJSON subprocess runner. The host processes frames per execution:\n\n- `stdout` / `stderr` -> text chunks to `onChunk`\n- `display` / `result` -> MIME bundle rendering\n- `error` -> traceback text and structured error metadata\n- `done` -> final status, execution count, cancellation state\n\nDisplay text MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. converted `text/html`\n\nStructured outputs captured separately include:\n\n- `application/json` -> JSON display output\n- `image/png` / `image/jpeg` -> image output\n- `application/x-omp-status` -> status event\n\nCancellation/timeout:\n\n- abort/timeout sends `SIGINT` to the runner\n- if the runner does not settle after the interrupt grace window, shutdown escalates and the kernel is recreated on the next call\n- timeout output is annotated with a timeout message\n\n## 6) Truncation and artifact behavior\n\n`OutputSink` in `src/session/streaming-output.ts` is used by kernel execution paths:\n\n- sanitizes every chunk\n- tracks total/output lines and bytes\n- optionally spills full output to an artifact file\n- keeps a UTF-8-safe in-memory tail buffer when output exceeds the configured threshold\n\n`eval` converts this metadata into result truncation notices and TUI warnings.\n\nNotebook file conversion does **not** use `OutputSink`; it has no stream/artifact truncation pipeline because it does not execute code.\n\n## 7) Renderer assumptions and formatting\n\n## Read/edit notebook representation\n\nNotebook files are rendered to the model as text. The visible cell markers are part of the editable representation, not comments that are ignored during serialization.\n\n## Python renderer (for actual execution output)\n\nKernel-backed execution rendering expects:\n\n- per-cell status transitions (`pending` / `running` / `complete` / `error`)\n- optional structured status events\n- optional JSON output trees\n- image outputs\n- truncation warnings + optional `artifact://<id>` pointer\n\nThis renderer behavior is unrelated to notebook JSON editing except that both reuse shared TUI primitives.\n\n## 8) Practical workflow\n\nIf a workflow needs both notebook mutation and execution:\n\n1. read or edit the `.ipynb` file through the normal file tools\n2. copy the desired cell source into `eval` cells with `language: \"py\"` to execute it\n3. write resulting source changes back to the notebook if needed\n\nCurrent implementation does not provide a single tool that both mutates `.ipynb` and executes notebook cells through kernel context.\n",
|
|
46
|
+
"plugin-manager-installer-plumbing.md": "# Plugin manager and installer plumbing\n\nThis document describes how `omp plugin` npm/link operations mutate plugin state on disk and how installed npm/link plugins become runtime capabilities (tools and extensions today, hooks/commands path resolution available). Marketplace installs use separate marketplace registries and cache plumbing; see `docs/marketplace.md`.\n\n## Scope and architecture\n\nThere are two plugin-management implementations in the codebase:\n\n1. **Active path used by CLI commands**: `PluginManager` (`src/extensibility/plugins/manager.ts`)\n2. **Legacy helper module**: installer functions (`src/extensibility/plugins/installer.ts`)\n\n`omp plugin` npm/link actions go through `PluginManager`; marketplace actions go through `MarketplaceManager`.\n\n`installer.ts` still documents important safety checks and filesystem behavior, but it is not the path used by `src/commands/plugin.ts` + `src/cli/plugin-cli.ts`.\n\n## Lifecycle: from CLI invocation to runtime availability\n\n```text\nomp plugin <npm/link action> ...\n -> src/commands/plugin.ts\n -> runPluginCommand(...) in src/cli/plugin-cli.ts\n -> PluginManager method (install/list/uninstall/link/...)\n -> mutate ~/.omp/plugins/{package.json,node_modules,omp-plugins.lock.json}\n -> runtime discovery: discoverAndLoadCustomTools(...) and discoverAndLoadExtensions(...)\n -> getAllPluginToolPaths(cwd) / getAllPluginExtensionPaths(cwd)\n -> custom tool loader imports tool modules; extension loader imports extension modules\n\nomp plugin install name@marketplace / omp install name@marketplace\n -> MarketplaceManager\n -> mutate ~/.omp/marketplaces.json, ~/.omp/plugins/installed_plugins.json, cache dirs\n -> installed marketplace plugin cache is surfaced as plugin roots/capabilities\n```\n\n### Command entrypoints\n\n- `src/commands/plugin.ts` defines command/flags and forwards to `runPluginCommand`.\n- `src/cli/plugin-cli.ts` maps npm/link subcommands to `PluginManager` methods:\n - `install`, `uninstall`, `list`, `link`, `doctor`, `features`, `config`, `enable`, `disable`\n- `discover`, `upgrade`, and `marketplace ...` subcommands use `MarketplaceManager`.\n- No explicit npm-plugin `update` action exists; update is done by re-running `install` with a new package/version spec.\n\n## On-disk model\n\nGlobal plugin state lives under `~/.omp/plugins`:\n\n- `package.json` — dependency manifest used by `bun install`/`bun uninstall` for npm-installed plugins\n- `node_modules/` — installed npm plugin packages or symlinks\n- `omp-plugins.lock.json` — runtime state for npm/link plugins:\n - enabled/disabled per plugin\n - selected feature set per plugin\n - persisted plugin settings\n\nProject-local overrides live at:\n\n- `<cwd>/.omp/plugin-overrides.json`\n\nOverrides are read-only from manager/loader perspective (no write path here) and can disable plugins or override features/settings for this project.\n\nMarketplace registries live separately:\n\n- `~/.omp/marketplaces.json` — configured marketplace catalogs\n- `~/.omp/plugins/installed_plugins.json` — user-scoped marketplace installs\n- `<cwd>/.omp/plugins/installed_plugins.json` — project-scoped marketplace installs when available\n- `~/.omp/plugins/cache/{marketplaces,plugins}/` — cached catalogs and plugin directories\n\n## Plugin spec parsing and metadata interpretation\n\n## Install spec grammar\n\n`parsePluginSpec` (`parser.ts`) supports:\n\n- `pkg` -> `features: null` (defaults behavior)\n- `pkg[*]` -> enable all manifest features\n- `pkg[]` -> enable no optional features\n- `pkg[a,b]` -> enable named features\n- `@scope/pkg@1.2.3[feat]` -> scoped + versioned package with explicit feature selection\n\n`extractPackageName` strips version suffix for on-disk path lookup after install.\n\n## Manifest source and required fields\n\nManifest is resolved as:\n\n1. `package.json.omp`\n2. fallback `package.json.pi`\n3. fallback `{ version: package.version }`\n\nImplications:\n\n- There is no strict schema validation in manager/loader.\n- A package missing `omp`/`pi` is still installable and listable.\n- Runtime plugin loading (`getEnabledPlugins`) skips packages without `omp`/`pi` manifest.\n- `manifest.version` is always overwritten from package `version`.\n\nMalformed `package.json` JSON is a hard failure at read time; malformed manifest shape may fail later only when specific fields are consumed.\n\n## Install/update flow (`PluginManager.install`)\n\n1. Parse feature bracket syntax from install spec.\n2. Validate package name against regex + shell-metacharacter denylist.\n3. Ensure plugin `package.json` exists (`omp-plugins`, private dependencies map).\n4. Run `bun install <packageSpec>` in `~/.omp/plugins`.\n5. Read installed package `node_modules/<name>/package.json`.\n6. Resolve manifest and compute `enabledFeatures`:\n - `[*]`: all declared features (or `null` if no feature map)\n - `[a,b]`: validates each feature exists in manifest features map\n - `[]`: empty feature list\n - bare spec: `null` (use defaults policy later in loader)\n7. Upsert lockfile runtime state: `{ version, enabledFeatures, enabled: true }`.\n\n### Update semantics\n\nBecause update is install-driven:\n\n- `omp plugin install pkg@newVersion` updates dependency and lockfile version.\n- Existing settings are preserved; state entry is overwritten for version/features/enabled.\n- No separate “check updates” or transactional migration logic exists.\n\n## Remove flow (`PluginManager.uninstall`)\n\n1. Validate package name.\n2. Run `bun uninstall <name>` in plugin dir.\n3. Remove plugin runtime state from lockfile:\n - `config.plugins[name]`\n - `config.settings[name]`\n\nIf uninstall command fails, runtime state is not changed.\n\n## List flow (`PluginManager.list`)\n\n1. Read plugin dependency map from `~/.omp/plugins/package.json`.\n2. Load lockfile runtime config (missing file -> empty defaults).\n3. Load project overrides (`<cwd>/.omp/plugin-overrides.json`, parse/read errors -> empty object with warning).\n4. For each dependency with a resolvable package.json:\n - build `InstalledPlugin` record\n - merge feature/enable state:\n - base from lockfile (or defaults)\n - project overrides can replace feature selection\n - project `disabled` list masks plugin as disabled\n\nThis is the effective state used by CLI status output and settings/features operations.\n\n## Link flow (`PluginManager.link`)\n\n`link` supports local plugin development by symlinking a local package into `~/.omp/plugins/node_modules/<pkg.name>`.\n\nBehavior:\n\n1. Resolve `localPath` against manager cwd.\n2. Require local `package.json` and `name` field.\n3. Ensure plugin dirs exist.\n4. For scoped names, create scope directory.\n5. Remove existing path at target link location.\n6. Create symlink.\n7. Add runtime lockfile entry enabled with default features (`null`).\n\nCaveat: current `PluginManager.link` does not enforce the `cwd` path-boundary check present in legacy `installer.ts` (`normalizedPath.startsWith(normalizedCwd)`), so trust is the caller’s responsibility.\n\n## Runtime loading: from installed plugin to callable capabilities\n\n## Discovery gate\n\n`getEnabledPlugins(cwd)` (`plugins/loader.ts`) reads:\n\n- plugin dependency manifest (`package.json`)\n- lockfile runtime state\n- project overrides via `getConfigDirPaths(\"plugin-overrides.json\", { user: false, cwd })`\n\nFiltering:\n\n- skip if no plugin package.json\n- skip if manifest (`omp`/`pi`) absent\n- skip if globally disabled in lockfile\n- skip if project-disabled\n\n## Capability path resolution\n\nFor each enabled plugin:\n\n- `resolvePluginExtensionPaths(plugin)`\n- `resolvePluginToolPaths(plugin)`\n- `resolvePluginHookPaths(plugin)`\n- `resolvePluginCommandPaths(plugin)`\n\nEach resolver includes base entries plus feature entries:\n\n- base entries are always included\n- explicit feature list -> only selected features\n- `enabledFeatures === null` -> enable features marked `default: true`\n\nManifest entries may point to a file or to a directory containing `index.ts`, `index.js`, `index.mjs`, or `index.cjs`. Missing files are silently skipped (`existsSync` guard).\n\n## Current runtime wiring differences\n\n- **Tools are wired into runtime today** via `discoverAndLoadCustomTools` (`custom-tools/loader.ts`), which calls `getAllPluginToolPaths(cwd)`.\n- **Extensions are wired into runtime today** via `discoverAndLoadExtensions` (`extensions/loader.ts`), which calls `getAllPluginExtensionPaths(cwd)`.\n- Paths are de-duplicated by resolved absolute path in custom tool and extension discovery (`seen` set, first path wins).\n- **Hooks/commands resolvers exist** and are exported, but this code path does not currently wire them into a runtime registry in the same way tools and extensions are wired.\n\n## Lock/state management details\n\n`PluginManager` caches runtime config in memory per instance (`#runtimeConfig`) and lazily loads once.\n\nLoad behavior:\n\n- lockfile missing -> `{ plugins: {}, settings: {} }`\n- lockfile read/parse failure -> warning + same empty defaults\n\nSave behavior:\n\n- writes full lockfile JSON pretty-printed each mutation\n\nNo cross-process locking or merge strategy exists; concurrent writers can overwrite each other.\n\n## Safety checks and trust boundaries\n\n## Input/package validation\n\nActive manager path enforces package-name validation:\n\n- regex for scoped/unscoped package specs (optionally with version)\n- explicit shell metacharacter denylist (`[;&|`$(){}[]<>\\\\]`)\n\nThis limits command-injection risk when invoking `bun install/uninstall`.\n\n## Filesystem trust boundary\n\n- Plugin code executes in-process when custom tool modules are imported; no sandboxing.\n- Manifest relative paths are joined against plugin package directory and only existence-checked.\n- The plugin package itself is trusted code once installed.\n\n## Legacy installer-only checks\n\n`installer.ts` includes additional link-time checks not mirrored in `PluginManager.link`:\n\n- local path must resolve inside project cwd\n- extra package name/path traversal guards for symlink target naming\n\nBecause CLI uses `PluginManager`, these stricter link guards are not currently on the main path.\n\n## Failure, partial success, and rollback behavior\n\nThe plugin manager is not transactional.\n\n| Operation stage | Failure behavior | Rollback |\n| -------------------------------------------------------- | -------------------------- | ----------------------------------------------------------------------------- |\n| `bun install` fails | install aborts with stderr | N/A (no state writes yet) |\n| Install succeeds, then manifest/feature validation fails | command fails | No uninstall rollback; dependency may remain in `node_modules`/`package.json` |\n| Install succeeds, then lockfile write fails | command fails | No rollback of installed package |\n| `bun uninstall` succeeds, lockfile write fails | command fails | Package removed, stale runtime state may remain |\n| `link` removes old target then symlink creation fails | command fails | No restoration of previous link/dir |\n\nOperationally, `doctor --fix` can repair some drift (`bun install`, orphaned config cleanup, invalid-feature cleanup), but it is best-effort.\n\n## Malformed/missing manifest behavior summary\n\n- Missing `omp`/`pi` field:\n - install/list: tolerated (minimal manifest)\n - runtime enabled-plugin discovery: skipped as non-plugin\n- Missing feature referenced by install spec or `features --set/--enable`: hard error with available feature list\n- Invalid `plugin-overrides.json`: ignored with fallback to `{}` in both manager and loader paths\n- Missing tool/hook/command file paths referenced by manifest: silently ignored during resolver expansion; flagged as errors only by `doctor`\n\n## Mode differences and precedence\n\n- `--dry-run` (install): returns synthetic install result, no filesystem/network/state writes.\n- `--json`: output formatting only, no behavior change.\n- Project overrides always take precedence over global lockfile for feature/settings view.\n- Effective enablement is `runtimeEnabled && !projectDisabled`.\n\n## Implementation files\n\n- [`src/commands/plugin.ts`](../packages/coding-agent/src/commands/plugin.ts) — CLI command declaration and flag mapping\n- [`src/cli/plugin-cli.ts`](../packages/coding-agent/src/cli/plugin-cli.ts) — action dispatch, user-facing command handlers\n- [`src/extensibility/plugins/manager.ts`](../packages/coding-agent/src/extensibility/plugins/manager.ts) — active install/remove/list/link/state/doctor implementation\n- [`src/extensibility/plugins/installer.ts`](../packages/coding-agent/src/extensibility/plugins/installer.ts) — legacy installer helpers and additional link safety checks\n- [`src/extensibility/plugins/loader.ts`](../packages/coding-agent/src/extensibility/plugins/loader.ts) — enabled-plugin discovery and tool/hook/command path resolution\n- [`src/extensibility/plugins/parser.ts`](../packages/coding-agent/src/extensibility/plugins/parser.ts) — install spec and package-name parsing helpers\n- [`src/extensibility/plugins/types.ts`](../packages/coding-agent/src/extensibility/plugins/types.ts) — manifest/runtime/override type contracts\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts) — runtime wiring for plugin-provided tool modules\n- [`src/extensibility/extensions/loader.ts`](../packages/coding-agent/src/extensibility/extensions/loader.ts) — runtime wiring for plugin-provided extension modules\n",
|
|
47
|
+
"porting-from-pi-mono.md": "# Porting From pi-mono: A Practical Merge Guide\n\nThis guide is a repeatable checklist for porting changes from pi-mono into this repo.\nUse it for any merge: single file, feature branch, or full release sync.\n\n## Last Sync Point (historical upstream marker)\n\n**Commit:** `b21b42d032919de2f2e6920a76fa9a37c3920c0a`\n**Date:** 2026-03-22\n\nUpdate this section after each sync; do not reuse the previous range. This commit is an upstream pi-mono marker and may not exist in this repo's local object database.\n\nWhen starting a new sync, generate patches from this commit forward in a pi-mono checkout or remote that contains the commit:\n\n```bash\ngit format-patch b21b42d032919de2f2e6920a76fa9a37c3920c0a..HEAD --stdout > changes.patch\n```\n\n## 0) Define the scope\n\n- Identify the upstream reference (commit, tag, or PR).\n- List the packages or folders you plan to touch.\n- Decide which features are in-scope and which are intentionally skipped.\n\n## 1) Bring code over safely\n\n- Prefer a clean, focused diff rather than a wholesale copy.\n- Avoid copying built artifacts or generated files.\n- If upstream added new files, add them explicitly and review contents.\n\n## 2) Match import extension conventions\n\nMost runtime TypeScript sources omit `.js` in internal imports, but several current entrypoints and tool modules keep `.js` for ESM/runtime compatibility. Follow the surrounding file and package export style; do not blanket-strip or blanket-add extensions.\n\n- In `packages/coding-agent` runtime sources, prefer extensionless internal imports when the surrounding module does, but preserve existing `.js` imports in files that already require them.\n- In `packages/tui/test` and `packages/natives/bench`, keep `.js` where surrounding files already use it.\n- Keep real file extensions when required by tooling or import assertions (e.g., `.json`, `.css`, `.md` text embeds).\n- Example: `import { x } from \"./foo.js\";` → `import { x } from \"./foo\";` only when that package/file convention is extensionless.\n\n## 3) Replace import scopes\n\nUpstream uses different package scopes. Replace them consistently.\n\n- Replace old scopes with the local scope used here.\n- Examples (adjust to match the actual packages you are porting):\n - `@mariozechner/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`\n - `@mariozechner/pi-agent-core` → `@oh-my-pi/pi-agent-core`\n - `@mariozechner/pi-tui` → `@oh-my-pi/pi-tui`\n - `@mariozechner/pi-ai` → `@oh-my-pi/pi-ai`\n - `@mariozechner/pi-utils` → `@oh-my-pi/pi-utils`\n- Some upstream packages publish under the `@earendil-works/*` scope instead of `@mariozechner/*`. Map it the same way (`@earendil-works/pi-coding-agent` → `@oh-my-pi/pi-coding-agent`, and so on).\n- The bare `typebox` package is not an `@oh-my-pi/*` scope; do not rewrite it as one. See the Extensions divergence in section 15 for how tool-parameter schemas map.\n\n## 4) Use Bun APIs where they improve on Node\n\nWe run on Bun, but the current source intentionally mixes Bun APIs with small Node standard-library APIs. Replace Node APIs only when Bun provides a clearer, safer, or simpler implementation; do not mechanically rewrite every Node import.\n\n**Prefer replacing when porting new code:**\n\n- Process spawning: prefer Bun Shell `$` for simple commands; use `Bun.spawn`/`Bun.spawnSync` for streaming or process control. Keep existing `child_process` only where its exact semantics are needed.\n- HTTP clients: `node-fetch`, `axios` → native `fetch`\n- SQLite: `better-sqlite3` → `bun:sqlite`\n- Env loading: `dotenv` → Bun loads `.env` automatically\n- Runtime text/assets: prefer Bun imports such as `with { type: \"text\" }` or `Bun.file()` over copy steps or bundled fallback file reads.\n\n**DO NOT replace (these work fine in Bun):**\n\n- `os.homedir()` — do NOT replace with `Bun.env.HOME` or literal `\"~\"`\n- `os.tmpdir()` — do NOT replace with `Bun.env.TMPDIR || \"/tmp\"` or hardcoded paths\n- `fs.mkdtempSync()` — do NOT replace with manual path construction\n- `path.join()`, `path.resolve()`, etc. — these are fine\n\n**Import style:** Use the `node:` prefix for Node standard-library imports. Namespace imports are common, but named imports are acceptable where the surrounding code already uses them.\n\n**Additional Bun conventions:**\n\n- Prefer Bun Shell `$` for short, non-streaming commands; use `Bun.spawn` only when you need streaming I/O or process control.\n- Use `Bun.file()`/`Bun.write()` for simple files and `node:fs/promises` for directory-oriented operations. Existing synchronous `node:fs` calls are acceptable when the calling flow is intentionally synchronous.\n- Avoid `Bun.file().exists()` checks; use `isEnoent` handling in try/catch.\n- Prefer `Bun.sleep(ms)` over `setTimeout` wrappers.\n\n**Wrong:**\n\n```typescript\n// BROKEN: env vars may be undefined, \"~\" is not expanded\nconst home = Bun.env.HOME || \"~\";\nconst tmp = Bun.env.TMPDIR || \"/tmp\";\n```\n\n**Correct:**\n\n```typescript\nimport * as os from \"node:os\";\nimport * as fs from \"node:fs\";\nimport * as path from \"node:path\";\n\nconst configDir = path.join(os.homedir(), \".config\", \"myapp\");\nconst tempDir = fs.mkdtempSync(path.join(os.tmpdir(), \"myapp-\"));\n```\n\n## 5) Prefer Bun embeds (no copying)\n\nDo not add new runtime asset copy steps. Keep assets in repo and prefer Bun embeds/imports; preserve existing explicit generation workflows such as `packages/coding-agent/src/export/html/template.generated.ts`.\n\n- If upstream copies assets into a dist folder, replace with Bun-friendly embeds.\n- Prompts are static `.md` files; use Bun text imports (`with { type: \"text\" }`) and Handlebars instead of inline prompt strings.\n- Use `import.meta.dir` + `Bun.file` to load adjacent non-text resources.\n- Keep assets in-repo and let the bundler include them.\n- Eliminate copy scripts unless the user explicitly requests them or the package already has an intentional generation step.\n- If upstream reads a bundled fallback file at runtime, replace filesystem reads with a Bun text embed import unless the current package already uses a generated asset pipeline.\n - Example (Codex instructions fallback):\n - `const FALLBACK_PROMPT_PATH = join(import.meta.dir, \"codex-instructions.md\");` -> removed\n - `import FALLBACK_INSTRUCTIONS from \"./codex-instructions.md\" with { type: \"text\" };`\n - Use `return FALLBACK_INSTRUCTIONS;` instead of `readFileSync(FALLBACK_PROMPT_PATH, \"utf8\")`\n\n## 6) Port `package.json` carefully\n\nTreat `package.json` as a contract. Merge intentionally.\n\n- Keep existing `name`, `version`, `type`, `exports`, and `bin` unless the port requires changes.\n- Replace npm/node scripts with Bun equivalents (e.g., `bun check`, `bun test`).\n- Ensure dependencies use the correct scope.\n- Do not downgrade dependencies to fix type errors; upgrade instead.\n- Validate workspace package links and `peerDependencies`.\n\n## 7) Align code style and tooling\n\n- Keep existing formatting conventions.\n- Do not introduce `any` unless required.\n- Avoid dynamic imports unless they are required for optional dependencies, startup cost, or runtime-only modules; prefer top-level imports otherwise.\n- Never build prompts in code; prompts are static `.md` files rendered with Handlebars.\n- In `packages/coding-agent`, use `logger` from `@oh-my-pi/pi-utils` for internal/runtime logging; CLI command files may use `console.*` for intentional user-facing output.\n- Use `Promise.withResolvers()` instead of `new Promise((resolve, reject) => ...)`.\n- Prefer ES `#` private fields for new encapsulated state. Constructor parameter properties already exist in current code and are acceptable; do not churn unrelated access modifiers while porting.\n- Prefer existing helpers and utilities over new ad-hoc code.\n Preserve Bun-first infrastructure changes already made in this repo:\n - Runtime is Bun (no Node entry points for the main CLI).\n - Package manager is Bun (no npm lockfiles).\n - Heavy Node APIs should not be introduced casually; current source still uses selected Node APIs (`node:crypto`, `node:readline`, synchronous `node:fs`, and `child_process`) where they fit provider, CLI, or process-control semantics.\n - Lightweight Node APIs (`os.homedir`, `os.tmpdir`, `fs.mkdtempSync`, `path.*`) are kept.\n - CLI shebangs use `bun` (not `node`, not `tsx`).\n - TypeScript packages generally use source files directly; `@oh-my-pi/pi-natives` exports generated native bindings from `packages/natives/native`.\n - CI workflows run Bun for install/check/test.\n\n## 8) Remove old compatibility layers\n\nUnless requested, remove upstream compatibility shims.\n\n- Delete old APIs that were replaced.\n- Update all call sites to the new API directly.\n- Do not keep `*_v2` or parallel versions.\n\n## 9) Update docs and references\n\n- Replace pi-mono repo links where appropriate.\n- Update examples to use Bun and correct package scopes.\n- Ensure README instructions still match the current repo behavior.\n\n## 10) Validate the port\n\nRun the standard checks after changes:\n\n- `bun check`\n\nIf the repo already has failing checks unrelated to your changes, call that out.\nTests use Bun's runner (not Vitest), but only run `bun test` when explicitly requested.\n\n## 11) Protect improved features (regression trap list)\n\nIf you already improved behavior locally, treat those as **non‑negotiable**. Before porting, write down\nthe improvements and add explicit checks so they don’t get lost in the merge.\n\n- **Freeze the expected behavior**: add a short “before/after” note for each improvement (inputs, outputs,\n defaults, edge cases). This prevents silent rollback.\n- **Map old → new APIs**: if upstream renamed concepts (hooks → extensions, custom tools → tools, etc.),\n ensure every old entry point still wires through. One missed flag or export equals lost functionality.\n- **Verify exports**: check `package.json` `exports`, public types, and barrel files. Upstream ports often\n forget to re-export local additions.\n- **Cover non‑happy paths**: if you fixed error handling, timeouts, or fallback logic, add a test or at\n least a manual checklist that exercises those paths.\n- **Check defaults and config merge order**: improvements often live in defaults. Confirm new defaults\n didn’t revert (e.g., new config precedence, disabled features, tool lists).\n- **Audit env/shell behavior**: if you fixed execution or sandboxing, verify the new path still uses your\n sanitized env and does not reintroduce alias/function overrides.\n- **Re-run targeted samples**: keep a minimal set of \"known good\" examples and run them after the port\n (CLI flags, extension registration, tool execution).\n\n## 12) Detect and handle reworked code\n\nBefore porting a file, check if upstream significantly refactored it:\n\n```bash\n# Compare the file you're about to port against what you have locally\ngit diff HEAD upstream/main -- path/to/file.ts\n```\n\nIf the diff shows the file was **reworked** (not just patched):\n\n- New abstractions, renamed concepts, merged modules, changed data flow\n\nThen you must **read the new implementation thoroughly** before porting. Blind merging of reworked code loses functionality because:\n\nNote: interactive mode was recently split into controllers/utils/types. When backporting related changes, port updates into the individual files we created and ensure `interactive-mode.ts` wiring stays in sync.\n\n1. **Defaults change silently** - A new variable `defaultFoo = [a, b]` may replace an old `getAllFoo()` that returned `[a, b, c, d, e]`.\n\n2. **API options get dropped** - When systems merge (e.g., `hooks` + `customTools` → `extensions`), old options may not wire through to the new implementation.\n\n3. **Code paths go stale** - A renamed concept (e.g., `hookMessage` → `custom`) needs updates in every switch statement, type guard, and handler—not just the definition.\n\n4. **Context/capabilities shrink** - Old APIs may have exposed `{ logger, typebox, pi }` that new APIs forgot to include.\n\n### Semantic porting process\n\nWhen upstream reworked a module:\n\n1. **Read the old implementation** - Understand what it did, what options it accepted, what it exposed.\n\n2. **Read the new implementation** - Understand the new abstractions and how they map to old behavior.\n\n3. **Verify feature parity** - For each capability in the old code, confirm the new code preserves it or explicitly removes it.\n\n4. **Grep for stragglers** - Search for old names/concepts that may have been missed in switch statements, handlers, UI components.\n\n5. **Test the boundaries** - CLI flags, SDK options, event handlers, default values—these are where regressions hide.\n\n### Quick checks\n\n```bash\n# Find all uses of an old concept that may need updating\nrg \"oldConceptName\" --type ts\n\n# Compare default values between versions\ngit show upstream/main:path/to/file.ts | rg \"default|DEFAULT\"\n\n# Check if all enum/union values have handlers\nrg \"case \\\"\" path/to/file.ts\n```\n\n## 13) Quick audit checklist\n\nUse this as a final pass before you finish:\n\n- [ ] Import extensions follow the local package convention (no blanket `.js` stripping)\n- [ ] No newly introduced Node-only APIs unless they match an existing justified pattern\n- [ ] All package scopes updated\n- [ ] `package.json` scripts use Bun\n- [ ] Prompts are `.md` text imports (no inline prompt strings)\n- [ ] No internal/runtime `console.*` in coding-agent; CLI user-facing output is intentional\n- [ ] Assets load via Bun embed/import patterns, or through an existing intentional generation pipeline\n- [ ] Tests or checks run (or explicitly noted as blocked)\n- [ ] No functionality regressions (see sections 11-12)\n\n## 14) Commit message format\n\nWhen committing a backport, follow the repo format `<type>(scope): <past-tense description>` and keep the commit\nrange in the title.\n\n```\nfix(coding-agent): backported pi-mono changes (<from>..<to>)\n\npackages/<package>:\n- <type>: <description>\n- <type>: <description> (#<issue> by @<contributor>)\n\npackages/<other-package>:\n- <type>: <description>\n```\n\n**Example:**\n\n```\nfix(coding-agent): backported pi-mono changes (9f3eef65f..52532c7c0)\n\npackages/ai:\n- fix: handle \"sensitive\" stop reason from Anthropic API\n- fix: normalize tool call IDs with special characters for Responses API\n- fix: add overflow detection for Bedrock, MiniMax, Kimi providers\n- fix: 429 status is rate limiting, not context overflow\n\npackages/tui:\n- fix: refactored autocomplete state tracking\n- fix: file autocomplete should not trigger on empty text\n- fix: configurable autocomplete max visible items\n- fix: improved table column width calculation with word-aware wrapping\n\npackages/coding-agent:\n- fix: preserve external config.yml edits on save (#1046 by @nicobailonMD)\n- fix: resolve macOS NFD and curly quote variants in file paths\n```\n\n**Rules:**\n\n- Group changes by package\n- Use conventional commit types (`fix`, `feat`, `refactor`, `perf`, `docs`)\n- Include upstream issue/PR numbers and contributor attribution for external contributions\n- The commit range in the title helps track sync points\n\n## 15) Intentional Divergences\n\nOur fork has architectural decisions that differ from upstream. **Do not port these upstream patterns:**\n\n### UI Architecture\n\n| Upstream | Our Fork | Reason |\n| ------------------------------------------- | --------------------------------------------------------- | --------------------------------------------------------------------- |\n| `FooterDataProvider` class | `StatusLineComponent` | Simpler, integrated status line |\n| `ctx.ui.setHeader()` / `ctx.ui.setFooter()` | No-op stubs in current extension contexts | Not currently wired to replace the TUI status/header UI |\n| `ctx.ui.setEditorComponent()` | No-op stubs in current extension contexts | Custom editor replacement is not currently wired |\n| `InteractiveModeOptions` options object | Positional constructor args (options type still exported) | Keep constructor signature; update the type when upstream adds fields |\n\n### Component Naming\n\n| Upstream | Our Fork |\n| ---------------------------- | ----------------------- |\n| `extension-input.ts` | `hook-input.ts` |\n| `extension-selector.ts` | `hook-selector.ts` |\n| `ExtensionInputComponent` | `HookInputComponent` |\n| `ExtensionSelectorComponent` | `HookSelectorComponent` |\n\n### API Naming\n\n| Upstream | Our Fork | Notes |\n| ---------------------------------------- | ---------------------------------------- | ----------------------------------------- |\n| `sessionManager.appendSessionInfo(name)` | `sessionManager.setSessionName(name)` | We use `sessionName` throughout |\n| `sessionManager.getSessionName()` | `sessionManager.getSessionName()` | Same (we unified to match upstream's RPC) |\n| `agent.sessionName` / `setSessionName()` | `agent.sessionName` / `setSessionName()` | Same |\n\n### File Consolidation\n\n| Upstream | Our Fork | Reason |\n| -------------------------------------------------- | --------------------------------------------------------- | --------------------------------------------- |\n| `clipboard.ts` + `clipboard-image.ts` (tool files) | `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives` | Native implementation with a small TS wrapper |\n\n### Test Framework\n\n| Upstream | Our Fork |\n| ------------------------- | ----------------------------- |\n| `vitest` with `vi.mock()` | `bun:test` with `vi` from bun |\n| `node:test` assertions | `expect()` matchers |\n\n### Tool Architecture\n\n| Upstream | Our Fork | Notes |\n| ----------------------------------- | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------- |\n| `createTool(cwd: string, options?)` | `createTools(session: ToolSession)` via `BUILTIN_TOOLS` registry | Tool factories accept `ToolSession` and can return `null` |\n| Per-tool `*Operations` interfaces | Only current per-tool override interfaces remain (for example `FindOperations`) | Used for SSH/remote overrides where present |\n| Node.js `fs/promises` everywhere | Bun file APIs for simple file writes/reads, `node:fs/promises` for dirs, selected sync `node:fs` where needed | Prefer Bun APIs when they simplify |\n\n### Auth Storage\n\n| Upstream | Our Fork | Notes |\n| ------------------------------- | ------------------------------------------- | -------------------------------------------- |\n| `proper-lockfile` + `auth.json` | `agent.db` (bun:sqlite) | Credentials stored exclusively in `agent.db` |\n| Single credential per provider | Multi-credential with round-robin selection | Session affinity and backoff logic preserved |\n\n### Extensions\n\n| Upstream | Our Fork |\n| ---------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- |\n| `jiti` for TypeScript loading | Native Bun `import()` |\n| `pkg.pi` manifest field | `pkg.omp` preferred; fallback to `pkg.pi` remains |\n| `StringEnum` from `pi-ai` | `Type.Enum` from the `pi.typebox` shim (or author the schema with `pi.zod`); `pi-ai` no longer exports `StringEnum` |\n| `formatSize` from `pi-coding-agent` | `formatBytes` from `@oh-my-pi/pi-utils` |\n| `DefaultResourceLoader` / `DefaultPackageManager` / `SettingsManager` / `createEventBus` | Capability-based discovery (`loadCapability(...)`) plus the `Settings` singleton and `EventBus` |\n\n### Skip These Upstream Features\n\nWhen porting, **skip** these files/features entirely:\n\n- `footer-data-provider.ts` — we use StatusLineComponent\n- `clipboard-image.ts` — image clipboard support is exposed through `src/utils/clipboard.ts` backed by `@oh-my-pi/pi-natives`\n- GitHub workflow files — we have our own CI\n- `models.generated.ts` — auto-generated, regenerate locally (as models.json instead)\n\n### Features We Added (Preserve These)\n\nThese exist in our fork but not upstream. **Never overwrite:**\n\n- `StatusLineComponent` in interactive mode\n- Multi-credential auth with session affinity\n- Capability-based discovery system (`defineCapability`, `registerProvider`, `loadCapability`, `skillCapability`, etc.)\n- MCP/Exa/SSH integrations\n- LSP writethrough for format-on-save\n- Bash interception (`checkBashInterception`)\n- Fuzzy path suggestions in read tool\n",
|
|
48
|
+
"porting-to-natives.md": "# Porting to pi-natives (N-API) — Field Notes\n\nThis is a practical guide for moving hot paths into `crates/pi-natives` and wiring them through the generated native package entrypoint. It exists to avoid the same failures happening twice.\n\n## When to port\n\nPort when any of these are true:\n\n- The hot path runs in render loops, tight UI updates, or large batches.\n- JS allocations dominate (string churn, regex backtracking, large arrays).\n- You already have a JS baseline and can benchmark both versions side by side.\n- The work is CPU-bound or blocking I/O that can run on the libuv thread pool.\n- The work is async I/O that can run on Tokio's runtime (for example shell execution).\n\nAvoid ports that depend on JS-only state or dynamic imports. N-API exports should be data-in/data-out. Long-running work should go through `task::blocking` (CPU-bound/blocking I/O) or `task::future` (async I/O) with cancellation where the caller needs `timeoutMs` or `AbortSignal`.\n\n## Current package shape\n\n`@oh-my-pi/pi-natives` no longer has a `packages/natives/src/<module>` TypeScript wrapper layer. The package root points at generated native artifacts:\n\n- runtime entry/export wrapper: `packages/natives/native/index.js`\n- types entry: `packages/natives/native/index.d.ts`\n- loader helpers: `packages/natives/native/loader-state.js`\n- embedded manifest: `packages/natives/native/embedded-addon.js`\n\nConsumers import directly from `@oh-my-pi/pi-natives`. The generated declarations and explicit ESM exports are produced during `bun --cwd=packages/natives run build`.\n\n## Anatomy of a native export\n\n**Rust side:**\n\n- Implementation lives in `crates/pi-natives/src/<module>.rs`.\n- If you add a new module, register it in `crates/pi-natives/src/lib.rs`.\n- Export with `#[napi]`; snake_case exports are converted to camelCase automatically. Use explicit JS names only for true aliases/non-default names. Use `#[napi(object)]` for object-shaped structs.\n- For CPU-bound or blocking work, use `task::blocking(tag, cancel_token, work)`.\n- For async work that needs Tokio, use `task::future(env, tag, work)`.\n- Pass a `CancelToken` when the API exposes `timeoutMs` or `AbortSignal`, and call `heartbeat()` inside long loops.\n\n**Package/build side:**\n\n- `packages/natives/scripts/build-native.ts` runs napi-rs, installs the `.node` artifact, copies generated `index.d.ts`, and regenerates explicit ESM class/function exports plus enum runtime exports in the checked-in `native/index.js`.\n- `packages/natives/native/index.js` is the ESM entrypoint that calls the loader, exposes named exports, and rejects install/compiled `.node` files that do not expose the package-version sentinel.\n- `packages/natives/package.json` exposes only the package root (`@oh-my-pi/pi-natives`) as the import surface. At publish time the binaries are split out: the core ships the loader only (no `.node`), and each platform's `.node` is published as an optional-dependency leaf package `@oh-my-pi/pi-natives-<tag>` (`scripts/ci-release-publish.ts` + `packages/natives/scripts/gen-npm-packages.ts`). This is transparent to importers — you still `import` from `@oh-my-pi/pi-natives`.\n\n**Consumer side:**\n\n- Update direct imports/callsites in `packages/coding-agent` or `packages/tui` when the new export replaces a JS implementation.\n- Keep higher-level policy in consumers unless it belongs in the native primitive itself.\n\n## Porting checklist\n\n1. **Add the Rust implementation**\n\n- Put the core logic in a plain Rust function.\n- If it is a new module, add it to `crates/pi-natives/src/lib.rs`.\n- Expose it with `#[napi]` so the default snake_case -> camelCase mapping stays consistent.\n- Keep signatures owned and simple: `String`, `Vec<String>`, `Uint8Array`, `Either<JsString, Uint8Array>`, or `#[napi(object)]` structs.\n- For CPU-bound or blocking work, use `task::blocking`; for async work, use `task::future`.\n- If exposing cancellation, include `timeout_ms: Option<u32>` and `signal: Option<Unknown<'env>>` in options, create `CancelToken::new(...)`, and heartbeat in long loops.\n\n2. **Build generated bindings**\n\n- Run `bun --cwd=packages/natives run build`.\n- Confirm the generated `packages/natives/native/index.d.ts` includes the new export with the intended JS name/signature.\n- Confirm `packages/natives/native/index.js` has generated explicit ESM exports for the new class/function and enum objects when enum changes are involved.\n\n3. **Update consumers**\n\n- Import the new export directly from `@oh-my-pi/pi-natives`.\n- Replace only callsites where the native implementation is faster/equivalent and preserves behavior.\n- Remove obsolete JS implementation code in the same change when the native path becomes canonical.\n\n4. **Add benchmarks**\n\n- Put benchmarks next to the owning package (`packages/tui/bench`, `packages/natives/bench`, or `packages/coding-agent/bench`).\n- Include a JS baseline and native version in the same run.\n- Use `Bun.nanoseconds()` and a fixed iteration count.\n- Keep benchmark inputs realistic for the hot path.\n\n5. **Run focused verification**\n\n- Build the native package.\n- Run the benchmark.\n- Run the narrow tests or scenario covering the changed export/callsites.\n\n## Pain points and how to avoid them\n\n### 1) Stale platform/variant artifacts\n\nThe loader probes platform-tagged artifacts in deterministic order. For x64, selected variant candidates are tried before the unsuffixed default fallback:\n\n- `modern`: `pi_natives.<tag>-modern.node`, then `...-baseline.node`, then `pi_natives.<tag>.node`.\n- `baseline`: `pi_natives.<tag>-baseline.node`, then `pi_natives.<tag>.node`.\n\nNon-x64 uses `pi_natives.<tag>.node`.\n\nCompiled binaries also probe `<getNativesDir()>/<version>/...` and a legacy user-data directory before package/executable locations. Windows `node_modules` installs stage leaf/core addons into the same versioned directory before probing. If any earlier candidate is stale, a new export may appear missing unless the version sentinel rejects it first.\n\n**Fix:** remove stale candidate/cache files and rebuild.\n\n```bash\nrm packages/natives/native/pi_natives.<platform>-<arch>.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-modern.node\nrm packages/natives/native/pi_natives.<platform>-<arch>-baseline.node\nbun --cwd=packages/natives run build\n```\n\nFor compiled binaries or Windows staging, delete the versioned addon cache shown in the loader error (normally under `~/.omp/natives/<version>` unless `$XDG_DATA_HOME/omp` is used).\n\n### 2) Generated types do not match loaded binary\n\nThis can happen when `native/index.d.ts` was regenerated but the `.node` file being loaded is stale, same-version incomplete, or from a different platform/variant. Different-version install/compiled binaries should be rejected by the version sentinel during loading.\n\nVerify the loaded export set from the actual candidate path reported by the loader:\n\n```bash\nbun -e 'import { createRequire } from \"node:module\"; const require = createRequire(import.meta.url); const mod = require(process.argv[2]); console.log(Object.keys(mod).sort())' -- /path/from/loader/error/pi_natives.<tag>[-variant].node\n```\n\nFix the build/candidate mismatch. Do not paper over it with optional consumer checks if the export is required.\n\n### 3) Rust signature mismatch\n\nKeep N-API signatures simple and owned. Avoid borrowed references like `&str` in public exports. If you need structured data, use `#[napi(object)]` structs. If you need callbacks, use napi-rs `ThreadsafeFunction` and keep callback error/value behavior explicit.\n\n### 4) Enum runtime exports and ESM named exports\n\nnapi-rs declarations alone are not enough for JS callers that import named symbols or use enum objects at runtime. `scripts/gen-enums.ts` reads `native/index.d.ts`, writes explicit `export const ... = nativeBindings...` entries for public classes/functions, and emits enum objects in `native/index.js`. If you add or change a native export, verify both `native/index.d.ts` and the generated export block in `native/index.js`.\n\n### 5) Benchmarking mistakes\n\n- Do not compare different inputs or allocations.\n- Keep JS and native using identical input arrays.\n- Run both in the same benchmark file to avoid skew.\n- Include enough iterations to smooth startup noise, but keep inputs realistic.\n\n## Benchmark template\n\n```ts\nconst ITERATIONS = 2000;\n\nfunction bench(name: string, fn: () => void): number {\n const start = Bun.nanoseconds();\n for (let i = 0; i < ITERATIONS; i++) fn();\n const elapsed = (Bun.nanoseconds() - start) / 1e6;\n console.log(\n `${name}: ${elapsed.toFixed(2)}ms total (${(elapsed / ITERATIONS).toFixed(6)}ms/op)`,\n );\n return elapsed;\n}\n\nbench(\"feature/js\", () => {\n jsImpl(sample);\n});\n\nbench(\"feature/native\", () => {\n nativeImpl(sample);\n});\n```\n\n## Verification checklist\n\n- Generated `native/index.d.ts` includes the new export and intended TS signature.\n- `native/index.js` includes the generated named export; enum objects are present when the change adds/changes enums.\n- The loaded `.node` file's `Object.keys(require(candidate))` includes the new export and the package-version sentinel.\n- Bench numbers are recorded in the PR/notes.\n- Call sites are updated only if native is faster/equal and behavior-compatible.\n- Obsolete JS code is removed when the native implementation becomes canonical.\n\n## Rule of thumb\n\n- If native is slower, do not switch callsites. Keep or remove the export based on whether it has a near-term owner.\n- If native is faster and behavior-compatible, switch callsites and keep a benchmark to catch regressions.\n",
|
|
49
|
+
"provider-streaming-internals.md": "# Provider streaming internals\n\nThis document explains how token/tool streaming is normalized in `@oh-my-pi/pi-ai`, then propagated through `@oh-my-pi/pi-agent-core` and `coding-agent` session events.\n\n## End-to-end flow\n\n1. `streamSimple()` (`packages/ai/src/stream.ts`) maps generic options and dispatches to a provider stream function.\n2. Provider stream functions translate provider-native stream events into the unified `AssistantMessageEvent` sequence. Current built-ins include Anthropic, OpenAI Responses/Completions/Codex/Azure Responses, Google Gemini/Gemini CLI/Vertex, Bedrock Converse, Ollama, Cursor, pi-native gateway transport, plus GitLab Duo/Kimi/Synthetic wrappers and extension-registered custom APIs.\n3. Each provider pushes events into `AssistantMessageEventStream` (`packages/ai/src/utils/event-stream.ts`), which throttles delta events and exposes:\n - async iteration for incremental updates\n - `result()` for final `AssistantMessage`\n4. `agentLoop` (`packages/agent/src/agent-loop.ts`) consumes those events, mutates in-flight assistant state, and emits `message_update` events carrying the raw `assistantMessageEvent`.\n5. `AgentSession` (`packages/coding-agent/src/session/agent-session.ts`) subscribes to agent events, persists messages, drives extension hooks, and applies session behaviors (retry, compaction, TTSR, streaming-edit abort checks).\n\n## Unified stream contract in `@oh-my-pi/pi-ai`\n\nAll providers emit the same shape (`AssistantMessageEvent` in `packages/ai/src/types.ts`):\n\n- `start`\n- content block lifecycle triplets:\n - text: `text_start` → `text_delta`\\* → `text_end`\n - thinking: `thinking_start` → `thinking_delta`\\* → `thinking_end`\n - tool call: `toolcall_start` → `toolcall_delta`\\* → `toolcall_end`\n- terminal event:\n - `done` with `reason: \"stop\" | \"length\" | \"toolUse\"`\n - or `error` with `reason: \"aborted\" | \"error\"`\n\n`AssistantMessageEventStream` guarantees:\n\n- final result is resolved by terminal event (`done` or `error`)\n- deltas are batched/throttled (~50ms)\n- buffered deltas are flushed before non-delta events and before completion\n\n## Delta throttling and harmonization behavior\n\n`AssistantMessageEventStream` treats `text_delta`, `thinking_delta`, and `toolcall_delta` as mergeable events:\n\n- buffered deltas are merged only when **type + contentIndex** match\n- merge keeps the latest `partial` snapshot\n- non-delta events force immediate flush\n\nThis smooths high-frequency provider streams for TUI/event consumers, but is not provider backpressure: providers still produce at full speed, while the local stream buffers.\n\n## Provider normalization details\n\n## Anthropic (`anthropic-messages`)\n\nSource: `packages/ai/src/providers/anthropic.ts`\n\nNormalization points:\n\n- `message_start` initializes usage (input/output/cache tokens)\n- `content_block_start` maps to text/thinking/toolcall starts\n- `content_block_delta` maps:\n - `text_delta` → `text_delta`\n - `thinking_delta` → `thinking_delta`\n - `input_json_delta` → `toolcall_delta`\n - `signature_delta` updates `thinkingSignature` only (no event)\n- `content_block_stop` emits corresponding `*_end`\n- `message_delta.stop_reason` maps via `mapStopReason()`\n\nTool-call argument streaming:\n\n- each tool block carries internal `partialJson`\n- every JSON delta appends to `partialJson`\n- `arguments` are reparsed on each delta via `parseStreamingJson()`\n- `toolcall_end` reparses once more, then strips `partialJson`\n\n## OpenAI Responses family (`openai-responses`, `openai-codex-responses`, `azure-openai-responses`)\n\nSources: `packages/ai/src/providers/openai-responses.ts`, `openai-codex-responses.ts`, and `azure-openai-responses.ts`\n\nNormalization points:\n\n- `response.output_item.added` starts reasoning/text/function-call/custom-tool blocks\n- reasoning summary events (`response.reasoning_summary_text.delta`) and raw reasoning events (`response.reasoning_text.delta`) become `thinking_delta`\n- output/refusal deltas become `text_delta`\n- `response.function_call_arguments.delta` and `response.custom_tool_call_input.delta` become `toolcall_delta`\n- `response.output_item.done` emits `thinking_end` / `text_end` / `toolcall_end`\n- `response.completed` maps status to stop reason and usage; `response.failed` / SDK `error` events throw into the wrapper's terminal `error` path\n\nTool-call argument streaming:\n\n- same `partialJson` accumulation pattern as Anthropic for function-call JSON arguments\n- custom tools stream raw string input and expose final arguments as `{ input: <raw> }`\n- providers that send only `response.function_call_arguments.done` still populate final args\n- tool call IDs are normalized as `\"<call_id>|<item_id>\"`\n\n## Google Generative AI (`google-generative-ai`)\n\nSource: `packages/ai/src/providers/google.ts`\n\nNormalization points:\n\n- iterates `candidate.content.parts`\n- text parts are split into thinking vs text by `isThinkingPart(part)`\n- block transitions close previous block before starting a new one\n- `part.functionCall` is treated as a complete tool call (start/delta/end emitted immediately)\n- finish reason mapped by `mapStopReason()` from `google-shared.ts`\n\nTool-call argument streaming:\n\n- function call args arrive as structured object, not incremental JSON text\n- implementation emits one synthetic `toolcall_delta` containing `JSON.stringify(arguments)`\n- no partial JSON parser needed for Google in this path\n\n## Partial tool-call JSON accumulation and recovery\n\nShared behavior for Anthropic/OpenAI Responses uses `parseStreamingJson()` (`packages/ai/src/utils/json-parse.ts`):\n\n1. try `JSON.parse`\n2. fallback to `partial-json` parser for incomplete fragments\n3. if both fail, return `{}`\n\nImplications:\n\n- malformed or truncated argument deltas do not crash stream processing immediately\n- in-progress `arguments` may temporarily be `{}`\n- later valid deltas can recover structured arguments because parsing is retried on every append\n- final `toolcall_end` performs one more parse attempt before emission\n\n## Stop reasons vs transport/runtime errors\n\nProvider stop reasons are mapped to normalized `stopReason`:\n\n- Anthropic: `end_turn`→`stop`, `max_tokens`→`length`, `tool_use`→`toolUse`, safety/refusal cases→`error`\n- OpenAI Responses: `completed`→`stop`, `incomplete`→`length`, `failed/cancelled`→`error`\n- Google: `STOP`→`stop`, `MAX_TOKENS`→`length`, safety/prohibited/malformed-function-call classes→`error`\n\nError semantics are split in two stages:\n\n1. **Model completion semantics** (provider reported finish reason/status)\n2. **Transport/runtime failure** (network/client/parser/abort exceptions)\n\nIf provider stream throws or signals failure, each provider wrapper catches and emits terminal `error` event with:\n\n- `stopReason = \"aborted\"` when abort signal is set\n- otherwise `stopReason = \"error\"`\n- `errorMessage = formatErrorMessageWithRetryAfter(error)`\n\n## Malformed chunk / SSE parse failure behavior\n\nMost provider paths delegate chunk/SSE framing to vendor SDK streams (Anthropic SDK, OpenAI SDK, Google SDK). The Codex SSE fallback uses `readSseJson()` directly, and websocket Codex frames are normalized through the same event handler.\n\nObserved behavior in current implementation:\n\n- malformed SDK stream parsing surfaces as an exception or stream `error` event\n- malformed Codex SSE JSON/framing throws from the local SSE reader\n- provider wrapper converts failures into unified terminal `error` events\n- no provider-specific resume/retry inside the stream function itself, except Codex websocket-to-SSE transport fallback before replay-unsafe output is emitted\n- higher-level retries are handled in `AgentSession` auto-retry logic (message-level retry, not stream-chunk replay)\n\n## Cancellation boundaries\n\nCancellation is layered:\n\n- AI provider request: `options.signal` is passed into provider client stream call.\n- Provider wrapper: after stream loop, aborted signal forces error path (`\"Request was aborted\"`).\n- Agent loop: checks `signal.aborted` before handling each provider event and can synthesize an aborted assistant message from the latest partial.\n- Session/agent controls: `AgentSession.abort()` -> `agent.abort()` -> shared abort controller cancellation.\n\nTool execution cancellation is separate from model stream cancellation:\n\n- tool runners use `AbortSignal.any([agentSignal, steeringAbortSignal])`\n- steering interrupts can abort remaining tool execution while preserving already-produced tool results\n\n## Backpressure boundaries\n\nThere is no hard backpressure mechanism between provider SDK stream and downstream consumers:\n\n- `EventStream` uses in-memory queues with no max size\n- throttling reduces UI update rate but does not slow provider intake\n- if consumers lag significantly, queued events can grow until completion\n\nCurrent design favors responsiveness and simple ordering over bounded-buffer flow control.\n\n## How stream events surface as agent/session events\n\n`agentLoop.streamAssistantResponse()` bridges `AssistantMessageEvent` to `AgentEvent`:\n\n- on `start`: pushes placeholder assistant message and emits `message_start`\n- on block events (`text_*`, `thinking_*`, `toolcall_*`): updates last assistant message, emits `message_update` with raw `assistantMessageEvent`\n- on terminal (`done`/`error`): resolves final message from `response.result()`, emits `message_end`\n\n`AgentSession` then consumes those events for session-level behaviors:\n\n- TTSR watches `message_update.assistantMessageEvent` for `text_delta`, `thinking_delta`, and `toolcall_delta`\n- streaming edit guard inspects `toolcall_delta`/`toolcall_end` on `edit` calls and can abort early\n- persistence writes finalized messages at `message_end`\n- auto-retry examines assistant `stopReason === \"error\"` plus `errorMessage` heuristics\n\n## Unified vs provider-specific responsibilities\n\nUnified (common contract):\n\n- event shape (`AssistantMessageEvent`)\n- final result extraction (`done`/`error`)\n- delta throttling + merge rules\n- agent/session event propagation model\n\nProvider-specific (not fully abstracted):\n\n- upstream event taxonomies and mapping logic\n- stop-reason translation tables\n- tool-call ID conventions\n- reasoning/thinking block semantics and signatures\n- usage token semantics and availability timing\n- message conversion constraints per API\n\n## Implementation files\n\n- [`../../ai/src/stream.ts`](../packages/ai/src/stream.ts) — provider dispatch, option mapping, API key/session plumbing, custom API dispatch, and provider-specific credential handling.\n- [`../../ai/src/utils/event-stream.ts`](../packages/ai/src/utils/event-stream.ts) — generic stream queue + assistant delta throttling.\n- [`../../ai/src/utils/json-parse.ts`](../packages/ai/src/utils/json-parse.ts) — partial JSON parsing for streamed tool arguments.\n- [`../../ai/src/providers/anthropic.ts`](../packages/ai/src/providers/anthropic.ts) — Anthropic event translation and tool JSON delta accumulation.\n- [`../../ai/src/providers/openai-responses.ts`](../packages/ai/src/providers/openai-responses.ts), [`openai-responses-shared.ts`](../packages/ai/src/providers/openai-responses-shared.ts), [`openai-codex-responses.ts`](../packages/ai/src/providers/openai-codex-responses.ts), [`azure-openai-responses.ts`](../packages/ai/src/providers/azure-openai-responses.ts) — Responses-family event translation and status mapping.\n- [`../../ai/src/providers/google.ts`](../packages/ai/src/providers/google.ts), [`google-gemini-cli.ts`](../packages/ai/src/providers/google-gemini-cli.ts), [`google-vertex.ts`](../packages/ai/src/providers/google-vertex.ts) — Gemini stream chunk-to-block translation variants.\n- [`../../ai/src/providers/google-shared.ts`](../packages/ai/src/providers/google-shared.ts) — Gemini finish-reason mapping and shared conversion rules.\n- [`../../ai/src/providers/amazon-bedrock.ts`](../packages/ai/src/providers/amazon-bedrock.ts), [`openai-completions.ts`](../packages/ai/src/providers/openai-completions.ts), [`ollama.ts`](../packages/ai/src/providers/ollama.ts), [`cursor.ts`](../packages/ai/src/providers/cursor.ts), [`pi-native-client.ts`](../packages/ai/src/providers/pi-native-client.ts) — additional built-in stream adapters using the same event contract.\n- [`../../agent/src/agent-loop.ts`](../packages/agent/src/agent-loop.ts) — provider stream consumption and `message_update` bridging.\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level handling of streaming updates, abort, retry, and persistence.\n",
|
|
50
|
+
"python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- Host-side subagent helper bridge: `src/eval/agent-bridge.ts`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a retained `python` subprocess that speaks NDJSON over stdin/stdout. No Jupyter gateway and no extra pip dependencies are required — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper implements MIME-bundle dispatch.\n\nTool params:\n\n```ts\n{\n cells: Array<{\n language: \"py\" | \"js\";\n code: string;\n title?: string;\n timeout?: number; // seconds, clamped to 1..3600, default 30. Inactivity budget — see \"Cell timeout\".\n reset?: boolean; // reset this cell's selected runtime before execution\n }>;\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach Python kernel is a single subprocess: `<resolved-python> -u <runner.py>`. The runner is bundled with the host binary (Bun text import), written to an `omp-python-runner` cache under the OS temp directory once per script hash, and reused by subsequent spawns.\n\nKernel startup sequence:\n\n1. Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-omp-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. Supported set:\n\n| Magic | Effect |\n| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. |\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. |\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by namespaced eval session id plus cwd.\n - Multiple owners can share the same retained kernel for that key.\n - Calls through the tool are exclusive, so tool invocations do not overlap.\n - A dead retained subprocess is replaced before execution.\n - If the subprocess dies during execution, it is replaced and the cell is retried once.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nPython cells run sequentially in the same selected Python kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` is per cell and resets that language runtime before the cell executes.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `PI_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order:\n\n1. Active/located venv (`VIRTUAL_ENV`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.omp/python-env`\n3. `python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\n`eval.py` / `eval.js` (both default `true`) plus optional boolean env flags `PI_PY` / `PI_JS` control eval backend exposure:\n\n- Python backend only (`eval.py=true`, `eval.js=false`, or `PI_PY=1 PI_JS=0`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`, or `PI_PY=0 PI_JS=1`)\n- both backends (`eval.py=true`, `eval.js=true`, or `PI_PY=1 PI_JS=1`)\n\n`PI_PY` and `PI_JS` use normal boolean flag parsing. If either env var is set, the env pair overrides the per-key settings; an unset member of the pair defaults to enabled.\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available for `js` cells; `py` cells fail with a Python-backend availability error.\n\nPython prelude helpers include `agent(prompt, *, agent_type=\"task\", model=None, context=None, label=None, schema=None)`. It synchronously calls the host bridge, runs one subagent through the task executor, and returns the final text. When `schema` is supplied, the helper parses the subagent's JSON output and returns the object.\n\n## Execution flow and cancellation/timeout\n\n### Cell timeout\n\nEach eval cell `timeout` is in seconds, defaults to 30, and is clamped to `1..600`. It is a **wall-clock budget on the cell's own work** that the watchdog (`IdleTimeout`, `src/eval/idle-timeout.ts`) enforces, **but it is paused while a host-side `agent()`/`parallel()`/`completion()` bridge call is in flight**: those calls pump a heartbeat (`withBridgeHeartbeat`, `src/eval/heartbeat.ts`) that re-arms the watchdog, so a long fanout or a slow completion runs to completion instead of being killed mid-stream.\n\nThe heartbeat is the **sole** signal that extends the budget. Everything else the cell does — compute, `stdout`/`stderr`, `log()`/`phase()`, and ordinary (non-agent) tool calls — counts against `timeout`, so a cell that is not delegating to an agent/completion is bounded by a plain wall-clock timeout. The tool combines the caller abort signal, the session abort signal, and the watchdog's signal with `AbortSignal.any(...)`; no wall-clock deadline is passed to the backend, so neither runtime arms a competing fixed timer.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- Result includes `cancelled=true`; the timeout path annotates output as `Command timed out after <n> seconds`.\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf a second cancel is required (runner stuck in C code), the host escalates to `SIGTERM` and the session restarts on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-omp-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-omp-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for all output retained in the tool result\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `PI_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, use a `js` cell.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.omp/python-env`. `omp setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase tool `timeout` (max 3600s) if workload is legitimate. For stuck native code, cancellation triggers `SIGINT` first then escalates; the session restarts on the next request.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — Tool validates `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `PI_PY` / `PI_JS` — eval backend exposure overrides\n- `PI_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `PI_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python\n- `PI_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n",
|
|
51
|
+
"render-mermaid.md": "# RenderMermaid\n\n`RenderMermaid` is an optional built-in tool that renders Mermaid source to terminal-friendly text.\n\n## Enable it\n\nDisabled by default. Turn it on in `/settings` under **Tools → Render Mermaid**, or in `~/.omp/agent/config.yml`:\n\n```yaml\nrenderMermaid:\n enabled: true\n```\n\n## What it does\n\n- Tool name: `render_mermaid`\n- Input: Mermaid source in the required `mermaid` field\n- Output: rendered ASCII/Unicode text, not SVG or PNG\n- Storage: when artifact storage is available, the full render is also saved as an `artifact://...`\n\nThere are no model-specific or environment-variable prerequisites. Once enabled, any model that can call built-in tools can use it.\n\n## Parameters\n\n```json\n{\n \"mermaid\": \"graph TD\\n A[Start] --> B[Stop]\",\n \"config\": {\n \"useAscii\": false,\n \"paddingX\": 2,\n \"paddingY\": 2,\n \"boxBorderPadding\": 0\n }\n}\n```\n\nAvailable `config` fields:\n\n- `useAscii` — `true` for plain ASCII, `false` for Unicode box-drawing characters (default and usually more readable)\n- `paddingX` — horizontal spacing between nodes\n- `paddingY` — vertical spacing between nodes\n- `boxBorderPadding` — inner padding inside node boxes\n\n## Current limitations\n\n`RenderMermaid` uses the `beautiful-mermaid` ASCII renderer. It works best for flowcharts and small diagrams.\n\nComplex sequence diagrams, especially with `alt` / `else` blocks, can become very wide in a terminal. That is current renderer behavior, not a provider or model configuration problem.\n\nIf a sequence diagram is hard to read:\n\n1. Keep Unicode output (`useAscii: false`)\n2. Reduce spacing with a tighter config such as `paddingX: 2`, `paddingY: 2`, `boxBorderPadding: 0`\n3. Prefer smaller sub-diagrams over one large sequence diagram\n4. Open the saved artifact if the inline preview is truncated in the TUI\n\n## Example\n\nInput:\n\n```mermaid\ngraph TD\n A[Start] --> B{Decision}\n B -->|Yes| C[Action]\n B -->|No| D[End]\n```\n\nTypical result:\n\n```text\n┌─────┐\n│Start│\n└─────┘\n │\n ▼\n┌────────┐\n│Decision│\n└────────┘\n```\n",
|
|
52
|
+
"resolve-tool-runtime.md": "# Resolve tool runtime internals\n\nThis document explains how preview/apply workflows are modeled in coding-agent and how built-in or custom tools can participate via the tool-choice queue and `pushPendingAction`.\n\n## Scope and key files\n\n- [`src/tools/resolve.ts`](../packages/coding-agent/src/tools/resolve.ts)\n- [`src/tools/ast-edit.ts`](../packages/coding-agent/src/tools/ast-edit.ts)\n- [`src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts)\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n\n## What `resolve` does\n\n`resolve` is a hidden tool that finalizes a pending preview action.\n\n- `action: \"apply\"` executes the queued action's `apply(reason, extra)` callback and returns that result with resolve metadata.\n- `action: \"discard\"` invokes `reject(reason, extra)` if provided; otherwise returns `Discarded: <label>. Reason: <reason>`.\n- `extra` is optional free-form metadata. Queue handlers receive it; producers decide whether it has meaning.\n\nIf no pending action exists, `resolve` fails with:\n\n- `No pending action to resolve. Nothing to apply or discard.`\n\n## Pending actions use the tool-choice queue\n\nPreview producers call `queueResolveHandler(...)`, which pushes a one-shot forced `resolve` directive onto the session tool-choice queue and adds a `resolve-reminder` steering message.\n\nRuntime behavior:\n\n- the queued handler owns the pending `apply`/`reject` callbacks,\n- `resolve` looks up the current queue invoker with `session.peekQueueInvoker()`,\n- if the model rejects the forced tool choice, the queue directive is requeued,\n- `resolve` does not maintain a separate pending-action stack.\n\n`resolve` also checks a standing resolve handler after the queue invoker; this is used by long-lived approval flows that are not ordinary preview tool calls.\n\nMultiple pending previews therefore follow the active tool-choice queue ordering, not an independent pending-action store. If an apply callback throws, the queued helper re-pushes the same resolve directive and reminder so the preview can still be discarded or retried.\n\n## Built-in producer example (`ast_edit`)\n\n`ast_edit` previews structural replacements first. When the preview has replacements and is not applied yet, it queues a resolve handler that contains:\n\n- label (human-readable summary)\n- `sourceToolName` (`ast_edit`)\n- `apply(reason: string, extra?: Record<string, unknown>)` callback that reruns AST edit with `dryRun: false`\n\n`resolve(action=\"apply\", reason=\"...\")` passes `reason` into this callback. `ast_edit` currently ignores `extra`.\n\n## Custom tools: `pushPendingAction`\n\nCustom tools can register resolve-compatible pending actions through `CustomToolAPI.pushPendingAction(...)`. The custom tool loader forwards these actions to `queueResolveHandler(...)` when that hook is available.\n\n`CustomToolPendingAction`:\n\n- `label: string` (required)\n- `apply(reason: string): Promise<AgentToolResult<unknown>>` (required) — invoked on apply; `reason` is the string passed to `resolve`\n- `reject?(reason: string): Promise<AgentToolResult<unknown> | undefined>` (optional) — invoked on discard; return value replaces the default \"Discarded\" message if provided\n- `details?: unknown` exists on the public custom-tool type but is not currently forwarded by the loader into resolve metadata\n- `sourceToolName?: string` (optional, defaults to `\"custom_tool\"`)\n\n### Minimal usage example\n\n```ts\nimport type { CustomToolFactory } from \"@oh-my-pi/pi-coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"batch_rename_preview\",\n label: \"Batch Rename Preview\",\n description: \"Previews renames and defers commit to resolve\",\n parameters: pi.zod.object({\n files: pi.zod.array(pi.zod.string()),\n }),\n\n async execute(_toolCallId, params) {\n const previewSummary = `Prepared rename plan for ${params.files.length} files`;\n\n pi.pushPendingAction({\n label: `Batch rename: ${params.files.length} files`,\n sourceToolName: \"batch_rename_preview\",\n apply: async (reason) => {\n // apply writes here\n return {\n content: [\n { type: \"text\", text: `Applied batch rename. Reason: ${reason}` },\n ],\n };\n },\n reject: async (reason) => {\n // optional: cleanup or notify on discard\n return {\n content: [\n { type: \"text\", text: `Discarded batch rename. Reason: ${reason}` },\n ],\n };\n },\n });\n\n return {\n content: [\n {\n type: \"text\",\n text: `${previewSummary}. Call resolve to apply or discard.`,\n },\n ],\n };\n },\n});\n\nexport default factory;\n```\n\n## Runtime availability and failures\n\n`pushPendingAction` is wired by the custom tool loader through the active session's resolve queue hook.\n\nIf the runtime did not provide the resolve queue hook, `pushPendingAction` throws:\n\n- `Pending action store unavailable for custom tools in this runtime.`\n\n## Tool-choice behavior\n\nWhen `queueResolveHandler(...)` registers a preview, the agent runtime forces a one-shot `resolve` tool choice so pending previews are explicitly finalized before normal tool flow continues.\n\n## Developer guidance\n\n- Use pending actions only for destructive or high-impact operations that should support explicit apply/discard.\n- Keep `label` concise and specific; it is shown in resolve renderer output.\n- Ensure `apply(reason)` is deterministic and idempotent enough for one-shot execution; `reason` is informational and should not change behavior.\n- Implement `reject(reason)` when the discard needs cleanup (temp state, locks, notifications); omit it for stateless previews where the default message suffices.\n- If your tool can stage multiple previews, remember they are mediated by the tool-choice queue rather than a separate pending-action stack.\n",
|
|
53
|
+
"rpc.md": "# RPC Protocol Reference\n\nRPC mode runs the coding agent as a newline-delimited JSON protocol over stdio.\n\n- **stdin**: commands (`RpcCommand`), extension UI responses, and host-tool updates/results\n- **stdout**: a ready frame, command responses (`RpcResponse`), session/agent events, extension UI requests, host-tool requests/cancellations\n\nPrimary implementation:\n\n- `src/modes/rpc/rpc-mode.ts`\n- `src/modes/rpc/rpc-types.ts`\n- `src/session/agent-session.ts`\n- `packages/agent/src/agent.ts`\n- `packages/agent/src/agent-loop.ts`\n\n## Startup\n\n```bash\nomp --mode rpc [regular CLI options]\n```\n\nBehavior notes:\n\n- `@file` CLI arguments are rejected in RPC mode.\n- RPC mode disables automatic session title generation by default to avoid an extra model call.\n- RPC mode resets workflow-altering `todo.*`, `task.*`, `async.*`, and `bash.autoBackground.*` settings to their built-in defaults instead of inheriting user overrides.\n- The process reads stdin as JSONL (`readJsonl(Bun.stdin.stream())`).\n- At startup it writes `{ \"type\": \"ready\" }` before processing commands.\n- When stdin closes, pending host-tool calls are rejected and the process exits with code `0`.\n- Responses/events are written as one JSON object per line.\n\n## Transport and Framing\n\nEach frame is a single JSON object followed by `\\n`.\n\nThere is no envelope beyond the object shape itself.\n\n### Outbound frame categories (stdout)\n\n1. Ready frame (`{ type: \"ready\" }`)\n2. `RpcResponse` (`{ type: \"response\", ... }`)\n3. `AgentSessionEvent` objects (`agent_start`, `message_update`, etc.)\n4. `RpcExtensionUIRequest` (`{ type: \"extension_ui_request\", ... }`)\n5. Host tool requests/cancellations (`host_tool_call`, `host_tool_cancel`)\n6. Host URI requests/cancellations (`host_uri_request`, `host_uri_cancel`)\n7. Extension errors (`{ type: \"extension_error\", extensionPath, event, error }`)\n\n### Inbound frame categories (stdin)\n\n1. `RpcCommand`\n2. `RpcExtensionUIResponse` (`{ type: \"extension_ui_response\", ... }`)\n3. Host tool updates/results (`host_tool_update`, `host_tool_result`)\n4. Host URI results (`host_uri_result`)\n\n## Request/Response Correlation\n\nAll commands accept optional `id?: string`.\n\n- If provided, normal command responses echo the same `id`.\n- `RpcClient` relies on this for pending-request resolution.\n\nImportant edge behavior from runtime:\n\n- Unknown command responses are emitted with `id: undefined` (even if the request had an `id`).\n- Parse/handler exceptions in the input loop emit `command: \"parse\"` with `id: undefined`.\n- `prompt` and `abort_and_prompt` return immediate success, then may emit a later error response with the **same** id if async prompt scheduling fails.\n\n## Command Schema (canonical)\n\n`RpcCommand` is defined in `src/modes/rpc/rpc-types.ts`:\n\n### Prompting\n\n- `{ id?, type: \"prompt\", message: string, images?: ImageContent[], streamingBehavior?: \"steer\" | \"followUp\" }`\n- `{ id?, type: \"steer\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"follow_up\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"abort\" }`\n- `{ id?, type: \"abort_and_prompt\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"new_session\", parentSession?: string }`\n\n### State\n\n- `{ id?, type: \"get_state\" }`\n- `{ id?, type: \"set_todos\", phases: TodoPhase[] }`\n- `{ id?, type: \"set_host_tools\", tools: RpcHostToolDefinition[] }`\n- `{ id?, type: \"set_host_uri_schemes\", schemes: RpcHostUriSchemeDefinition[] }`\n\n### Model\n\n- `{ id?, type: \"set_model\", provider: string, modelId: string }`\n- `{ id?, type: \"cycle_model\" }`\n- `{ id?, type: \"get_available_models\" }`\n\n### Thinking\n\n- `{ id?, type: \"set_thinking_level\", level: ThinkingLevel }`\n- `{ id?, type: \"cycle_thinking_level\" }`\n\n### Queue modes\n\n- `{ id?, type: \"set_steering_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_follow_up_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_interrupt_mode\", mode: \"immediate\" | \"wait\" }`\n\n### Compaction\n\n- `{ id?, type: \"compact\", customInstructions?: string }`\n- `{ id?, type: \"set_auto_compaction\", enabled: boolean }`\n\n### Retry\n\n- `{ id?, type: \"set_auto_retry\", enabled: boolean }`\n- `{ id?, type: \"abort_retry\" }`\n\n### Bash\n\n- `{ id?, type: \"bash\", command: string }`\n- `{ id?, type: \"abort_bash\" }`\n\n### Session\n\n- `{ id?, type: \"get_session_stats\" }`\n- `{ id?, type: \"export_html\", outputPath?: string }`\n- `{ id?, type: \"switch_session\", sessionPath: string }`\n- `{ id?, type: \"branch\", entryId: string }`\n- `{ id?, type: \"get_branch_messages\" }`\n- `{ id?, type: \"get_last_assistant_text\" }`\n- `{ id?, type: \"set_session_name\", name: string }`\n- `{ id?, type: \"handoff\", customInstructions?: string }`\n\n### Messages\n\n- `{ id?, type: \"get_messages\" }`\n\n### Login\n\n- `{ id?, type: \"get_login_providers\" }`\n- `{ id?, type: \"login\", providerId: string }`\n\n## Response Schema\n\nAll command results use `RpcResponse`:\n\n- Success: `{ id?, type: \"response\", command: <command>, success: true, data?: ... }`\n- Failure: `{ id?, type: \"response\", command: string, success: false, error: string }`\n\nData payloads are command-specific and defined in `rpc-types.ts`.\n\n### `get_state` payload\n\n```json\n{\n \"model\": { \"provider\": \"...\", \"id\": \"...\" },\n \"thinkingLevel\": \"off|minimal|low|medium|high|xhigh\",\n \"isStreaming\": false,\n \"isCompacting\": false,\n \"steeringMode\": \"all|one-at-a-time\",\n \"followUpMode\": \"all|one-at-a-time\",\n \"interruptMode\": \"immediate|wait\",\n \"sessionFile\": \"...\",\n \"sessionId\": \"...\",\n \"sessionName\": \"...\",\n \"autoCompactionEnabled\": true,\n \"messageCount\": 0,\n \"queuedMessageCount\": 0,\n \"todoPhases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Todos\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the tool surface\",\n \"status\": \"in_progress\"\n }\n ]\n }\n ],\n \"systemPrompt\": [\"...\"],\n \"dumpTools\": [\n {\n \"name\": \"read\",\n \"description\": \"Read files and URLs\",\n \"parameters\": {}\n }\n ],\n \"contextUsage\": {\n \"tokens\": 1100,\n \"contextWindow\": 200000,\n \"percent\": 0.55\n }\n}\n```\n\n### `set_todos` payload\n\nReplaces the in-memory todo state for the current session and returns the normalized phase list:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"set_todos\",\n \"phases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Evaluation\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the read tool surface\",\n \"status\": \"in_progress\"\n },\n {\n \"id\": \"task-2\",\n \"content\": \"Exercise edit operations\",\n \"status\": \"pending\"\n }\n ]\n }\n ]\n}\n```\n\nThis is useful for hosts that want to pre-seed a plan before the first prompt.\n\n### `set_host_tools` payload\n\nReplaces the current set of host-owned tools that the RPC server may call back\ninto over stdio:\n\n```json\n{\n \"id\": \"req_3\",\n \"type\": \"set_host_tools\",\n \"tools\": [\n {\n \"name\": \"echo_host\",\n \"label\": \"Echo Host\",\n \"description\": \"Echo a value from the embedding host\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"message\": { \"type\": \"string\" }\n },\n \"required\": [\"message\"],\n \"additionalProperties\": false\n }\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"toolNames\": [\"echo_host\"]\n}\n```\n\nThese tools are added to the active session tool registry before the next model\ncall. Re-sending `set_host_tools` replaces the previous host-owned set.\n\n### `set_host_uri_schemes` payload\n\nReplaces the current set of host-owned URL schemes the RPC server should\ndispatch reads/writes through:\n\n```json\n{\n \"id\": \"req_4\",\n \"type\": \"set_host_uri_schemes\",\n \"schemes\": [\n {\n \"scheme\": \"db\",\n \"description\": \"Virtual db row files\",\n \"writable\": true,\n \"immutable\": false\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"schemes\": [\"db\"]\n}\n```\n\nSchemes are case-insensitive on the wire and normalized to lowercase before\nthe response is sent. Re-sending `set_host_uri_schemes` replaces the entire\nprevious set — schemes missing from the new list are unregistered.\n\n## Event Stream Schema\n\nRPC mode forwards `AgentSessionEvent` objects from `AgentSession.subscribe(...)`.\n\nCommon event types:\n\n- `agent_start`, `agent_end`\n- `turn_start`, `turn_end`\n- `message_start`, `message_update`, `message_end`\n- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `todo_auto_clear`\n\nExtension runner errors are emitted separately as:\n\n```json\n{\n \"type\": \"extension_error\",\n \"extensionPath\": \"...\",\n \"event\": \"...\",\n \"error\": \"...\"\n}\n```\n\n`message_update` includes streaming deltas in `assistantMessageEvent` (text/thinking/toolcall deltas).\n\n## Prompt/Queue Concurrency and Ordering\n\nThis is the most important operational behavior.\n\n### Immediate ack vs completion\n\n`prompt` and `abort_and_prompt` are **acknowledged immediately**:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n```\n\nThat means:\n\n- command acceptance != run completion\n- final completion is observed via `agent_end`\n\n### While streaming\n\n`AgentSession.prompt()` requires `streamingBehavior` during active streaming:\n\n- `\"steer\"` => queued steering message (interrupt path)\n- `\"followUp\"` => queued follow-up message (post-turn path)\n\nIf omitted during streaming, prompt fails.\n\n### Queue defaults\n\nFrom `packages/agent/src/agent.ts` defaults:\n\n- `steeringMode`: `\"one-at-a-time\"`\n- `followUpMode`: `\"one-at-a-time\"`\n- `interruptMode`: `\"immediate\"`\n\n### Mode semantics\n\n- `set_steering_mode` / `set_follow_up_mode`\n - `\"one-at-a-time\"`: dequeue one queued message per turn\n - `\"all\"`: dequeue entire queue at once\n- `set_interrupt_mode`\n - `\"immediate\"`: tool execution checks steering between tool calls; pending steering can abort remaining tool calls in the turn\n - `\"wait\"`: defer steering until turn completion\n\n## Extension UI Sub-Protocol\n\nExtensions in RPC mode use request/response UI frames.\n\n### Outbound request\n\n`RpcExtensionUIRequest` (`type: \"extension_ui_request\"`) methods:\n\n- `select`, `confirm`, `input`, `editor`, `cancel`\n- `notify`, `setStatus`, `setWidget`, `setTitle`, `set_editor_text`\n- `open_url` (emitted by RPC login flows)\n\nRuntime note:\n\n- Automatic session title generation is disabled in RPC mode, and `setTitle` UI\n requests are also suppressed by default because most hosts do not have a\n meaningful terminal-title surface. Set `PI_RPC_EMIT_TITLE=1` to opt back in to\n the UI event only.\n\nExample:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"123\",\n \"method\": \"confirm\",\n \"title\": \"Confirm\",\n \"message\": \"Continue?\",\n \"timeout\": 30000\n}\n```\n\n### Inbound response\n\n`RpcExtensionUIResponse` (`type: \"extension_ui_response\"`):\n\n- `{ type: \"extension_ui_response\", id: string, value: string }`\n- `{ type: \"extension_ui_response\", id: string, confirmed: boolean }`\n- `{ type: \"extension_ui_response\", id: string, cancelled: true, timedOut?: boolean }`\n\nIf a dialog has a timeout, RPC mode resolves to a default value when timeout/abort fires.\n\n## Host Tool Sub-Protocol\n\nRPC hosts can expose custom tools to the agent by sending `set_host_tools`, then\nserving execution requests over the same transport.\n\n### Outbound request\n\nWhen the agent wants the host to execute one of those tools, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_call\",\n \"id\": \"host_1\",\n \"toolCallId\": \"toolu_123\",\n \"toolName\": \"echo_host\",\n \"arguments\": { \"message\": \"hello\" }\n}\n```\n\nIf the tool execution is later aborted, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_cancel\",\n \"id\": \"host_cancel_1\",\n \"targetId\": \"host_1\"\n}\n```\n\n### Inbound updates and completion\n\nHosts can optionally stream progress:\n\n```json\n{\n \"type\": \"host_tool_update\",\n \"id\": \"host_1\",\n \"partialResult\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"working\" }]\n }\n}\n```\n\nCompletion uses:\n\n```json\n{\n \"type\": \"host_tool_result\",\n \"id\": \"host_1\",\n \"result\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"done\" }]\n }\n}\n```\n\nSet top-level `isError: true` on `host_tool_result` to reject the pending host tool call and surface the returned text content as a tool error.\n\n## Host URI Sub-Protocol\n\nRPC hosts can also own custom URL schemes (virtual files). After\n`set_host_uri_schemes`, every read of `<scheme>://…` and write of\n`<scheme>://…` (when registered as `writable`) is bounced back to the host\nover the same transport.\n\n### Outbound request\n\nWhen a session tool resolves a host-owned URL, RPC mode emits:\n\n```json\n{\n \"type\": \"host_uri_request\",\n \"id\": \"uri_1\",\n \"operation\": \"read\",\n \"url\": \"db://users/42\"\n}\n```\n\nWrites look the same with `\"operation\": \"write\"` and an additional\n`\"content\": \"...\"` field carrying the full replacement bytes.\n\nIf the request is later aborted (caller cancels, session ends), RPC mode\nemits:\n\n```json\n{\n \"type\": \"host_uri_cancel\",\n \"id\": \"uri_cancel_1\",\n \"targetId\": \"uri_1\"\n}\n```\n\n### Inbound result\n\nFor successful reads:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"content\": \"id=42\\nname=Alice\\n\",\n \"contentType\": \"text/plain\",\n \"notes\": [\"fresh from cache\"],\n \"immutable\": false\n}\n```\n\nFor successful writes, omit content:\n\n```json\n{ \"type\": \"host_uri_result\", \"id\": \"uri_1\" }\n```\n\nTo reject the request, set `isError: true` and either populate `error` with\na message or fall back to `content` for textual error surfacing:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"isError\": true,\n \"error\": \"row 42 not found\"\n}\n```\n\n### Constraints\n\n- The agent's `edit` tool does not target host URIs. Hosts that want to\n mutate virtual files expose `write` and let the model use the `write` tool\n with replacement content.\n- Schemes are global to the process; `set_host_uri_schemes` replaces the\n previous set, unregistering anything not in the new list.\n- Schemes are normalized to lowercase before registration.\n\n## Error Model and Recoverability\n\n### Command-level failures\n\nFailures are `success: false` with string `error`.\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"response\",\n \"command\": \"set_model\",\n \"success\": false,\n \"error\": \"Model not found: provider/model\"\n}\n```\n\n### Recoverability expectations\n\n- Most command failures are recoverable; process remains alive.\n- Malformed JSONL / parse-loop exceptions emit a `parse` error response and continue reading subsequent lines.\n- Empty `set_session_name` is rejected (`Session name cannot be empty`).\n- Extension UI responses with unknown `id` are ignored.\n- Process termination conditions are stdin close or explicit extension-triggered shutdown after the current command.\n\n## Compact Command Flows\n\n### 1) Prompt and stream\n\nstdin:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"prompt\", \"message\": \"Summarize this repo\" }\n```\n\nstdout sequence (typical):\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n{ \"type\": \"agent_start\" }\n{ \"type\": \"message_update\", \"assistantMessageEvent\": { \"type\": \"text_delta\", \"delta\": \"...\" }, \"message\": { \"role\": \"assistant\", \"content\": [] } }\n{ \"type\": \"agent_end\", \"messages\": [] }\n```\n\n### 2) Prompt during streaming with explicit queue policy\n\nstdin:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"prompt\",\n \"message\": \"Also include risks\",\n \"streamingBehavior\": \"followUp\"\n}\n```\n\n### 3) Inspect and tune queue behavior\n\nstdin:\n\n```json\n{ \"id\": \"q1\", \"type\": \"get_state\" }\n{ \"id\": \"q2\", \"type\": \"set_steering_mode\", \"mode\": \"all\" }\n{ \"id\": \"q3\", \"type\": \"set_interrupt_mode\", \"mode\": \"wait\" }\n```\n\n### 4) Extension UI round trip\n\nstdout:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"ui_7\",\n \"method\": \"input\",\n \"title\": \"Branch name\",\n \"placeholder\": \"feature/...\"\n}\n```\n\nstdin:\n\n```json\n{ \"type\": \"extension_ui_response\", \"id\": \"ui_7\", \"value\": \"feature/rpc-host\" }\n```\n\n## Notes on `RpcClient` helper\n\n`src/modes/rpc/rpc-client.ts` is a convenience wrapper, not the protocol definition.\n\nCurrent helper characteristics:\n\n- Spawns `bun <cliPath> --mode rpc`\n- Correlates responses by generated `req_<n>` ids\n- Dispatches recognized core `AgentEvent` types to listeners\n- Supports host-owned custom tools via `setCustomTools()` and automatic handling of `host_tool_call` / `host_tool_cancel`\n- Wraps common protocol commands including OAuth `getLoginProviders()` / `login(...)`; use raw protocol frames for any surface not wrapped by the helper.\n\nUse raw protocol frames if you need complete surface coverage.\n",
|
|
54
|
+
"rulebook-matching-pipeline.md": "# Rulebook Matching Pipeline\n\nThis document describes how coding-agent discovers rules from supported config formats, normalizes them into a single `Rule` shape, resolves precedence conflicts, and splits the result into:\n\n- **Rulebook rules** (available to the model via system prompt + `rule://` URLs)\n- **TTSR rules** (time-travel stream interruption rules)\n\nIt reflects the current implementation, including partial semantics and metadata that is parsed but not enforced.\n\n## Implementation files\n\n- [`packages/coding-agent/src/capability/rule.ts`](../packages/coding-agent/src/capability/rule.ts)\n- [`packages/coding-agent/src/capability/rule-buckets.ts`](../packages/coding-agent/src/capability/rule-buckets.ts)\n- [`packages/coding-agent/src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`packages/coding-agent/src/discovery/index.ts`](../packages/coding-agent/src/discovery/index.ts)\n- [`packages/coding-agent/src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`packages/coding-agent/src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`packages/coding-agent/src/discovery/builtin-defaults.ts`](../packages/coding-agent/src/discovery/builtin-defaults.ts)\n- [`packages/coding-agent/src/discovery/agents.ts`](../packages/coding-agent/src/discovery/agents.ts)\n- [`packages/coding-agent/src/discovery/cursor.ts`](../packages/coding-agent/src/discovery/cursor.ts)\n- [`packages/coding-agent/src/discovery/windsurf.ts`](../packages/coding-agent/src/discovery/windsurf.ts)\n- [`packages/coding-agent/src/discovery/cline.ts`](../packages/coding-agent/src/discovery/cline.ts)\n- [`packages/coding-agent/src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`packages/coding-agent/src/system-prompt.ts`](../packages/coding-agent/src/system-prompt.ts)\n- [`packages/coding-agent/src/internal-urls/rule-protocol.ts`](../packages/coding-agent/src/internal-urls/rule-protocol.ts)\n- [`packages/utils/src/frontmatter.ts`](../packages/utils/src/frontmatter.ts)\n\n## 1. Canonical rule shape\n\nAll providers normalize source files into `Rule`:\n\n```ts\ninterface Rule {\n name: string;\n path: string;\n content: string;\n globs?: string[];\n alwaysApply?: boolean;\n description?: string;\n condition?: string[];\n astCondition?: string[];\n scope?: string[];\n interruptMode?: \"never\" | \"prose-only\" | \"tool-only\" | \"always\";\n _source: SourceMeta;\n}\n```\n\nCapability identity is `rule.name` (`ruleCapability.key = rule => rule.name`).\n\nConsequence: precedence and deduplication are **name-based only**. Two different files with the same `name` are considered the same logical rule.\n\n## 2. Discovery sources and normalization\n\n`src/discovery/index.ts` auto-registers providers. For `rules`, current providers are:\n\n- `native` (priority `100`)\n- `agents` (priority `70`)\n- `cursor` (priority `50`)\n- `windsurf` (priority `50`)\n- `cline` (priority `40`)\n- `builtin-defaults` (priority `1`)\n\n### Native provider (`builtin.ts`)\n\nLoads `.omp` rules from:\n\n- project: `<cwd>/.omp/rules/*.{md,mdc}` when the cwd `.omp` directory exists\n- user: `~/.omp/agent/rules/*.{md,mdc}`\n- sticky user rule: `~/.omp/agent/RULES.md`\n- sticky project rule: nearest ancestor `.omp/RULES.md` while walking from cwd toward the repository root\n\nNormalization:\n\n- `name` = filename without `.md`/`.mdc`\n- frontmatter parsed via `parseFrontmatter`\n- `content` = body (frontmatter stripped)\n- `globs`, `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by `buildRuleFromMarkdown`\n- top-level `RULES.md` is synthesized as rule name `RULES` and forced to `alwaysApply: true`\n\nImportant caveat: `condition` values that look like file globs are converted into `tool:edit(...)` / `tool:write(...)` scope shorthands with catch-all condition `.*`.\n\n### Agents provider (`agents.ts`)\n\nLoads from both `.agent` and `.agents` directories:\n\n- project: walk upward from `cwd` to repo root, loading `<ancestor>/.agent/rules/*.{md,mdc}` and `<ancestor>/.agents/rules/*.{md,mdc}`\n- user: `~/.agent/rules/*.{md,mdc}` and `~/.agents/rules/*.{md,mdc}`\n\nNormalization uses the shared `buildRuleFromMarkdown` path: filename-derived name, stripped frontmatter body, and parsed `globs`, `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode`.\n\n### Cursor provider (`cursor.ts`)\n\nLoads from:\n\n- user: `~/.cursor/rules/*.{mdc,md}`\n- project: `<cwd>/.cursor/rules/*.{mdc,md}`\n\nNormalization (`transformMDCRule`):\n\n- `description`: kept only if string\n- `alwaysApply`: only `true` is preserved (`false` becomes `undefined`)\n- `globs`: accepts array (string elements only) or single string\n- `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by shared rule helpers\n- `name` from filename without extension\n\n### Windsurf provider (`windsurf.ts`)\n\nLoads from:\n\n- user: `~/.codeium/windsurf/memories/global_rules.md` (fixed rule name `global_rules`)\n- project: `<cwd>/.windsurf/rules/*.md`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `global_rules` for the user global file and derived from filename for project rules\n\n### Cline provider (`cline.ts`)\n\nSearches upward from `cwd` for nearest `.clinerules`:\n\n- if directory: loads `*.md` inside it\n- if file: loads single file as rule named `clinerules`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `clinerules` for a `.clinerules` file and derived from filename for `.clinerules/*.md`\n\n## 3. Frontmatter parsing behavior and ambiguity\n\nAll providers use `parseFrontmatter` (`utils/frontmatter.ts`) with these semantics:\n\n1. Frontmatter is parsed only when content starts with `---` and has a closing `\\n---`.\n2. Body is trimmed after frontmatter extraction.\n3. If YAML parse fails:\n - warning is logged,\n - parser falls back to simple `key: value` line parsing (`^(\\w+):\\s*(.*)$`).\n\nAmbiguity consequences:\n\n- Fallback parser does not support arrays, nested objects, quoting rules, or hyphenated keys.\n- Fallback values become strings (for example `alwaysApply: true` becomes string `\"true\"`), so providers requiring boolean/string types may drop metadata.\n- `ttsr_trigger` works in fallback (underscore key); keys like `thinking-level` would not.\n- Files without valid frontmatter still load as rules with empty metadata and full content body.\n\n## 4. Provider precedence and deduplication\n\n`loadCapability(\"rules\")` (`capability/index.ts`) merges provider outputs and then deduplicates by `rule.name`.\n\n### Precedence model\n\n- Providers are ordered by priority descending.\n- Equal priority keeps registration order (`cursor` before `windsurf` from `discovery/index.ts`).\n- Dedup is first-wins: first encountered rule name is kept; later same-name items are marked `_shadowed` in `all` and excluded from `items`.\n\nEffective rule provider order is currently:\n\n1. `native` (100)\n2. `agents` (70)\n3. `cursor` (50)\n4. `windsurf` (50)\n5. `cline` (40)\n6. `builtin-defaults` (1)\n\n### Intra-provider ordering caveat\n\nWithin a provider, item order comes from `loadFilesFromDir` glob result ordering plus explicit push order. This is deterministic enough for normal use but not explicitly sorted in code.\n\nNotable source-order differences:\n\n- `native` appends project `.omp/rules`, user `~/.omp/agent/rules`, user `RULES.md`, then nearest project `RULES.md`.\n- `agents` appends project-walk `.agent`/`.agents` rule dirs before user home dirs.\n- `cursor` appends user then project results.\n- `windsurf` appends user `global_rules` first, then project rules.\n- `cline` loads only nearest `.clinerules` source.\n- `builtin-defaults` uses the embedded rule source order.\n\n## 5. Split into Rulebook, Always-Apply, and TTSR buckets\n\nAfter rule discovery in `createAgentSession` (`sdk.ts`), `bucketRules(...)` applies session-level filtering and bucket assignment:\n\n1. Drop rules listed in `ttsr.disabledRules`.\n2. Drop rules from the `builtin-defaults` provider when `ttsr.builtinRules === false`.\n3. Register rules with a non-empty `condition` or `astCondition` into `TtsrManager`; if registration succeeds, the rule is TTSR-only.\n4. Put remaining `alwaysApply === true` rules into `alwaysApplyRules`.\n5. Put remaining rules with `description` into `rulebookRules`.\n\n### Bucket behavior\n\n- **TTSR bucket**: any enabled rule with a non-empty parsed `condition` (regex) or `astCondition` (ast-grep patterns) that `TtsrManager.addRule(...)` accepts. Takes priority over other buckets.\n- **Always-apply bucket**: `alwaysApply === true`, not TTSR. Full content injected into system prompt. Resolvable via `rule://`.\n- **Rulebook bucket**: must have description, must not be TTSR, must not be `alwaysApply`. Listed in system prompt by name+description; content read on demand via `rule://`.\n- A rule with both a trigger condition and `alwaysApply` goes to TTSR only if TTSR registration accepts it; otherwise it can fall through to always-apply.\n- A rule with both `alwaysApply` and `description` goes to always-apply only (not rulebook).\n\n## 6. How metadata affects runtime surfaces\n\n### `description`\n\n- Required for inclusion in rulebook.\n- Rendered in system prompt `<rules>` block.\n- Missing description means rule is not available via `rule://` and not listed in system prompt rules.\n\n### `globs`\n\n- Carried through on `Rule`.\n- Rendered as `<glob>...</glob>` entries in the system prompt rules block.\n- Exposed in rules UI state (`extensions` mode list).\n- Used by TTSR as a global path gate: if a TTSR rule has globs, the match context must include at least one matching file path.\n- Not used to automatically select rulebook rules for `rule://`; rulebook matching remains advisory prompt behavior.\n\n### `alwaysApply`\n\n- Parsed and preserved by providers.\n- Used in UI display (`\"always\"` trigger label in extensions state manager).\n- Used as an exclusion condition from `rulebookRules`.\n- **Full rule content is auto-injected into the system prompt** (before the rulebook rules section).\n- Rule is also addressable via `rule://<name>` for re-reading.\n\n### `condition`, `astCondition`, `scope`, and `interruptMode`\n\n- `condition` is the regex TTSR trigger field; legacy `ttsr_trigger` / `ttsrTrigger` are accepted as fallback inputs during parsing.\n- `astCondition` is the ast-grep trigger field: a string or list of structural patterns, kept verbatim (no glob inference). It only matches on edit/write tool streams, where the language is inferred from the file path. A rule may set `condition`, `astCondition`, or both.\n- `scope` narrows TTSR matching scope. A `condition` token that looks like a file glob becomes `tool:edit(<glob>)` and `tool:write(<glob>)` scope entries plus catch-all condition `.*`; `astCondition` tokens never trigger this shorthand.\n- `interruptMode` can override the global TTSR interrupt mode for the rule.\n\n## 7. System prompt inclusion path\n\n`buildSystemPromptInternal` receives both `rules` (rulebook) and `alwaysApplyRules`.\n\nAlways-apply rules are rendered first, injecting their raw content directly into the prompt.\n\nRulebook rules are rendered in a `# Rules` section with:\n\n- `Read rule://<name> when working in matching domain`\n- Each rule's `name`, `description`, and optional `<glob>` list\n\nThis is advisory/contextual: prompt text asks the model to read applicable rules, but code does not enforce glob applicability.\n\n## 8. `rule://` internal URL behavior\n\n`RuleProtocolHandler` resolves against the process-global active-rule snapshot\ninstalled once per top-level session in `sdk.ts`:\n\n```ts\nsetActiveRules([...rulebookRules, ...alwaysApplyRules, ...ttsrManager.getRules()]);\n```\n\nImplications:\n\n- `rule://<name>` resolves against **rulebookRules**, **alwaysApplyRules**, and **registered TTSR rules**.\n- TTSR rules are bucketed out before rulebook/always, but `ttsrManager.getRules()` re-adds them to the snapshot so a triggered rule (e.g. a builtin) stays addressable for re-reading.\n- Rules with no description, no `alwaysApply`, and no accepted TTSR condition are not addressable via `rule://`.\n- Resolution is exact name match.\n- Unknown names return error listing available rule names.\n- Returned content is raw `rule.content` (frontmatter stripped), content type `text/markdown`.\n\n## 9. Known partial / non-enforced semantics\n\n1. The rule providers currently loaded for `rules` are `native`, `agents`, `cursor`, `windsurf`, `cline`, and embedded `builtin-defaults`; provider files for other tools may parse other config formats but do not register rule loaders.\n2. `globs` metadata is surfaced to prompt/UI and is used as a global path gate for TTSR matching, but it is not used to automatically select rulebook rules for `rule://`.\n3. Rule selection for `rule://` includes rulebook, always-apply, and registered TTSR rules (so a triggered TTSR rule can be re-read), but not rules that registered no condition and carry neither a description nor `alwaysApply`.\n4. Discovery warnings (`loadCapability(\"rules\").warnings`) are produced but `createAgentSession` does not currently surface/log them in this path.\n",
|
|
55
|
+
"sdk.md": "# SDK\n\nThe SDK is the in-process integration surface for `@oh-my-pi/pi-coding-agent`.\nUse it when you want direct access to agent state, event streaming, tool wiring, and session control from your own Bun/Node process.\n\nIf you need cross-language/process isolation, use RPC mode instead.\n\n## Installation\n\n```bash\nbun add @oh-my-pi/pi-coding-agent\n```\n\n## Entry points\n\n`@oh-my-pi/pi-coding-agent` exports the SDK APIs from the package root (and also via `@oh-my-pi/pi-coding-agent/sdk`).\n\nCore exports for embedders:\n\n- `createAgentSession`\n- `SessionManager`\n- `Settings`\n- `AuthStorage`\n- `ModelRegistry`\n- `discoverAuthStorage`\n- Discovery helpers (`discoverExtensions`, `discoverSkills`, `discoverContextFiles`, `discoverPromptTemplates`, `discoverSlashCommands`, `discoverCustomTSCommands`, `discoverMCPServers`)\n- Tool factory surface (`createTools`, `BUILTIN_TOOLS`, tool classes)\n\n## Quick start (auto-discovery defaults)\n\n```ts\nimport { createAgentSession } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session, modelFallbackMessage } = await createAgentSession();\n\nif (modelFallbackMessage) {\n process.stderr.write(`${modelFallbackMessage}\\n`);\n}\n\nconst unsubscribe = session.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Summarize this repository in 3 bullets.\");\nunsubscribe();\nawait session.dispose();\n```\n\n## What `createAgentSession()` discovers by default\n\n`createAgentSession()` follows “provide to override, omit to discover”.\n\nIf omitted, it resolves:\n\n- `cwd`: `getProjectDir()`\n- `agentDir`: `~/.omp/agent` (via `getAgentDir()`)\n- `authStorage`: `discoverAuthStorage(agentDir)`\n- `modelRegistry`: `new ModelRegistry(authStorage)` + background `refreshInBackground()` when the registry is not provided\n- `settings`: `await Settings.init({ cwd, agentDir })`\n- `sessionManager`: `SessionManager.create(cwd)` (file-backed)\n- skills/context files/prompt templates/slash commands/extensions/custom TS commands\n- built-in tools via `createTools(...)`\n- MCP tools (enabled by default; Exa MCP servers are folded into native Exa integration, and browser automation MCP servers are filtered when the built-in browser tool is enabled)\n- LSP integration (enabled by default)\n- `eventBus`: new `EventBus()` unless supplied\n\n### Required vs optional inputs\n\nTypically you must provide only what you want to control:\n\n- **Must provide**: nothing for a minimal session\n- **Usually provide explicitly** in embedders:\n - `sessionManager` (if you need in-memory or custom location)\n - `authStorage` + `modelRegistry` (if you own credential/model lifecycle)\n - `model` or `modelPattern` (if deterministic model selection matters)\n - `settings` (if you need isolated/test config)\n\n## Session manager behavior (persistent vs in-memory)\n\n`AgentSession` always uses a `SessionManager`; behavior depends on which factory you use.\n\n### File-backed (default)\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.create(process.cwd()),\n});\n\nconsole.log(session.sessionFile); // absolute .jsonl path\n```\n\n- Persists conversation/messages/state deltas to session files.\n- Supports resume/open/list/fork workflows.\n- `session.sessionFile` is defined.\n\n### In-memory\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.inMemory(),\n});\n\nconsole.log(session.sessionFile); // undefined\n```\n\n- No filesystem persistence.\n- Useful for tests, ephemeral workers, request-scoped agents.\n- Session methods still work, but persistence-specific behaviors (file resume/fork paths) are naturally limited.\n\n### Resume/open/list helpers\n\n```ts\nimport { SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst recent = await SessionManager.continueRecent(process.cwd());\nconst listed = await SessionManager.list(process.cwd());\nconst opened = listed[0] ? await SessionManager.open(listed[0].path) : null;\n```\n\n## Model and auth wiring\n\n`createAgentSession()` uses `ModelRegistry` + `AuthStorage` for model selection and API key resolution.\n\n### Explicit wiring\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst available = modelRegistry.getAvailable();\nif (available.length === 0)\n throw new Error(\"No authenticated models available\");\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n model: available[0],\n thinkingLevel: \"medium\",\n sessionManager: SessionManager.inMemory(),\n});\n```\n\n### Selection order when `model` is omitted\n\nWhen no explicit `model`/`modelPattern` is provided:\n\n1. restore model from existing session (if restorable + key available)\n2. settings default model role (`default`)\n3. first available model with valid auth\n\nIf restore fails, `modelFallbackMessage` explains fallback.\n\n### Auth priority\n\n`AuthStorage.getApiKey(...)` resolves in this order:\n\n1. runtime override (`setRuntimeApiKey`, used by CLI `--api-key`)\n2. config-sourced API key override (`models.yml` provider `apiKey`)\n3. stored API-key credential in `agent.db` / broker-backed storage\n4. stored OAuth credential, including refresh when needed\n5. provider environment variables\n6. custom-provider resolver fallback\n\n## Event subscription model\n\nSubscribe with `session.subscribe(listener)`; it returns an unsubscribe function.\n\n```ts\nconst unsubscribe = session.subscribe((event) => {\n switch (event.type) {\n case \"agent_start\":\n case \"turn_start\":\n case \"tool_execution_start\":\n break;\n case \"message_update\":\n if (event.assistantMessageEvent.type === \"text_delta\") {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n break;\n }\n});\n```\n\n`AgentSessionEvent` includes core `AgentEvent` plus session-level events:\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `retry_fallback_applied` / `retry_fallback_succeeded`\n- `ttsr_triggered`\n- `todo_reminder` / `todo_auto_clear`\n- `irc_message`\n\n## Prompt lifecycle\n\n`session.prompt(text, options?)` is the primary entry point.\n\nBehavior:\n\n1. optional command/template expansion (`/` commands, custom commands, file slash commands, prompt templates)\n2. if currently streaming:\n - requires `streamingBehavior: \"steer\" | \"followUp\"`\n - queues instead of throwing work away\n3. if idle:\n - validates model + API key\n - appends user message\n - starts agent turn\n\nRelated APIs:\n\n- `sendUserMessage(content, { deliverAs? })`\n- `steer(text, images?)`\n- `followUp(text, images?)`\n- `sendCustomMessage({ customType, content, ... }, { deliverAs?, triggerTurn? })`\n- `abort()`\n\n## Tools and extension integration\n\n### Built-ins and filtering\n\n- Built-ins come from `createTools(...)` and `BUILTIN_TOOLS`.\n- `toolNames` acts as an allowlist for built-ins.\n- `customTools` and extension-registered tools are still included.\n- Hidden tools (for example `yield`) are opt-in unless required by options.\n\n```ts\nconst { session } = await createAgentSession({\n toolNames: [\"read\", \"search\", \"find\", \"write\"],\n requireYieldTool: true,\n});\n```\n\n### Extensions\n\n- `extensions`: inline `ExtensionFactory[]`\n- `additionalExtensionPaths`: load extra extension files\n- `disableExtensionDiscovery`: disable automatic extension scanning\n- `preloadedExtensions`: reuse already loaded extension set\n\n### Runtime tool set changes\n\n`AgentSession` supports runtime activation updates:\n\n- `getActiveToolNames()`\n- `getAllToolNames()`\n- `setActiveToolsByName(names)`\n- `refreshMCPTools(mcpTools)`\n\nSystem prompt is rebuilt to reflect active tool changes.\n\n## Discovery helpers\n\nUse these when you want partial control without recreating internal discovery logic:\n\n- `discoverAuthStorage(agentDir?)`\n- `discoverExtensions(cwd?)`\n- `discoverSkills(cwd?, _agentDir?, settings?)`\n- `discoverContextFiles(cwd?, _agentDir?)`\n- `discoverPromptTemplates(cwd?, agentDir?)`\n- `discoverSlashCommands(cwd?)`\n- `discoverCustomTSCommands(cwd?, agentDir?)`\n- `discoverMCPServers(cwd?)`\n- `buildSystemPrompt(options?)`\n\n## Subagent-oriented options\n\nFor SDK consumers building orchestrators (similar to task executor flow):\n\n- `outputSchema`: passes structured output expectation into tool context\n- `requireYieldTool`: forces `yield` tool inclusion\n- `taskDepth`: recursion-depth context for nested task sessions\n- `parentTaskPrefix`: artifact naming prefix for nested task outputs\n\nThese are optional for normal single-agent embedding.\n\n## `createAgentSession()` return value\n\n```ts\ntype CreateAgentSessionResult = {\n session: AgentSession;\n extensionsResult: LoadExtensionsResult;\n setToolUIContext: (uiContext: ExtensionUIContext, hasUI: boolean) => void;\n mcpManager?: MCPManager;\n modelFallbackMessage?: string;\n lspServers?: Array<{\n name: string;\n status: \"ready\" | \"error\";\n fileTypes: string[];\n error?: string;\n }>;\n eventBus: EventBus;\n};\n```\n\nUse `setToolUIContext(...)` only if your embedder provides UI capabilities that tools/extensions should call into.\n\n## Startup performance\n\n`createAgentSession()` runs two background optimizations to overlap I/O with the rest of session setup:\n\n- **Model-host preconnect.** As soon as the model is resolved, the SDK fires a best-effort `fetch.preconnect(model.baseUrl)` so DNS + TCP + TLS + HTTP/2 to the provider's host happens in parallel with extension/skill load, tool registry build, and system-prompt assembly. The first real `fetch(...)` then reuses the warm connection, saving 100–300 ms on transcontinental hops (e.g. residential IP → `api.anthropic.com`). Implementation lives in `preconnectModelHost()` in `packages/coding-agent/src/sdk.ts`. If `fetch.preconnect` is unavailable (non-Bun runtime) or the call throws, the optimization is silently skipped — never a hard dependency. Applies to every mode (interactive, print, RPC, ACP).\n- **Conditional LSP warmup.** Startup LSP servers (those returned by `discoverStartupLspServers(cwd)`) are only warmed when **all** of these hold:\n - `enableLsp !== false` on the session options, **and**\n - `options.hasUI === true` (interactive TUI), **and**\n - the `lsp.lazy` setting is disabled (it defaults to `true`).\n\n With `lsp.lazy` enabled — the default — no language servers are launched at startup at all; each server cold-starts on first use, i.e. when the agent invokes the `lsp` tool or an edit/write touches a file whose extension matches the server's `fileTypes`. Print / script / RPC / ACP invocations (`hasUI=false`) skip the warmup regardless of the setting: they don't render the warmup status indicator and typically finish before the language servers would stabilize, so warming them just spends CPU parsing big `initialize` responses concurrently with the LLM stream consumer and jitters perceived latency. Tools that actually need an LSP server still spin one up on demand through `getOrCreateClient()` — only the _startup_ warmup is skipped. The returned `lspServers` field in `CreateAgentSessionResult` is still populated for UI sessions in lazy mode — recognized servers are discovered (no processes spawned) and reported with status `\"available\"` so the welcome screen and `/status` can list them; it is `undefined` only when `enableLsp === false` or `hasUI === false`.\n\n## Minimal controlled embed example\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n Settings,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst settings = Settings.isolated({\n \"compaction.enabled\": true,\n \"retry.enabled\": true,\n});\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n settings,\n sessionManager: SessionManager.inMemory(),\n toolNames: [\"read\", \"search\", \"find\", \"edit\", \"write\"],\n enableMCP: false,\n enableLsp: true,\n});\n\nsession.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Find all TODO comments in this repo and propose fixes.\");\nawait session.dispose();\n```\n",
|
|
56
|
+
"secrets.md": "# Secret Obfuscation\n\nPrevents sensitive values (API keys, tokens, passwords) from being sent to LLM providers. When enabled, secrets are replaced before outbound text content leaves the process. Reversible obfuscation placeholders are restored when session context is rebuilt for display or resume.\n\n## Enabling\n\nDisabled by default. Toggle via `/settings` UI or directly in `config.yml`:\n\n```yaml\nsecrets:\n enabled: true\n```\n\n## How it works\n\n1. On session startup, secrets are collected from two sources:\n - **Environment variables** whose names match common secret patterns (`KEY`, `SECRET`, `TOKEN`, `PASSWORD`, `PASS`, `AUTH`, `CREDENTIAL`, `PRIVATE`, `OAUTH`) with values >= 8 characters\n - **`secrets.yml` files** (see below)\n\n2. Outbound text messages to the LLM have secret values replaced with deterministic placeholders like `#AB12#`.\n\n3. Session context is deep-walked and obfuscation placeholders are restored when building display/resume context. Replace-mode substitutions are one-way and are not restored.\n\nTwo modes control what happens to each secret:\n\n| Mode | Behavior | Reversible |\n| --------------------- | ------------------------------------------------------- | -------------------------------------------- |\n| `obfuscate` (default) | Replaced with deterministic placeholder `#[A-Z0-9]{4}#` | Yes (deobfuscated in display/resume context) |\n| `replace` | Replaced with deterministic same-length string | No (one-way) |\n\n## secrets.yml\n\nDefine custom secret entries in YAML. Two locations are checked:\n\n| Level | Path | Purpose |\n| ------- | -------------------------- | --------------------------- |\n| Global | `~/.omp/agent/secrets.yml` | Secrets across all projects |\n| Project | `<cwd>/.omp/secrets.yml` | Project-specific secrets |\n\nProject entries override global entries with matching `content`.\n\n### Schema\n\nEach entry in the array has these fields:\n\n| Field | Type | Required | Description |\n| ------------- | ---------------------------- | -------- | ------------------------------------------------- |\n| `type` | `\"plain\"` or `\"regex\"` | Yes | Match strategy |\n| `content` | string | Yes | The secret value (plain) or regex pattern (regex) |\n| `mode` | `\"obfuscate\"` or `\"replace\"` | No | Default: `\"obfuscate\"` |\n| `replacement` | string | No | Custom replacement (replace mode only) |\n| `flags` | string | No | Regex flags (regex type only) |\n\n### Examples\n\n#### Plain secrets\n\n```yaml\n# Obfuscate a specific API key (default mode)\n- type: plain\n content: sk-proj-abc123def456\n\n# Replace a database password with a fixed string\n- type: plain\n content: hunter2\n mode: replace\n replacement: \"********\"\n```\n\n#### Regex secrets\n\n```yaml\n# Obfuscate any AWS-style key\n- type: regex\n content: \"AKIA[0-9A-Z]{16}\"\n\n# Case-insensitive match with explicit flags\n- type: regex\n content: \"api[_-]?key\\\\s*=\\\\s*\\\\w+\"\n flags: \"i\"\n\n# Regex literal syntax (pattern and flags in one string)\n- type: regex\n content: \"/bearer\\\\s+[a-zA-Z0-9._~+\\\\/=-]+/i\"\n```\n\nRegex entries always scan globally (the `g` flag is enforced automatically). The regex literal syntax `/pattern/flags` is supported as an alternative to separate `content` + `flags` fields. Escaped slashes within the pattern (`\\\\/`) are handled correctly.\n\n#### Replace mode with regex\n\n```yaml\n# One-way replace connection strings (not reversible)\n- type: regex\n content: \"postgres://[^\\\\s]+\"\n mode: replace\n replacement: \"postgres://***\"\n```\n\n## Interaction with env var detection\n\nEnvironment variables are collected first, then file-defined entries are appended. File entries can cover secrets that don't live in env vars (config files, hardcoded values, etc.). If the same plain value appears in both env and file entries, the env entry's obfuscate-mode mapping is used first.\n\n## Key files\n\n- `packages/coding-agent/src/secrets/index.ts` -- loading, merging, env var collection\n- `packages/coding-agent/src/secrets/obfuscator.ts` -- `SecretObfuscator` class, placeholder generation, message obfuscation\n- `packages/coding-agent/src/secrets/regex.ts` -- regex literal parsing and compilation\n- `packages/coding-agent/src/config/settings-schema.ts` -- `secrets.enabled` setting definition\n\n## See also\n\n- [`auth-broker-gateway.md`](./auth-broker-gateway.md) -- remote credential vault and forward-proxy that keep provider OAuth refresh tokens and access tokens off developer hosts entirely (complementary to in-process obfuscation).\n",
|
|
57
|
+
"session-operations-export-share-fork-resume.md": "# Session Operations: export, dump, share, fresh, fork, resume/continue\n\nThis document describes operator-visible behavior for session export/share/fork/resume operations as currently implemented.\n\n## Implementation files\n\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/export/html/index.ts`](../packages/coding-agent/src/export/html/index.ts)\n- [`../src/export/custom-share.ts`](../packages/coding-agent/src/export/custom-share.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n\n## Operation matrix\n\n| Operation | Entry path | Session mutation | Session file creation/switch | Output artifact |\n| --------------------------------------- | ------------------------- | ------------------------------------- | ---------------------------------------------------------------------------------- | --------------------------------------------------------------- |\n| `/dump` | Interactive slash command | No | No | Clipboard text |\n| `/export [path]` | Interactive slash command | No | No | HTML file |\n| `--export <session.jsonl> [outputPath]` | CLI startup fast-path | No runtime session mutation | No active session; reads target file | HTML file |\n| `/share` | Interactive slash command | No | No | Temp HTML + share URL/gist |\n| `/fresh` | Interactive slash command | Yes (provider-facing in-memory id/state only) | No; keeps current session file/header | None |\n| `/fork` | Interactive slash command | Yes (active session identity changes) | Creates new session file and switches current session to it (persistent mode only) | Copies artifact directory to new session namespace when present |\n| `--fork <id\\|path>` | CLI startup | Yes after session creation | Creates a new session fork from the selected source into current cwd/session dir | None |\n| `/resume` | Interactive slash command | Yes (active in-memory state replaced) | Switches to selected existing session file | None |\n| `--resume` | CLI startup picker | Yes after session creation | Opens selected existing session file | None |\n| `--resume <id\\|path>` | CLI startup | Yes after session creation | Opens existing session; global cross-project match re-roots (moved dir) or forks into current project | None |\n| `--continue` | CLI startup | Yes after session creation | Opens terminal breadcrumb (re-roots it if its dir was moved) or most-recent session; creates new one if none exists | None |\n\n## Export and dump\n\n### `/export [outputPath]` (interactive)\n\nFlow:\n\n1. `InputController` routes `/export...` to `CommandController.handleExportCommand`.\n2. The command splits on whitespace and uses only the first argument after `/export` as `outputPath`.\n3. `AgentSession.exportToHtml()` calls `exportSessionToHtml(sessionManager, state, { outputPath, themeName })`.\n4. On success, UI shows path and opens the file in browser.\n\nBehavior details:\n\n- `--copy`, `clipboard`, and `copy` arguments are explicitly rejected with a warning to use `/dump`.\n- Export embeds session header/entries/leaf plus current `systemPrompt` and tool descriptions from agent state.\n- No session entries are appended during export.\n\nCaveat:\n\n- Argument parsing is whitespace-based (`text.split(/\\s+/)`), so quoted paths with spaces are not preserved as a single path by this command path.\n\n### `--export <inputSessionFile> [outputPath]` (CLI)\n\nFlow in `main.ts`:\n\n1. Handled early (before interactive/session startup).\n2. Calls `exportFromFile(inputPath, outputPath?)`.\n3. `SessionManager.open(inputPath)` loads entries, then HTML is generated and written.\n4. Process prints `Exported to: ...` and exits.\n\nBehavior details:\n\n- Missing input file surfaces as `File not found: <path>`.\n- This path does not create an `AgentSession` and does not mutate any running session.\n\n### `/dump` (interactive clipboard export)\n\nFlow:\n\n1. `CommandController.handleDumpCommand()` calls `session.formatSessionAsText()`.\n2. If empty string, reports `No messages to dump yet.`\n3. Otherwise copies to clipboard via native `copyToClipboard`.\n\nDump content includes:\n\n- System prompt\n- Active model/thinking level\n- Tool definitions + parameters\n- User/assistant messages\n- Thinking blocks and tool calls\n- Tool results and execution blocks (except `excludeFromContext` bash/python entries)\n- Custom/hook/file mention/branch summary/compaction summary entries\n\nNo session persistence changes are made by dumping.\n\n## Share\n\n`/share` is interactive-only and always starts by exporting current session to a temp HTML file.\n\n### Phase 1: temp export\n\n- Temp file path: `${os.tmpdir()}/${Snowflake.next()}.html`\n- Uses `session.exportToHtml(tmpFile)`\n- If export fails (notably in-memory sessions), share ends with error.\n\n### Phase 2: custom share handler (if present)\n\n`loadCustomShare()` checks `~/.omp/agent` for first existing candidate:\n\n- `share.ts`\n- `share.js`\n- `share.mjs`\n\nRequirements:\n\n- Module must default-export a function `(htmlPath) => Promise<CustomShareResult | string | undefined>`.\n\nIf present and valid:\n\n- UI enters `Sharing...` loader state.\n- Handler result interpretation:\n - string => treated as URL, shown and opened\n - object => `url` and/or `message` shown; `url` opened\n - `undefined`/falsy => generic `Session shared`\n- Temp file is removed after completion.\n\nCritical fallback behavior:\n\n- If custom handler exists but loading fails, command errors and returns.\n- If custom handler executes and throws, command errors and returns.\n- In both failure cases, it **does not** fall back to GitHub gist.\n- Gist fallback happens only when no custom share script exists.\n\n### Phase 3: default gist fallback\n\nOnly when no custom share handler is found:\n\n1. Validates `gh auth status`.\n2. Shows `Creating gist...` loader.\n3. Runs `gh gist create --public=false <tmpFile>`.\n4. Parses gist URL, derives gist id, builds preview URL `https://gistpreview.github.io/?<id>`.\n5. Shows both preview and gist URLs; opens preview.\n\nCancellation/abort semantics in share:\n\n- Loader has `onAbort` hook that restores editor UI and reports `Share cancelled`.\n- The underlying `gh gist create` command is not passed an abort signal in this code path; cancellation is UI-level and checked after command returns.\n\n## Fork\n\nInteractive `/fork` creates a new session from the current one and switches the active session identity.\n\n### Preconditions and immediate guards\n\n- If agent is streaming, `/fork` is rejected with warning.\n- UI status/loading indicators are cleared before operation.\n\n### Session-level flow\n\n`AgentSession.fork()`:\n\n1. Emits `session_before_switch` with `reason: \"fork\"` (cancellable).\n2. Flushes pending writes.\n3. Calls `SessionManager.fork()`.\n4. Copies artifacts directory from old session namespace to new namespace (best-effort; non-ENOENT copy failures are logged, not fatal).\n5. Updates `agent.sessionId`.\n6. Emits `session_switch` with `reason: \"fork\"`.\n\n`SessionManager.fork()` behavior:\n\n- Requires persistent mode and existing session file.\n- Creates new session id and new JSONL file path.\n- Rewrites header with:\n - new `id`\n - new timestamp\n - `cwd` unchanged\n - `parentSession` set to previous session id\n- Keeps all non-header entries unchanged in the new file.\n\n### Non-persistent behavior\n\n- In-memory session manager returns `undefined` from `fork()`.\n- `AgentSession.fork()` returns `false`.\n- UI reports `Fork failed (session not persisted or cancelled)`.\n\n### CLI `--fork <id|path>`\n\nStartup `--fork` is resolved before normal session creation:\n\n1. `--fork` is rejected with `--no-session`.\n2. Path-like values (`/`, `\\`, or `.jsonl`) call `SessionManager.forkFrom(path, cwd, sessionDir)`.\n3. Other values resolve via `resolveResumableSession(...)`: local sessions first, then global search when `sessionDir` is not forced. Matching accepts lowercased session id prefixes, full JSONL filename prefixes, and timestamp-stripped filename id suffixes.\n4. The forked file is created in the current cwd/session-dir scope and becomes the active session manager for startup.\n\n## Resume and continue\n\n## Interactive `/resume`\n\nFlow:\n\n1. Opens session selector populated via `SessionManager.list(currentCwd, currentSessionDir)`.\n2. On selection, `SelectorController.handleResumeSession(sessionPath)` calls `session.switchSession(sessionPath)`.\n3. UI clears/rebuilds chat and todos, then reports `Resumed session`.\n\nNotes:\n\n- This picker only lists sessions in the current session directory scope.\n- It does not use global cross-project search.\n\n## CLI `--resume`\n\n### `--resume` (no value)\n\n- `main.ts` lists sessions for current cwd/sessionDir and opens picker.\n- Selected path is opened with `SessionManager.open(selectedPath)` before session creation.\n\n### `--resume <value>`\n\n`createSessionManager()` resolution order:\n\n1. If value looks like path (`/`, `\\`, or `.jsonl`), open directly.\n2. Else `resolveResumableSession(...)` searches:\n - current scope (`SessionManager.list(cwd, sessionDir)`)\n - global sessions (`SessionManager.listAll()`) only when no explicit `sessionDir` was provided\n3. Matching accepts case-insensitive session id prefixes, full JSONL filename prefixes, and the id suffix after the timestamp in `<timestamp>_<sessionId>.jsonl`.\n\nCross-project id match behavior:\n\n- If matched session cwd differs from current cwd, behavior depends on whether the matched session's recorded directory still exists:\n - **Directory gone (moved/renamed, e.g. `git worktree move`)**: CLI asks `Session's directory no longer exists (...). Move (re-root) it into the current directory? [Y/n]`.\n - On yes (default): `SessionManager.open(match.path)` then `manager.moveTo(cwd)` re-roots the existing session into the current directory (no duplicate file).\n - On no: command cancels (returns no session). On non-TTY: command errors.\n - **Directory still exists (genuinely different project)**: CLI asks `Session found in different project ... Fork into current directory? [y/N]`.\n - On yes: `SessionManager.forkFrom(match.path, cwd, sessionDir)` creates a new local forked file.\n - On no: command cancels. On non-TTY: command errors.\n\n## CLI `--continue`\n\n`SessionManager.continueRecent(cwd, sessionDir)`:\n\n1. Resolves session dir for current cwd.\n2. Reads the terminal-scoped breadcrumb.\n3. If the breadcrumb points at a session recorded under a different cwd whose directory no longer exists (moved/renamed) **and** the current directory has no sessions of its own, re-roots that session into the current directory via `moveTo` instead of starting fresh.\n4. Otherwise, if the breadcrumb's cwd matches the current cwd, uses the breadcrumb session; else falls back to the most recently modified session file.\n5. Opens the found session; if none exists, creates a new session.\n\nThis is startup-only behavior; there is no interactive `/continue` slash command.\n\n## How session switching actually mutates runtime state\n\n`AgentSession.switchSession(sessionPath)` does the runtime transition used by resume-like operations:\n\n1. Emit `session_before_switch` with `reason: \"resume\"` and `targetSessionFile` (cancellable).\n2. Disconnect agent event subscription and abort in-flight work.\n3. Flush current session manager writes.\n4. Capture rollback state for the current session, agent messages, queued steering/follow-up/next-turn messages, model/thinking/service-tier, MCP selections, tools, and system prompt.\n5. Clear queued steering/follow-up/next-turn messages.\n6. `sessionManager.setSessionFile(sessionPath)` and update `agent.sessionId`.\n7. Build session context from loaded entries.\n8. Restore MCP selections/tools/system prompt for the target session.\n9. Emit `session_switch` with `reason: \"resume\"`.\n10. Replace agent messages from context and sync todos.\n11. Close provider sessions when switching files, or when same-file reload changed replay messages.\n12. Restore model (if available in current registry).\n13. Restore or initialize thinking level and service tier.\n14. Reconnect agent event subscription.\n\nIf any step after the capture fails, `switchSession()` restores the captured state and reconnects the previous agent subscription before rethrowing.\n\nNo new session file is created by `switchSession()` itself.\n\n## Event emissions and cancellation points\n\n### Switch/fork lifecycle hooks\n\nFor `newSession`, `fork`, and `switchSession`:\n\n- Before event: `session_before_switch`\n - reasons: `new`, `fork`, `resume`\n - cancellable by returning `{ cancel: true }`\n- After event: `session_switch`\n - same reason set\n - includes `previousSessionFile`\n\n`ExtensionRunner.emit()` returns early on the first cancelling before-event result.\n\n### Custom tool `onSession` behavior\n\nSDK bridges extension session events to custom tool `onSession` callbacks:\n\n- `session_switch` -> `onSession({ reason: \"switch\", previousSessionFile })`\n- `session_branch` -> `reason: \"branch\"`\n- `session_start` -> `reason: \"start\"`\n- `session_tree` -> `reason: \"tree\"`\n- `session_shutdown` -> `reason: \"shutdown\"`\n\nThese callbacks are observational; they do not cancel switch/fork.\n\n### Other cancellation surfaces relevant to this doc\n\n- `/fork` is blocked while streaming (user must wait/abort current response first).\n- `/resume` selector can be cancelled by user closing selector.\n- Cross-project `--resume <id>` can be cancelled by declining fork prompt.\n- `/share` has UI abort path (`Share cancelled`) for gist flow; it does not wire process-kill semantics for `gh gist create` in this code path.\n\n## Non-persistent (in-memory) session behavior\n\nWhen session manager is created with `SessionManager.inMemory()` (`--no-session`):\n\n- Session file path is absent.\n- `/export` and `/share` fail with `Cannot export in-memory session to HTML` (propagated to command error UI).\n- `/fork` fails because `SessionManager.fork()` requires persistence.\n- `/dump` still works because it serializes in-memory agent state.\n- CLI resume/continue semantics are bypassed if `--no-session` is set, because manager creation returns in-memory immediately.\n\n## Known implementation caveats (as of current code)\n\n- `SelectorController.handleResumeSession()` does not check the boolean result from `session.switchSession(...)`; a hook-cancelled switch can still proceed through UI \"Resumed session\" repaint/status path.\n- `/share` custom-share failures do not degrade to default gist fallback; they terminate the command with error.\n- `/export` argument tokenization is simplistic and does not preserve quoted paths with spaces.\n",
|
|
58
|
+
"session-switching-and-recent-listing.md": "# Session switching and recent session listing\n\nThis document describes how coding-agent discovers recent sessions, resolves `--resume` targets, presents session pickers, and switches the active runtime session.\n\nIt focuses on current implementation behavior, including fallback paths and caveats.\n\n## Implementation files\n\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/cli/session-picker.ts`](../packages/coding-agent/src/cli/session-picker.ts)\n- [`../src/modes/components/session-selector.ts`](../packages/coding-agent/src/modes/components/session-selector.ts)\n- [`../src/modes/controllers/selector-controller.ts`](../packages/coding-agent/src/modes/controllers/selector-controller.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n\n## Recent-session discovery\n\n### Directory scope\n\n`SessionManager` stores sessions under a cwd-scoped directory by default:\n\n- `~/.omp/agent/sessions/--<cwd-encoded>--/*.jsonl`\n\n`SessionManager.list(cwd, sessionDir?)` reads only that directory unless an explicit `sessionDir` is provided.\n\n### Two listing paths with different payloads\n\nThere are two different listing pipelines:\n\n1. `getRecentSessions(sessionDir, limit)` (welcome/summary view)\n - Reads only a 4KB prefix (`readTextSlices(..., 4096, 0)[0]`) from each file.\n - Parses header + earliest user text preview.\n - Returns lightweight `RecentSessionInfo` with lazy `name` and `timeAgo` getters.\n - Sorts by file `mtime` descending.\n\n2. `SessionManager.list(...)` / `SessionManager.listAll()` (resume pickers and ID matching)\n - Reads a 4KB prefix plus a bounded 32 KiB tail in one `readTextSlices(...)` call per file, not the full JSONL file.\n - Builds `SessionInfo` objects (`id`, `cwd`, `title`, `messageCount`, `firstMessage`, `allMessagesText`, timestamps, lifecycle status).\n - Uses prefix parsing plus marker counting for list text, and tail parsing for the final-message lifecycle status; later messages beyond the prefix may not be present in `allMessagesText`.\n - Sorts by `modified` descending.\n\n### Metadata fallback behavior\n\nFor recent summaries (`RecentSessionInfo`):\n\n- display name preference: `header.title` -> first user prompt -> `header.id` -> filename\n- name is truncated to 40 chars for compact displays\n- control characters/newlines are stripped/sanitized from title-derived names\n\nFor `SessionInfo` list entries:\n\n- `title` is `header.title` or the last compaction `shortSummary` seen in the 4KB prefix\n- `firstMessage` is first user message text discoverable from the prefix or `\"(no messages)\"`\n\n## `--continue` resolution and terminal breadcrumb preference\n\n`SessionManager.continueRecent(cwd, sessionDir?)` resolves the target in this order:\n\n1. Read terminal-scoped breadcrumb (`~/.omp/agent/terminal-sessions/<terminal-id>`)\n2. Validate breadcrumb:\n - current terminal can be identified\n - breadcrumb cwd matches current cwd (resolved path compare)\n - referenced file still exists\n3. If breadcrumb is invalid/missing, fall back to newest file by mtime in the session dir (`findMostRecentSession`)\n4. If none found, create a new session\n\nTerminal ID derivation prefers TTY path and falls back to env-based identifiers (`TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `TERM_SESSION_ID`, `WT_SESSION`).\n\nBreadcrumb writes are best-effort and non-fatal.\n\n## Startup-time resume target resolution (`main.ts`)\n\n### `--resume <value>`\n\n`createSessionManager(...)` handles string-valued `--resume` in two modes:\n\n1. Path-like value (contains `/`, `\\\\`, or ends with `.jsonl`)\n - direct `SessionManager.open(sessionArg, parsed.sessionDir)`\n\n2. Resume key value\n - `resolveResumableSession(...)` searches local sessions first, then all sessions when `sessionDir` is not forced\n - matching is case-insensitive and accepts `id` prefix, full JSONL filename prefix, or the session-id suffix after the timestamp\n - first match in modified-descending order is used (no ambiguity prompt)\n\nCross-project match behavior:\n\n- if matched session cwd differs from current cwd, CLI prompts whether to fork into current project\n- yes -> `SessionManager.forkFrom(...)`\n- no -> throws error (`Session \"...\" is in another project (...)`)\n\nNo match -> throws error (`Session \"...\" not found.`).\n\n### `--resume` (no value)\n\nHandled after initial session-manager construction:\n\n1. list local sessions with `SessionManager.list(cwd, parsed.sessionDir)`\n2. if empty: print `No sessions found` and exit early\n3. open TUI picker (`selectSession`)\n4. if canceled: print `No session selected` and exit early\n5. if selected: `SessionManager.open(selectedPath)`\n\n### `--continue`\n\nUses `SessionManager.continueRecent(...)` directly (breadcrumb-first behavior above).\n\n## Picker-based selection internals\n\n## CLI picker (`src/cli/session-picker.ts`)\n\n`selectSession(sessions)` creates a standalone TUI with `SessionSelectorComponent` and resolves exactly once:\n\n- selection -> resolves selected path\n- cancel (Esc) -> resolves `null`\n- hard exit (Ctrl+C path) -> stops TUI and `process.exit(0)`\n\n## Interactive in-session picker (`SelectorController.showSessionSelector`)\n\nFlow:\n\n1. fetch sessions from current session dir via `SessionManager.list(currentCwd, currentSessionDir)`\n2. mount `SessionSelectorComponent` in editor area using `showSelector(...)`\n3. callbacks:\n - select -> close selector and call `handleResumeSession(sessionPath)`\n - cancel -> restore editor and rerender\n - exit -> `ctx.shutdown()`\n\n## Session selector component behavior\n\n`SessionList` supports:\n\n- arrow/page navigation\n- Enter to select\n- Delete to delete after confirmation\n- Esc to cancel\n- Ctrl+C to exit\n- fuzzy search across session id/title/cwd/first message/all messages/path\n\nEmpty-list render behavior:\n\n- renders `No sessions in current folder. Press Tab to view all.`\n- Enter/Delete on empty do nothing (no callback)\n- Esc/Ctrl+C still work\n\nCaveat: the empty-state UI mentions Tab, but this component currently has no Tab handler and current wiring only lists current-scope sessions.\n\n## Runtime switch execution (`AgentSession.switchSession`)\n\n`switchSession(sessionPath)` is the core in-process switch path.\n\nLifecycle/state transition:\n\n1. capture `previousSessionFile`\n2. emit `session_before_switch` hook event (`reason: \"resume\"`, cancellable)\n3. if canceled -> return `false` with no switch\n4. disconnect from current agent event stream\n5. abort active generation/tool flow\n6. clear queued steering/follow-up/next-turn message buffers\n7. flush session writer (`sessionManager.flush()`) to persist pending writes\n8. `sessionManager.setSessionFile(sessionPath)`\n - updates session file pointer\n - writes terminal breadcrumb\n - loads entries / migrates / blob-resolves / reindexes\n - if missing/invalid file data: initializes a new session at that path and rewrites header\n9. update `agent.sessionId`\n10. rebuild display context via `buildDisplaySessionContext()`\n11. restore persisted/discovered MCP tool selections and rebuild active tools/system prompt when discovery is enabled\n12. emit `session_switch` hook event (`reason: \"resume\"`, `previousSessionFile`)\n13. replace agent messages with rebuilt context and sync todos\n14. close provider sessions when switching to a different session or when same-session reload changed replay messages\n15. restore default model from `sessionContext.models.default` if available and present in model registry\n16. restore thinking level and service tier:\n - thinking uses persisted `thinking_level_change`, otherwise the configured default clamped to model capability\n - service tier uses persisted `service_tier_change`, otherwise the configured `serviceTier` setting (`\"none\"` becomes unset)\n17. reconnect agent listeners and return `true`\n\n## UI state rebuild after interactive switch\n\n`SelectorController.handleResumeSession` performs UI reset around `switchSession`:\n\n- stop loading animation\n- clear status container\n- clear pending-message UI and pending tool map\n- reset streaming component/message references\n- call `session.switchSession(...)`\n- clear chat container and rerender from session context (`renderInitialMessages`)\n- reload todos from new session artifacts\n- show `Resumed session`\n\nSo visible conversation/todo state is rebuilt from the new session file.\n\n## Startup resume vs in-session switch\n\n### Startup resume (`--continue`, `--resume`, direct open)\n\n- Session file is chosen before `createAgentSession(...)`.\n- `sdk.ts` builds `existingSession = sessionManager.buildSessionContext()`.\n- Agent messages are restored once during session creation.\n- Model/thinking are selected during creation (including restore/fallback logic).\n- Interactive mode then runs `#restoreModeFromSession()` to re-enter persisted mode state (currently plan/plan_paused).\n\n### In-session switch (`/resume`-style selector path)\n\n- Uses `AgentSession.switchSession(...)` on an already-running `AgentSession`.\n- Messages/model/thinking are rebuilt immediately in place.\n- Hook `session_before_switch`/`session_switch` events are emitted.\n- UI chat/todos are refreshed.\n- No dedicated post-switch mode restore call is made in selector flow; mode re-entry behavior is not symmetric with startup `#restoreModeFromSession()`.\n\n## Failure and edge-case behavior\n\n### Cancellation paths\n\n- CLI picker cancel -> returns `null`, caller prints `No session selected`, process exits early.\n- Interactive picker cancel -> editor restored, no session change.\n- Hook cancellation (`session_before_switch`) -> `switchSession()` returns `false`.\n\n### Empty list paths\n\n- CLI `--resume` (no value): empty list prints `No sessions found` and exits.\n- Interactive selector: empty list renders message and remains cancellable.\n\n### Missing/invalid target session file\n\nWhen opening/switching to a specific path (`setSessionFile`):\n\n- ENOENT -> treated as empty -> new session initialized at that exact path and persisted.\n- malformed/invalid header (or effectively unreadable parsed entries) -> treated as empty -> new session initialized and persisted.\n\nThis is recovery behavior, not hard failure.\n\n### Hard failures\n\nSwitch/open can still throw on true I/O failures (permission errors, rewrite failures, etc.), which propagate to callers.\n\n### ID prefix matching caveats\n\n- Matching uses `startsWith` on the lowercased session id, lowercased JSONL filename, and lowercased id suffix after the filename timestamp.\n- First match in modified-descending order wins; there is no ambiguity UI if multiple sessions share a prefix.\n- Prefix-listing metadata is intentionally lightweight, so search text may not include messages outside the first 4KB of the session file.\n",
|
|
59
|
+
"session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/modes/controllers/input-controller.ts` — command routing (`/tree`, `/branch`, double-escape behavior)\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices:\n\n- `#byId: Map<string, SessionEntry>` — fast lookup for any entry\n- `#leafId: string | null` — current position in the tree\n- `#labelsById: Map<string, string>` — resolved labels by target entry id\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from `labelsById`\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from resolved `labelsById` for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-manager.ts`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role templated context messages\n- `custom`/`hookMessage` become user-role content messages\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `labelsById` is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n - `default` suppresses `label`, `custom`, `model_change`, and `thinking_level_change`; it is not a complete \"hide all internal entries\" filter.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? })` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Plan approval session naming\n\nWhen a user approves a plan from plan mode (`InteractiveMode.#approvePlan`), the approval handler seeds the session name from the plan's title so the resulting (fresh or compacted) session does not stay unnamed.\n\nTrigger:\n\n- Plan approval reaches `#approvePlan(...)` with `options.title` populated from the plan-approval details.\n- This runs for every approval choice (`Approve and execute`, `Approve and compact context`, plain `Approve`); the synthetic `plan-approved` prompt is what otherwise bypasses the input-controller's title-generation path.\n\nNaming source:\n\n- The normalized plan title is humanized via `humanizePlanTitle(title)` (`packages/coding-agent/src/plan-mode/approved-plan.ts`):\n - replaces runs of `-`/`_` with a single space\n - trims whitespace\n - capitalizes the first character\n - returns `\"\"` for whitespace-only / separator-only input\n- The humanized name is applied only when the current session has no name (`!sessionManager.getSessionName()`). It then calls `sessionManager.setSessionName(name, \"auto\")`, which also refuses to overwrite user-named sessions.\n- On successful apply, the terminal title (`setSessionTerminalTitle`) and the editor border color are refreshed to reflect the new name.\n\nExamples (from `humanizePlanTitle`):\n\n- `migrate-mcp-loader` → `Migrate mcp loader`\n- `fix_session_naming` → `Fix session naming`\n- `foo--bar__baz` → `Foo bar baz`\n- `RefactorRouter` → `RefactorRouter` (no separators to expand)\n- `\"\"` / `\"---\"` → `\"\"` (no name applied)\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
|
|
60
|
+
"session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts)\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts)\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.omp/agent/sessions/--<cwd-encoded>--/<timestamp>_<sessionId>.jsonl\n```\n\n`<cwd-encoded>` is derived from the working directory by stripping leading slash and replacing `/`, `\\\\`, and `:` with `-`.\n\nBlob store location:\n\n```text\n~/.omp/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.omp/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. `continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `thinking_level_change`\n- `model_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"claude-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", \"text\": \"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by `buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" }\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": \"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { \"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the entire file is rewritten to disk immediately.\n- Migration mutates in-memory entries first, then persists rewritten JSONL.\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId, byId?)` resolves what is sent to the model.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nWrites are serialized through an internal promise chain (`#persistChain`) and `NdjsonFileWriter`.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` flushes writer and calls `fsync()`.\n- Atomic full rewrites (`#rewriteFile`) write to temp file, flush+fsync, close, then rename over target.\n- Used for migrations, `setSessionName`, `rewriteEntries`, move operations, and tool-call arg rewrites.\n\n### Error behavior\n\n- Persistence errors are latched (`#persistError`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextSlices`, `writeText`, `rename`, `unlink`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `writeLine`, `flush`, `fsync`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDefined in `session-manager.ts`:\n\n- `getRecentSessions(sessionDir, limit)` -> lightweight metadata for UI/session picker, capped by `limit`\n- `findMostRecentSession(sessionDir)` -> newest by mtime\n- `list(cwd, sessionDir?)` -> sessions in one project scope\n- `listAll()` -> sessions across all project scopes under `~/.omp/agent/sessions`\n- `resolveResumableSession(sessionArg, cwd, sessionDir?)` -> local then global resume/fork target lookup\n\nMetadata extraction for `getRecentSessions` reads a prefix via `readTextSlices(..., 4096, 0)`. `list`/`listAll` read a 4KB prefix plus a bounded 32 KiB tail through one `readTextSlices(...)` call per file, using the prefix for metadata and the tail for lifecycle status. Resume matching is case-insensitive and accepts session id prefixes, full filename prefixes, or the id suffix after the timestamp in `<timestamp>_<sessionId>.jsonl`.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.omp/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts using in-memory last-prompt cache\n- Async insertion (`setImmediate`) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
|
|
61
|
+
"skills.md": "# Skills\n\nSkills are file-backed capability packs discovered at startup and exposed to the model as:\n\n- lightweight metadata in the system prompt (name + description)\n- on-demand content via the `read` tool against `skill://...`\n- optional interactive `/skill:<name>` commands\n\nThis document covers current runtime behavior in `src/extensibility/skills.ts`, `src/discovery/builtin.ts`, `src/internal-urls/skill-protocol.ts`, and `src/discovery/agents-md.ts`.\n\n## What a skill is in this codebase\n\nA discovered skill is represented as:\n\n- `name`\n- `description`\n- `filePath` (the `SKILL.md` path)\n- `baseDir` (skill directory)\n- source metadata (`provider`, `level`, path)\n\nThe runtime only requires `name` and `path` for validity. In practice, matching quality depends on `description` being meaningful.\n\n## Required layout and SKILL.md expectations\n\n### Directory layout\n\nFor provider-based discovery (native/Claude/Codex/Agents/plugin providers), skills are discovered as **one level under `skills/`**:\n\n- `<skills-root>/<skill-name>/SKILL.md`\n\nNested patterns like `<skills-root>/group/<skill>/SKILL.md` are not discovered by provider loaders.\n\nFor `skills.customDirectories`, scanning uses the same non-recursive layout (`*/SKILL.md`).\n\n```text\nProvider-discovered layout (non-recursive under skills/):\n\n<root>/skills/\n ├─ postgres/\n │ └─ SKILL.md ✅ discovered\n ├─ pdf/\n │ └─ SKILL.md ✅ discovered\n └─ team/\n └─ internal/\n └─ SKILL.md ❌ not discovered by provider loaders\n\nCustom-directory scanning is also non-recursive, so nested paths are ignored unless you point `customDirectories` at that nested parent.\n```\n\n### `SKILL.md` frontmatter\n\nSupported frontmatter fields on the skill type:\n\n- `name?: string`\n- `description?: string`\n- `globs?: string[]`\n- `alwaysApply?: boolean`\n- `hide?: boolean`\n- additional keys are preserved as unknown metadata\n\nCurrent runtime behavior:\n\n- `name` defaults to the skill directory name\n- `description` is required for:\n - native `.omp` provider skill discovery (`requireDescription: true`)\n - `skills.customDirectories` scans via `scanSkillsFromDir` in `src/discovery/helpers.ts` (non-recursive)\n- non-native providers can load skills without description\n\n## Discovery pipeline\n\n`discoverSkills()` in `src/extensibility/skills.ts` does two passes:\n\n1. **Capability providers** via `loadCapability(\"skills\")`\n2. **Custom directories** via `scanSkillsFromDir(..., { requireDescription: true })` (one-level directory enumeration)\n\nIf `skills.enabled` is `false`, discovery returns no skills.\n\n### Built-in skill providers and precedence\n\nProvider ordering is priority-first (higher wins), then registration order for ties.\n\nCurrent registered skill providers:\n\n1. `native` (priority 100) — `.omp` user/project skills via `src/discovery/builtin.ts`\n2. `omp-plugins` (priority 90) — `skills/` bundled next to extension packages loaded through `extensions:` or `--extension`/`-e`\n3. `claude` (priority 80)\n4. priority 70 group (in registration order):\n - `claude-plugins`\n - `agents`\n - `codex`\n5. `opencode` (priority 55)\n6. `github` (priority 30) — `.github/skills/<name>/SKILL.md` (GitHub Agent Skills layout, project-only)\n\nDedup key is skill name. First item with a given name wins.\n\n### Source toggles and filtering\n\n`discoverSkills()` applies these controls:\n\n- source toggles: `enableCodexUser`, `enableClaudeUser`, `enableClaudeProject`, `enablePiUser`, `enablePiProject`\n- `disabledExtensions` entries with `skill:<name>`\n- `ignoredSkills` (exclude)\n- `includeSkills` (include allowlist; empty means include all)\n\nFilter order is:\n\n1. not disabled by `disabledExtensions`\n2. source enabled\n3. not ignored\n4. included (if include list present)\n For providers other than codex/claude/native (for example `agents`, `claude-plugins`, `opencode`), enablement currently falls back to: enabled if **any** built-in source toggle is enabled.\n\n### Collision and duplicate handling\n\n- Capability dedup already keeps first skill per name (highest-precedence provider)\n- `extensibility/skills.ts` additionally:\n - de-duplicates identical files by `realpath` (symlink-safe)\n - emits collision warnings when a later skill name conflicts\n - keeps the convenience `discoverSkillsFromDir({ dir, source })` API as a thin adapter over `scanSkillsFromDir`\n- Custom-directory skills are merged after provider skills and follow the same collision behavior\n\n## Runtime usage behavior\n\n### System prompt exposure\n\nSystem prompt construction (`src/system-prompt.ts`) uses discovered skills as follows:\n\n- if `read` tool is available:\n - include discovered skills list in prompt, excluding skills with `hide: true`\n- otherwise:\n - omit discovered list\n\n`hide: true` does not disable the skill. Hidden skills are still loaded and remain reachable through `skill://<name>` and `/skill:<name>` when skill commands are enabled.\n\nTask tool subagents receive the session's discovered/provided skills list via normal session creation; there is no per-task skill pinning override.\n\n### Interactive `/skill:<name>` commands\n\nIf `skills.enableSkillCommands` is true, interactive mode registers one slash command per discovered skill.\n\n`/skill:<name> [args]` behavior:\n\n- reads the skill file directly from `filePath`\n- strips frontmatter\n- injects skill body as a custom message\n- delivery mode follows the **submission keybinding**:\n - **Enter** → invokes the skill on the `steer` queue while streaming (matches free-text Enter, which also steers), or as a normal idle prompt when the agent is not streaming\n - **Ctrl+Enter** (`app.message.followUp`) → invokes the skill on the `followUp` queue while streaming, or as a normal idle prompt when the agent is not streaming\n- appends metadata (`Skill: <path>`, optional `User: <args>`)\n\nThere is no flag, mode-selector, or frontmatter knob to override this — the keybinding _is_ the choice, identical to how free text is routed during streaming (`input-controller.ts:243-249` for Enter, `input-controller.ts:462-500` for Ctrl+Enter; both dispatch through `#invokeSkillCommand`).\n\n## `skill://` URL behavior\n\n`src/internal-urls/skill-protocol.ts` supports:\n\n- `skill://<name>` → resolves to that skill's `SKILL.md`\n- `skill://<name>/<relative-path>` → resolves inside that skill directory\n\n```text\nskill:// URL resolution\n\nskill://pdf\n -> <pdf-base>/SKILL.md\n\nskill://pdf/references/tables.md\n -> <pdf-base>/references/tables.md\n\nGuards:\n- reject absolute paths\n- reject `..` traversal\n- reject any resolved path escaping <pdf-base>\n```\n\nResolution details:\n\n- skill name must match exactly\n- relative paths are URL-decoded\n- absolute paths are rejected\n- path traversal (`..`) is rejected\n- resolved path must remain within `baseDir`\n- missing files return an explicit `File not found` error\n\nContent type:\n\n- `.md` => `text/markdown`\n- everything else => `text/plain`\n\nNo fallback search is performed for missing assets.\n\n## Skills vs AGENTS.md, commands, tools, hooks\n\n### Skills vs AGENTS.md\n\n- **Skills**: named, optional capability packs selected by task context or explicitly requested\n- **AGENTS.md/context files**: persistent instruction files loaded as context-file capability and merged by level/depth rules\n\n`src/discovery/agents-md.ts` specifically walks ancestor directories from `cwd` to discover standalone `AGENTS.md` files (up to depth 20), excluding hidden-directory segments.\n\n### Skills vs slash commands\n\n- **Skills**: model-readable knowledge/workflow content\n- **Slash commands**: user-invoked command entry points\n- `/skill:<name>` is a convenience wrapper that injects skill text; it does not change skill discovery semantics\n\n### Skills vs custom tools\n\n- **Skills**: documentation/workflow content loaded through prompt context and `read`\n- **Custom tools**: executable tool APIs callable by the model with schemas and runtime side effects\n\n### Skills vs hooks\n\n- **Skills**: passive content\n- **Hooks**: event-driven runtime interceptors that can block/modify behavior during execution\n\n## Practical authoring guidance tied to discovery logic\n\n- Put each skill in its own directory: `<skills-root>/<skill-name>/SKILL.md`\n- Always include explicit `name` and `description` frontmatter\n- Keep referenced assets under the same skill directory and access with `skill://<name>/...`\n- For nested taxonomy (`team/domain/skill`), point `skills.customDirectories` to the nested parent directory; scanning itself remains non-recursive\n- Avoid duplicate skill names across sources; first match wins by provider precedence\n",
|
|
62
|
+
"skills/authoring-extensions.md": "---\nname: authoring-extensions\ndescription: Use when creating a new omp extension. Covers ExtensionAPI, factory signature, tool/command/event registration, and local-dev testing.\n---\n\n# Authoring Extensions\n\nExtensions are the primary way to add capabilities to `oh-my-pi`. A single extension module can register tools the LLM can call, slash commands users can invoke, and event handlers that run throughout the session lifecycle — all from one TypeScript file.\n\n## Minimum viable extension\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function (pi: ExtensionAPI) {\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(\"My extension loaded!\", \"info\");\n });\n}\n```\n\nThat is a working extension. Drop it into `~/.omp/agent/extensions/hello.ts` and restart omp to see the notification.\n\n## Full example\n\nThe following extension registers a slash command, a tool, and a session-start hook:\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI) {\n const z = pi.zod;\n\n // Runs once when the session loads\n pi.on(\"session_start\", async (_event, ctx) => {\n ctx.ui.notify(`Session ready in ${ctx.cwd}`, \"info\");\n });\n\n // Slash command: /greet\n pi.registerCommand(\"greet\", {\n description: \"Send a greeting into the conversation\",\n handler: async (args, ctx) => {\n const name = args.trim() || \"world\";\n pi.sendMessage(\n {\n customType: \"greeting\",\n content: `Hello, ${name}!`,\n display: true,\n attribution: \"user\",\n },\n { triggerTurn: false }\n );\n ctx.ui.notify(`Greeted ${name}`, \"info\");\n },\n });\n\n // LLM-callable tool\n pi.registerTool({\n name: \"word_count\",\n label: \"Word Count\",\n description: \"Count the words in a string\",\n parameters: z.object({\n text: z.string().describe(\"Text to count\"),\n }),\n async execute(_id, params, _signal, _onUpdate, _ctx) {\n const count = params.text.split(/\\s+/).filter(Boolean).length;\n return {\n content: [{ type: \"text\", text: String(count) }],\n details: { count },\n };\n },\n });\n}\n```\n\n## Discovery paths\n\nomp loads extension modules from these sources:\n\n1. Native `.omp` locations discovered through the capability system:\n - `<cwd>/.omp/extensions/`\n - `~/.omp/agent/extensions/`\n - legacy extension paths listed in `.omp/settings.json#extensions` or `~/.omp/agent/settings.json#extensions`\n2. Marketplace-installed plugins from the OMP and Claude plugin registries.\n3. Explicit configured paths passed by the CLI (`omp --extension ./my-ext.ts`, also `-e`; `--hook` is treated as an alias) and by the `extensions:` setting in config.\n\nThe runtime de-duplicates by resolved absolute path — first seen wins.\n\nWhen a path points to a directory, omp resolves the entry point in this order:\n\n1. `package.json` with `omp.extensions` (or legacy `pi.extensions`) field\n2. `index.ts`\n3. `index.js`\n\nWhen scanning an `extensions/` directory, omp also loads direct `*.ts`/`*.js` files and one-level subdirectories that have `index.ts`, `index.js`, or a manifest.\n\nExtension packages can also bundle sibling capability directories. When a package is loaded through `extensions:` or `--extension`/`-e`, the `omp-plugins` provider discovers its `skills/`, `hooks/pre|post/`, `tools/`, `commands/`, `rules/`, `prompts/`, and `.mcp.json`.\n\n## package.json manifest\n\nTo package an extension as an installable plugin, add an `omp` field to `package.json`:\n\n```json\n{\n \"name\": \"my-omp-extension\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\nThe legacy `pi` key is also accepted for backwards compatibility:\n\n```json\n{\n \"pi\": {\n \"extensions\": [\"./index.ts\"]\n }\n}\n```\n\nMultiple entry points are supported:\n\n```json\n{\n \"omp\": {\n \"extensions\": [\"./src/safety.ts\", \"./src/tools.ts\"]\n }\n}\n```\n\n## Registering commands\n\n```ts\npi.registerCommand(\"my-cmd\", {\n description: \"What the command does\",\n handler: async (args, ctx) => {\n // args: everything the user typed after /my-cmd\n // ctx: ExtensionCommandContext — includes ctx.ui, ctx.cwd, session controls\n ctx.ui.notify(\"Running!\", \"info\");\n await ctx.waitForIdle();\n await ctx.newSession();\n },\n});\n```\n\n`ExtensionCommandContext` session-control methods (safe to call from commands only):\n\n| Method | Effect |\n|---|---|\n| `waitForIdle()` | Wait for the agent to finish streaming |\n| `newSession(opts?)` | Open a fresh session |\n| `switchSession(path)` | Switch to an existing session file |\n| `branch(entryId)` | Fork from a specific history entry |\n| `navigateTree(id, opts?)` | Jump to a different point in the session tree |\n| `reload()` | Reload the session runtime |\n| `compact(opts?)` | Compact the current context |\n\n## Registering tools\n\nTools are called by the LLM. Parameters use [Zod](https://zod.dev) schemas, available at `pi.zod`:\n\n```ts\nconst z = pi.zod;\n\npi.registerTool({\n name: \"search_notes\", // snake_case, unique\n label: \"Search Notes\", // human-readable label for TUI\n description: \"Full-text search through project notes\",\n parameters: z.object({\n query: z.string().describe(\"Search query\"),\n limit: z.number().default(10).describe(\"Max results\").optional(),\n }),\n async execute(toolCallId, params, signal, onUpdate, ctx) {\n if (signal?.aborted) {\n return { content: [{ type: \"text\", text: \"Cancelled\" }] };\n }\n onUpdate?.({ content: [{ type: \"text\", text: \"Searching...\" }] });\n // ... do work ...\n return {\n content: [{ type: \"text\", text: `Found N results for \"${params.query}\"` }],\n details: { query: params.query, count: 0 },\n };\n },\n});\n```\n\n## Subscribing to events\n\n```ts\npi.on(\"tool_call\", async (event, ctx) => {\n // event.toolName, event.input, event.toolCallId\n if (event.toolName !== \"bash\") return;\n\n const command = String((event.input as { command?: unknown }).command ?? \"\");\n if (command.includes(\"rm -rf /\")) {\n return { block: true, reason: \"Blocked by safety policy\" };\n }\n});\n\npi.on(\"turn_end\", async (_event, ctx) => {\n ctx.ui.setStatus(\"tokens\", `~${ctx.getContextUsage()?.tokens ?? \"?\"} tokens`);\n});\n```\n\nFull event catalog: see [hooks authoring guide](./authoring-hooks.md).\n\n## Extension vs hook — when to use which\n\n| Need | Use |\n|---|---|\n| Tools + commands + events in one module | **Extension** (`ExtensionAPI`) |\n| Pure event interception (policy, redaction) | **Extension** or **Hook** (both work; extension is preferred) |\n| Legacy hook module already exists | **Hook** (`HookAPI` from `@oh-my-pi/pi-coding-agent/extensibility/hooks`) |\n| Registering provider / custom message renderer | **Extension only** |\n| Shipping as a marketplace plugin | **Extension** (use `package.json` manifest) |\n\nExtensions are a strict superset of hooks. New authoring should use `ExtensionAPI`.\n\n## Debugging\n\nStart omp with `--log-level debug` to see extension load diagnostics:\n\n```\nomp --log-level debug\n```\n\nFailed extension loads are logged with their path and error. Loaded extensions may also emit their own debug logs via `pi.logger`.\n\nTo temporarily disable a specific extension module by name without removing the file:\n\n```yaml\n# ~/.omp/agent/config.yml\ndisabledExtensions:\n - extension-module:my-ext\n```\n\nThe derived name is the filename stem (or directory name for `index.ts`-style entries): `/path/to/my-ext.ts` → `my-ext`.\n\n## Important constraints\n\n- **Do not call runtime actions during load.** Methods like `pi.sendMessage()` throw `ExtensionRuntimeNotInitializedError` if called synchronously during module evaluation (before a session is active). Register handlers/tools/commands during load; perform runtime actions only from event handlers, tools, or commands.\n- **`tool_call` errors are fail-closed.** If a `tool_call` handler throws, the tool is blocked.\n- **Command names must not clash with built-ins.** Conflicts are skipped with a diagnostic log.\n- **Reserved shortcuts are ignored** (`ctrl+c`, `ctrl+d`, `ctrl+z`, `ctrl+k`, `ctrl+p`, `ctrl+l`, `ctrl+o`, `ctrl+t`, `ctrl+g`, `ctrl+q`, `alt+m`, `shift+tab`, `shift+ctrl+p`, `alt+enter`, `escape`, `enter`).\n\n## Further reading\n\n- `docs/extensions.md` — runtime internals and full API surface reference\n- `docs/extension-loading.md` — detailed path resolution rules\n- `docs/hooks.md` — hook subsystem internals\n- `docs/skills/examples/hello-extension/` — complete working example\n",
|
|
63
|
+
"skills/authoring-hooks.md": "---\nname: authoring-hooks\ndescription: Use when creating a new omp hook. Covers HookAPI, event catalog, blocking/overriding tool calls, and context modification.\n---\n\n# Authoring Hooks\n\nHooks are event-driven interceptors that run alongside the agent loop. They are best used for cross-cutting concerns: safety policy, secret redaction, context pruning, audit logging. A hook module registers handlers via `pi.on(event, handler)` and can block tool execution, override tool output, or rewrite the message context before each LLM call.\n\n> **Relationship to extensions:** The hook subsystem (`HookAPI`) is the legacy API. The extension runner now handles everything hooks can do plus more. `ExtensionAPI` supports the hook event model plus extension-only events. Use `ExtensionAPI` for new work; use `HookAPI` only if you are maintaining an existing hook module.\n\n## Factory signature\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function myHook(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n // intercept every tool call\n });\n}\n```\n\nThe default export must be a plain function (not async, not a class). It receives a `HookAPI` instance and must register all handlers synchronously during execution.\n\nAlternatively, using `ExtensionAPI` (preferred):\n\n```ts\nimport type { ExtensionAPI } from \"@oh-my-pi/pi-coding-agent\";\n\nexport default function myExtension(pi: ExtensionAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => { /* ... */ });\n}\n```\n\n## Event catalog\n\n### Tool lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `tool_call` | Before every tool execution | `{ block?: boolean; reason?: string }` |\n| `tool_result` | After every tool execution | `{ content?; details?; isError?: boolean }` |\n\n### Session lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `session_start` | On initial session load | — |\n| `session_before_switch` | Before session switch | `{ cancel?: boolean }` |\n| `session_switch` | After session switch | — |\n| `session_before_branch` | Before session branch | `{ cancel?: boolean; skipConversationRestore?: boolean }` |\n| `session_branch` | After session branch | — |\n| `session_before_compact` | Before compaction | `{ cancel?: boolean; compaction?: CompactionResult }` |\n| `session.compacting` | During compaction (inject context) | `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }` |\n| `session_compact` | After compaction | — |\n| `session_before_tree` | Before tree navigation | `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }` |\n| `session_tree` | After tree navigation | — |\n| `session_shutdown` | On session shutdown | — |\n\n### Agent/turn lifecycle\n\n| Event | Fires | Can return |\n|---|---|---|\n| `before_agent_start` | Before agent starts a turn | `{ message?: { customType; content; display; details; attribution? } }` |\n| `agent_start` | Agent streaming starts | — |\n| `agent_end` | Agent streaming ends | — |\n| `turn_start` | Start of a user→agent turn | — |\n| `turn_end` | End of a user→agent turn | — |\n| `context` | Before each LLM API call | `{ messages?: Message[] }` |\n| `auto_compaction_start` | Auto-compaction begins | — |\n| `auto_compaction_end` | Auto-compaction ends | — |\n| `auto_retry_start` | Auto-retry begins | — |\n| `auto_retry_end` | Auto-retry ends | — |\n| `ttsr_triggered` | TTSR (too-short response) triggered | — |\n| `todo_reminder` | Todo reminder fires | — |\n\nExtension-only events such as `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `input`, `user_bash`, and `user_python` require `ExtensionAPI`.\n\n## Pre-tool blocking contract\n\nReturn `{ block: true, reason: \"...\" }` from a `tool_call` handler to prevent execution:\n\n```ts\nomp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName === \"bash\") {\n const cmd = String(event.input.command ?? \"\");\n if (/\\brm\\s+-rf\\s+\\//.test(cmd)) {\n return { block: true, reason: \"Refusing to delete root filesystem\" };\n }\n }\n});\n```\n\nContract:\n\n- If **any** handler returns `{ block: true }`, execution stops immediately.\n- `reason` is returned to the LLM as the tool error text.\n- If a handler **throws**, the tool is also blocked (fail-closed).\n- Last non-blocking return wins for non-blocking results; first `block: true` short-circuits.\n\n## Post-tool override contract\n\nReturn `{ content, details, isError }` from a `tool_result` handler to patch what the LLM sees:\n\n```ts\nomp.on(\"tool_result\", async (event, ctx) => {\n if (event.toolName === \"read\" && !event.isError) {\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replace(/(?:sk|pk)-[a-zA-Z0-9]{20,}/g, \"[REDACTED_API_KEY]\"),\n };\n });\n return { content: redacted };\n }\n});\n```\n\nContract:\n\n- Handlers run in registration order. For `HookAPI`, each handler receives the original tool result event, and the last returned override wins.\n- `content` replaces the full content array for the LLM.\n- `details` replaces the structured details object.\n- `isError` exists on the shared result type, but `HookToolWrapper` does not propagate it into a successful tool result; on a tool failure, the original error is rethrown after handlers complete.\n- On a tool failure, `tool_result` is still emitted with `isError: true`.\n\n## Context modification contract\n\nReturn `{ messages: [...] }` from a `context` handler to rewrite the message list before each LLM API call:\n\n```ts\nomp.on(\"context\", async (event, ctx) => {\n // Remove debug-only custom messages from LLM context\n const filtered = event.messages.filter(\n msg => !(msg.role === \"custom\" && msg.customType === \"debug-only\")\n );\n return { messages: filtered };\n});\n```\n\nContract:\n\n- `event.messages` is the current accumulated list.\n- Handlers run in order; each receives the output of the previous handler.\n- Return `undefined` (or nothing) to pass messages through unmodified.\n\n## Three complete examples\n\n### 1. rm-rf blocker\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function rmRfBlocker(omp: HookAPI): void {\n omp.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n\n const cmd = String(event.input.command ?? \"\");\n if (!/\\brm\\s+-rf\\s+\\//.test(cmd)) return;\n\n // Allow if user explicitly confirms (interactive mode only)\n if (ctx.hasUI) {\n const allow = await ctx.ui.confirm(\n \"Dangerous command\",\n `This command deletes from root:\\n${cmd}\\n\\nProceed?`\n );\n if (allow) return;\n }\n\n return { block: true, reason: \"rm -rf / blocked by safety policy\" };\n });\n}\n```\n\n### 2. API-key redactor\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\n// Common API-key shapes. Not exhaustive — providers using bespoke formats\n// (Anthropic `sk-ant-…`, JWT-style bearers, gateway-specific prefixes, etc.)\n// need their own entries.\nconst SECRET_PATTERNS = [\n /\\b(sk|pk)-[a-zA-Z0-9]{20,}\\b/g,\n /\\bAKIA[A-Z0-9]{16}\\b/g,\n /\\bghp_[a-zA-Z0-9]{36}\\b/g,\n // Zhipu / GLM Coding Plan: `<id>.<secret>` (no `sk-` prefix).\n /\\b[a-zA-Z0-9]{16,}\\.[a-zA-Z0-9]{16,}\\b/g,\n /\\b[a-zA-Z0-9_-]{20,}\\s*=\\s*[\"']?[a-zA-Z0-9._/+=-]{20,}[\"']?/g,\n];\n\nexport default function apiKeyRedactor(omp: HookAPI): void {\n omp.on(\"tool_result\", async (event) => {\n if (event.isError) return;\n\n let changed = false;\n const redacted = event.content.map(chunk => {\n if (chunk.type !== \"text\") return chunk;\n let text = chunk.text;\n for (const pattern of SECRET_PATTERNS) {\n const next = text.replace(pattern, \"[REDACTED]\");\n if (next !== text) { changed = true; text = next; }\n }\n return { ...chunk, text };\n });\n\n if (changed) return { content: redacted };\n });\n}\n```\n\n### 3. Context filter\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function contextFilter(omp: HookAPI): void {\n omp.on(\"context\", async (event) => {\n const MAX_TOOL_OUTPUT_CHARS = 8_000;\n\n const trimmed = event.messages.map(msg => {\n // Truncate very large tool results to keep context manageable\n if (msg.role !== \"tool\") return msg;\n const content = msg.content.map(chunk => {\n if (chunk.type !== \"text\" || chunk.text.length <= MAX_TOOL_OUTPUT_CHARS) return chunk;\n return {\n ...chunk,\n text: chunk.text.slice(0, MAX_TOOL_OUTPUT_CHARS) + \"\\n[... truncated by context-filter hook]\",\n };\n });\n return { ...msg, content };\n });\n\n return { messages: trimmed };\n });\n}\n```\n\n## UI methods in hook context\n\n`ctx.ui` is a `HookUIContext`. Available methods:\n\n| Method | Description |\n|---|---|\n| `notify(message, type?)` | Show an in-app notification |\n| `setStatus(key, text)` | Set footer status text (keyed, sorted by key) |\n| `select(title, options)` | Show a selection dialog |\n| `confirm(title, message)` | Show a yes/no dialog |\n| `input(title, placeholder?)` | Show a text input dialog |\n| `editor(title, prefill?, { signal }?, { promptStyle }?)` | Show a multi-line editor |\n| `setEditorText(text)` | Set the input editor content |\n| `getEditorText()` | Get current input editor content |\n| `custom(factory)` | Render a custom TUI component |\n| `theme` | Current theme object |\n\nPass `{ promptStyle: true }` as the fourth argument when Enter should submit and Shift+Enter should insert a newline. The default hook editor behavior keeps Enter as newline and Ctrl+Enter as submit.\n\n`ctx.hasUI` is `false` in headless/print/subagent mode — always guard interactive calls.\n\n## Further reading\n\n- `docs/hooks.md` — hook subsystem internals, ordering rules, error propagation\n- `docs/extensions.md` — `ExtensionAPI` (superset of `HookAPI`)\n- `docs/skills/examples/safety-hook/` — complete working example\n",
|
|
64
|
+
"skills/authoring-marketplaces.md": "---\nname: authoring-marketplaces\ndescription: Use when creating a new omp marketplace. Covers marketplace.json schema, source types, install commands, and publishing.\n---\n\n# Authoring Marketplaces\n\nA marketplace is a Git repository (or local directory) that contains a catalog file at `.claude-plugin/marketplace.json`. Anyone can author one. Users add it with `/marketplace add owner/repo` and then install individual plugins from it.\n\n## Minimum viable marketplace\n\n```\nmy-marketplace/\n .claude-plugin/\n marketplace.json\n plugins/\n my-plugin/\n package.json\n index.ts\n```\n\n```json\n{\n \"name\": \"my-marketplace\",\n \"owner\": { \"name\": \"Your Name\" },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What it does\",\n \"source\": \"./plugins/my-plugin\"\n }\n ]\n}\n```\n\nPush to GitHub. Users install with:\n\n```\n/marketplace add your-github-username/my-marketplace\n/marketplace install my-plugin@my-marketplace\n```\n\n## marketplace.json schema\n\nThe catalog file must live at `.claude-plugin/marketplace.json` in the repository root.\n\n### Top-level fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Marketplace name. Lowercase alphanumeric, hyphens, dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner` | yes | Object with at minimum `owner.name` (string) |\n| `owner.name` | yes | Marketplace owner name |\n| `owner.email` | no | Owner contact email |\n| `plugins` | yes | Array of plugin entries (see below) |\n| `metadata.description` | no | Short description of the marketplace |\n| `metadata.version` | no | Catalog metadata version string |\n| `metadata.pluginRoot` | no | String prepended to all relative plugin source paths |\n| extra top-level fields | no | Preserved by the parser but not used by marketplace install/runtime logic |\n\n### Plugin entry fields\n\n| Field | Required | Description |\n|---|---|---|\n| `name` | yes | Plugin name (same naming rules as marketplace name) |\n| `source` | yes | Where to find the plugin — string or object (see source types below) |\n| `description` | no | Short plugin description |\n| `version` | no | Version string |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `category` | no | e.g. `development`, `productivity`, `security` |\n| `tags` / `keywords` | no | Arrays of string tags/keywords |\n| `repository` | no | Repository URL |\n| `license` | no | License string |\n| `strict` | no | Boolean plugin metadata flag |\n| `commands`, `agents`, `hooks`, `mcpServers`, `lspServers` | no | Capability metadata used by plugin tooling and selectors |\n\n### Full catalog example\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"acme-plugins\",\n \"owner\": {\n \"name\": \"Acme Corp\",\n \"email\": \"plugins@acme.example\"\n },\n \"metadata\": {\n \"description\": \"Official Acme plugins for oh-my-pi\"\n },\n \"plugins\": [\n {\n \"name\": \"acme-linter\",\n \"description\": \"Enforce Acme coding standards\",\n \"category\": \"development\",\n \"source\": \"./plugins/linter\"\n },\n {\n \"name\": \"acme-deploy\",\n \"description\": \"One-command deploy to Acme cloud\",\n \"category\": \"devops\",\n \"source\": {\n \"source\": \"github\",\n \"repo\": \"acme-corp/omp-deploy-plugin\",\n \"ref\": \"main\"\n }\n }\n ]\n}\n```\n\n## Plugin source types\n\n### 1. Relative path string\n\nPoints to a subdirectory inside the marketplace repository itself. Must start with `./`.\n\n```json\n\"source\": \"./plugins/my-plugin\"\n```\n\nThe path is resolved relative to the marketplace repository root. Path traversal outside the repo root is rejected.\n\nUse `metadata.pluginRoot` to avoid repeating a common prefix:\n\n```json\n{\n \"metadata\": { \"pluginRoot\": \"./plugins\" },\n \"plugins\": [\n { \"name\": \"plugin-a\", \"source\": \"./plugin-a\" },\n { \"name\": \"plugin-b\", \"source\": \"./plugin-b\" }\n ]\n}\n```\n\n### 2. Git URL\n\nA full Git repository URL. Optionally pin to a branch/tag (`ref`) or exact commit (`sha`):\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/my-plugin.git\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 3. GitHub shorthand\n\nShorthand for GitHub repositories. Functionally equivalent to a Git URL but more concise:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/my-plugin\",\n \"ref\": \"v2.1.0\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\n### 4. Git subdirectory (monorepo)\n\nFor plugins living inside a subdirectory of a larger repository. `url` accepts a full HTTPS URL or a GitHub `owner/repo` shorthand:\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"packages/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"a1b2c3d4...\"\n}\n```\n\nThe `path` must resolve inside the cloned repository — directory escape is rejected.\n\n### 5. NPM package\n\nDeclares the plugin as an npm package. `version` is optional:\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@acme/omp-plugin\",\n \"version\": \"1.2.0\"\n}\n```\n\n> Note: npm plugin sources are declared in the schema but installation support is not yet fully implemented. Use Git-based sources for plugins that need to work today.\n\n## Plugin structure\n\nEach plugin directory (regardless of source type) should contain:\n\n```\nmy-plugin/\n package.json ← required: declares omp.extensions entry points\n src/\n main.ts ← extension factory\n README.md ← recommended: description + usage\n```\n\nMinimum `package.json`:\n\n```json\n{\n \"name\": \"my-plugin\",\n \"omp\": {\n \"extensions\": [\"./src/main.ts\"]\n }\n}\n```\n\n## Install command\n\n```\n/marketplace install name@marketplace-name\n/marketplace install --force name@marketplace-name # reinstall\n/marketplace install --scope project name@marketplace # project-scoped\n```\n\nCLI equivalent:\n\n```\nomp plugin marketplace add owner/repo\nomp plugin install name@marketplace-name\n```\n\nScope behavior:\n\n- **user** (default) — installed in `~/.omp/plugins/installed_plugins.json`, available in all projects\n- **project** — installed in `<project>/.omp/plugins/installed_plugins.json`, available only in that project\n\nProject-scoped installs shadow user-scoped installs of the same plugin name.\n\n## Naming rules\n\nMarketplace names and plugin names must:\n\n- Contain only lowercase letters, digits, hyphens (`-`), and dots (`.`)\n- Start and end with a lowercase letter or digit\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid: `my-plugin`, `code-review`, `acme.tools`, `ai-v2`\nInvalid: `-bad-start`, `bad-end-`, `.dot-start`, `Under_score`, `HAS_CAPS`\n\n## Publishing workflow\n\n1. Create `marketplace.json` at `.claude-plugin/marketplace.json` in a new Git repo.\n2. Add plugin entries pointing to subdirectories (or external sources).\n3. Push to GitHub.\n4. Share the `owner/repo` string. Users add it with `/marketplace add owner/repo`.\n5. When you update the catalog, users run `/marketplace update your-marketplace-name` to pull the latest.\n\nTo test locally before publishing:\n\n```\n/marketplace add ./path/to/my-marketplace\n```\n\nLocal path sources also accept `~/` and absolute paths.\n\n## Further reading\n\n- `docs/marketplace.md` — marketplace system internals, on-disk layout, command reference\n- `docs/skills/authoring-extensions.md` — how to author the extension modules inside plugins\n- `docs/skills/examples/mini-marketplace/` — minimal working marketplace example\n",
|
|
65
|
+
"skills/examples/hello-extension/README.md": "# hello-extension\n\nA minimal `oh-my-pi` extension that demonstrates the two most common authoring patterns: subscribing to `session_start` to notify on load, and registering a `/hello` slash command that sends a greeting into the conversation. It is intentionally small — use it as a copy-paste starting point for your own extension.\n\n## Install\n\n**Option A — drop into user extensions directory:**\n\n```\ncp -r . ~/.omp/agent/extensions/hello-extension\n```\n\nRestart `omp`. You will see the startup notification immediately.\n\n**Option B — point the settings `extensions` array at it:**\n\n```yaml\n# ~/.omp/agent/config.yml\nextensions:\n - /path/to/hello-extension\n```\n\n**Option C — load once via CLI flag:**\n\n```\nomp --extension ./hello-extension\n```\n\n## Usage\n\nAfter loading, type `/hello` or `/hello Ada` in the omp prompt. The command sends a visible greeting custom message into the conversation and shows a \"Message sent!\" notification.\n\n## What it demonstrates\n\n- Default export factory receiving `ExtensionAPI`\n- `pi.on(\"session_start\", ...)` — session lifecycle hook\n- `pi.registerCommand(...)` — slash command registration\n- `ctx.ui.notify(...)` — user-facing notification\n- `package.json` with `omp.extensions` manifest field\n",
|
|
66
|
+
"skills/examples/mini-marketplace/README.md": "# mini-marketplace\n\nA minimal `oh-my-pi` marketplace catalog that demonstrates the `marketplace.json` format. It lists one plugin (`my-plugin`) using a relative path source.\n\n## Install command\n\n```\n/marketplace add ./docs/skills/examples/mini-marketplace\n/marketplace install my-plugin@example-marketplace\n```\n\nOr from the CLI:\n\n```\nomp plugin marketplace add ./docs/skills/examples/mini-marketplace\nomp plugin install my-plugin@example-marketplace\n```\n\n## What it demonstrates\n\n- Minimum required `marketplace.json` fields: `name`, `owner.name`, `plugins`\n- Relative path plugin source using `./` prefix (`\"source\": \"./my-plugin\"`)\n- Plugin bundled inside the same directory tree as the marketplace catalog\n- Extra catalog metadata: the example includes a top-level `description`; current marketplace parsing preserves extra top-level fields, while runtime behavior uses required fields and plugin entries.\n\n## Structure\n\n```\nmini-marketplace/\n .claude-plugin/\n marketplace.json ← catalog\n README.md\n my-plugin/\n package.json ← omp.extensions manifest\n index.ts ← extension entry point\n```\n\nPublished and local marketplaces use the same catalog location: `.claude-plugin/marketplace.json` inside the marketplace root. Point `/marketplace add` at this folder to load the example.\n",
|
|
67
|
+
"skills/examples/safety-hook/README.md": "# safety-hook\n\nAn `oh-my-pi` extension that demonstrates `tool_call` blocking. It intercepts `bash` tool calls and returns `{ block: true, reason: \"...\" }` when the command contains `rm -rf /` with normal whitespace, preventing the tool from executing.\n\n## What it demonstrates\n\n- `pi.on(\"tool_call\", ...)` — pre-execution interception\n- `return { block: true, reason: \"...\" }` — blocking contract\n- Regex guard on bash input (`/\\brm\\s+-rf\\s+\\//`)\n\n## Install\n\n```\ncp -r . ~/.omp/agent/extensions/safety-hook\n```\n\nRestart `omp`. The hook is active for all sessions.\n\nOr load once:\n\n```\nomp --extension ./safety-hook\n```\n\n## How it works\n\n```\nLLM calls bash tool\n │\n ▼\ntool_call handlers run\n │\n ├─ command matches /\\brm\\s+-rf\\s+\\// ?\n │ yes → { block: true, reason: \"...\" } ← execution stops, reason sent to LLM\n │ no → undefined ← execution continues normally\n ▼\ntool executes (if not blocked)\n```\n\nThe `reason` text is what the LLM receives as the tool error, so it can understand why the call was rejected and try a different approach.\n",
|
|
68
|
+
"slash-command-internals.md": "# Slash command internals\n\nThis document describes how slash commands are discovered, deduplicated, surfaced in interactive mode, and expanded at prompt time in `coding-agent`.\n\n## Implementation files\n\n- [`src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n- [`src/capability/slash-command.ts`](../packages/coding-agent/src/capability/slash-command.ts)\n- [`src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`src/discovery/claude.ts`](../packages/coding-agent/src/discovery/claude.ts)\n- [`src/discovery/codex.ts`](../packages/coding-agent/src/discovery/codex.ts)\n- [`src/discovery/claude-plugins.ts`](../packages/coding-agent/src/discovery/claude-plugins.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n- [`src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n\n## 1) Discovery model\n\nSlash commands are a capability (`id: \"slash-commands\"`) keyed by command name (`key: cmd => cmd.name`).\n\nThe capability registry loads all registered providers, sorted by provider priority descending, and deduplicates by key with **first wins** semantics.\n\n### Provider precedence\n\nCurrent slash-command providers and priorities:\n\n1. `native` (OMP) — priority `100`\n2. `claude` — priority `80`\n3. `claude-plugins` — priority `70`\n4. `codex` — priority `70`\n5. `opencode` — priority `55`\n\nTie behavior: equal-priority providers keep registration order. Current import order registers `claude-plugins` before `codex`, so plugin commands win over codex commands on name collisions.\n\n### Name-collision behavior\n\nFor `slash-commands`, collisions are resolved strictly by capability dedup:\n\n- highest-precedence item is kept in `result.items`\n- lower-precedence duplicates remain only in `result.all` and are marked `_shadowed = true`\n\nThis applies across providers and also within a provider if it returns duplicate names.\n\n### File scanning behavior\n\nProviders mostly use `loadFilesFromDir(...)`, which currently:\n\n- defaults to non-recursive matching (`*.md`)\n- uses native glob with `gitignore: true`, `hidden: false`, `fileType: File`\n- reads matching files in parallel and transforms them into `SlashCommand` items\n\nSo hidden files/directories are not loaded, ignored paths are skipped, and file order follows native glob result order unless a provider adds its own ordering.\n\n## 2) Provider-specific source paths and local precedence\n\n## `native` provider (`builtin.ts`)\n\nSearch roots come from `.omp` directories:\n\n- project: `<cwd>/.omp/commands/*.md`\n- user: `~/.omp/agent/commands/*.md`\n\n`getConfigDirs()` returns project first, then user, so **project native commands beat user native commands** when names collide.\n\n## `claude` provider (`claude.ts`)\n\nLoads, subject to `commands.enableClaudeUser` and `commands.enableClaudeProject` settings:\n\n- user: `~/.claude/commands/*.md`\n- project: `<cwd>/.claude/commands/*.md`\n\nThe provider pushes user items before project items, so **user Claude commands beat project Claude commands** on same-name collisions inside this provider.\n\n## `codex` provider (`codex.ts`)\n\nLoads:\n\n- user: `~/.codex/commands/*.md`\n- project: `<cwd>/.codex/commands/*.md`\n\nBoth sides are loaded then flattened in user-first order, so **user Codex commands beat project Codex commands** on collisions.\n\nCodex command content is parsed with frontmatter stripping (`parseFrontmatter`), and command name can be overridden by frontmatter `name`; otherwise filename is used.\n\n## `opencode` provider (`opencode.ts`)\n\nLoads, subject to `commands.enableOpencodeUser` and `commands.enableOpencodeProject` settings:\n\n- user: `~/.config/opencode/commands/*.md`\n- project: `<cwd>/.opencode/commands/*.md`\n\nBoth sides are loaded then flattened in user-first order, so **user OpenCode commands beat project OpenCode commands** on collisions. OpenCode command content is parsed with frontmatter stripping, and command name can be overridden by frontmatter `name`; otherwise filename is used.\n\n## `claude-plugins` provider (`claude-plugins.ts`)\n\nLoads plugin command roots from `~/.claude/plugins/installed_plugins.json`, then scans `<pluginRoot>/commands/*.md`.\n\nOrdering follows registry iteration order and per-plugin entry order from that JSON data. There is no additional sort step.\n\n## 3) Materialization to runtime `FileSlashCommand`\n\n`loadSlashCommands()` in `src/extensibility/slash-commands.ts` converts capability items into `FileSlashCommand` objects used at prompt time.\n\nFor each command:\n\n1. parse frontmatter/body (`parseFrontmatter`)\n2. description source:\n - `frontmatter.description` if present\n - else first non-empty body line (trimmed, max 60 chars with `...`)\n3. keep parsed body as executable template content\n4. compute a display source string like `via Claude Code Project`\n\nFrontmatter parse severity is source-dependent:\n\n- `native` level -> parse errors are `fatal`\n- `user`/`project` levels -> parse errors are `warn` with fallback parsing\n\n### Bundled fallback commands\n\nAfter filesystem/provider commands, embedded command templates are appended (`EMBEDDED_COMMAND_TEMPLATES`) if their names are not already present.\n\nCurrent embedded set comes from `src/task/commands.ts` and is used as a fallback (`source: \"bundled\"`).\n\n## 4) Interactive mode: where command lists come from\n\nInteractive mode combines multiple command sources for autocomplete and command routing.\n\nAt construction time it builds a pending command list from:\n\n- built-ins (`BUILTIN_SLASH_COMMANDS`, includes argument completion and inline hints for selected commands)\n- extension-registered slash commands (`extensionRunner.getRegisteredCommands(...)`)\n- TypeScript custom commands (`session.customCommands`), mapped to slash command labels\n- optional skill commands (`/skill:<name>`) when `skills.enableSkillCommands` is enabled\n\nThen `init()` calls `refreshSlashCommandState(...)` to load file-based commands and install one `CombinedAutocompleteProvider` containing:\n\n- pending commands above\n- discovered file-based commands\n\n`refreshSlashCommandState(...)` also updates `session.setSlashCommands(...)` so prompt expansion uses the same discovered file command set.\n\n### Refresh lifecycle\n\nSlash command state is refreshed:\n\n- during interactive init\n- after `/move` changes working directory (`handleMoveCommand` calls `resetCapabilities()` then `refreshSlashCommandState(newCwd)`)\n\nThere is no continuous file watcher for command directories.\n\n### Other surfacing\n\nThe Extensions dashboard also loads `slash-commands` capability and displays active/shadowed command entries, including `_shadowed` duplicates.\n\n## 5) Prompt pipeline placement\n\n`AgentSession.prompt(...)` slash handling order (when `expandPromptTemplates !== false`):\n\n1. **Extension commands** (`#tryExecuteExtensionCommand`) \n If `/name` matches extension-registered command, handler executes immediately and prompt returns.\n2. **TypeScript custom commands** (`#tryExecuteCustomCommand`) \n Boundary only: if matched, it executes and may return:\n - `string` -> replace prompt text with that string\n - `void/undefined` -> treated as handled; no LLM prompt\n3. **File-based slash commands** (`expandSlashCommand`) \n If text still starts with `/`, attempt markdown command expansion.\n4. **Prompt templates** (`expandPromptTemplate`) \n Applied after slash/custom processing.\n5. **Delivery**\n - idle: prompt is sent immediately to agent\n - streaming: prompt is queued as steer/follow-up depending on `streamingBehavior`\n\nThis is why slash command expansion sits before prompt-template expansion, and why custom commands can transform away the leading slash before file-command matching.\n\n## 6) Expansion semantics for file-based slash commands\n\n`expandSlashCommand(text, fileCommands)` behavior:\n\n- only runs when text begins with `/`\n- parses command name from first token after `/`\n- parses args from remaining text via `parseCommandArgs`\n- finds exact name match in loaded `fileCommands`\n- if matched, applies:\n - positional replacement: `$1`, `$2`, ...\n - slice replacement: `$@[start]` / `$@[start:length]` using 1-based positions\n - aggregate replacement: `$ARGUMENTS` and `$@`\n - template rendering via `prompt.render` with `{ args, ARGUMENTS, arguments }`\n - inline-argument fallback append when the template did not use an inline argument placeholder\n\n### `parseCommandArgs` caveats\n\nThe parser is simple quote-aware splitting:\n\n- supports `'single'` and `\"double\"` quoting to keep spaces\n- strips quote delimiters\n- does not implement backslash escaping rules\n- unmatched quote is not an error; parser consumes until end\n\n## 7) Unknown `/...` behavior\n\nUnknown slash input is **not rejected** by core slash logic.\n\nIf command is not handled by extension/custom/file layers, `expandSlashCommand` returns original text, and the literal `/...` prompt proceeds through normal prompt-template expansion and LLM delivery.\n\nInteractive mode separately hard-handles many built-ins in `InputController` (for example `/settings`, `/model`, `/mcp`, `/move`, `/exit`). Those are consumed before `session.prompt(...)` and therefore never reach file-command expansion in that path.\n\n## 8) Streaming-time differences vs idle\n\n## Idle path\n\n- `session.prompt(\"/x ...\")` runs command pipeline and either executes command immediately or sends expanded text directly.\n\n## Streaming path (`session.isStreaming === true`)\n\n- `prompt(...)` still runs extension/custom/file/template transforms first\n- then requires `streamingBehavior`:\n - `\"steer\"` -> queue interrupt message (`agent.steer`)\n - `\"followUp\"` -> queue post-turn message (`agent.followUp`)\n- if `streamingBehavior` is omitted, prompt throws an error\n\n### Important command-specific streaming behavior\n\n- Extension commands are executed immediately even during streaming (not queued as text).\n- `steer(...)`/`followUp(...)` helper methods reject extension commands (`#throwIfExtensionCommand`) to avoid queuing command text for handlers that must run synchronously.\n- Compaction queue replay uses `isKnownSlashCommand(...)` to decide whether queued entries should be replayed via `session.prompt(...)` (for known slash commands) vs raw steer/follow-up methods.\n\n## 9) Error handling and failure surfaces\n\n- Provider load failures are isolated; registry collects warnings and continues with other providers.\n- Invalid slash command items (missing name/path/content or invalid level) are dropped by capability validation.\n- Frontmatter parse failures:\n - native commands: fatal parse error bubbles\n - non-native commands: warning + fallback key/value parse\n- Extension/custom command handler exceptions are caught and reported via extension error channel (or logger fallback for custom commands without extension runner), and treated as handled (no unintended fallback execution).\n",
|
|
69
|
+
"system-prompt-customization.md": "# System Prompt Customization\n\nHow the coding-agent assembles the system prompt sent to the model, and what users can control via `SYSTEM.md`, `APPEND_SYSTEM.md`, and the matching CLI flags.\n\nPrimary implementation:\n\n- `packages/coding-agent/src/system-prompt.ts` (`buildSystemPrompt`, `loadSystemPromptFiles`)\n- `packages/coding-agent/src/main.ts` (`discoverSystemPromptFile`, `discoverAppendSystemPromptFile`)\n- `packages/coding-agent/src/prompts/system/system-prompt.md` (default stable instruction template)\n- `packages/coding-agent/src/prompts/system/custom-system-prompt.md` (internal custom-prompt template; not the normal CLI `SYSTEM.md` path)\n- `packages/coding-agent/src/prompts/system/project-prompt.md` (project/environment footer)\n\n---\n\n## 1) Inputs\n\nFour user-controllable inputs feed prompt assembly. All four resolve a value as either a literal string or, if the argument looks like a file path, the contents of that file (`resolvePromptInput`).\n\n| Input | Source | Effect |\n|---|---|---|\n| `--system-prompt <text-or-file>` | CLI flag | Replaces block 0: the default stable instructions. Highest precedence. |\n| `SYSTEM.md` | `<cwd>/.omp/SYSTEM.md`, then `~/.omp/agent/SYSTEM.md` (and equivalent paths under `.claude`, `.codex`, `.gemini`) | Same effect as `--system-prompt`; used when the flag is absent. |\n| `--append-system-prompt <text-or-file>` | CLI flag | Adds a prompt block. Without a custom system prompt it goes after all default blocks; with one it goes after the custom block and before the preserved project/environment footer. |\n| `APPEND_SYSTEM.md` | Same discovery as `SYSTEM.md` | Same effect as `--append-system-prompt`; used when the flag is absent. |\n\nDiscovery for `SYSTEM.md` / `APPEND_SYSTEM.md` uses `findConfigFile` (`packages/coding-agent/src/config.ts`): the first existing file across the ordered bases (`.omp`, `.claude`, `.codex`, `.gemini` — project-level at `<cwd>` first, then user-level at `~`) wins. **No ancestor walk-up.** Running `omp` from `<repo>/subdir` does not pick up `<repo>/.omp/SYSTEM.md`; the file must live directly under the cwd's config base or in the user-level location. See [`docs/config-usage.md`](./config-usage.md) for the full discovery contract.\n\nPrecedence (highest first):\n\n1. `--system-prompt`\n2. project `SYSTEM.md`\n3. user `SYSTEM.md`\n\nFor append, the same precedence applies between `--append-system-prompt`, project `APPEND_SYSTEM.md`, and user `APPEND_SYSTEM.md`.\n\n---\n\n## 2) Replace vs. append\n\nNormal CLI startup builds the default provider-facing prompt blocks first, then applies CLI / discovered file overrides in `packages/coding-agent/src/main.ts`:\n\n```ts\nif (resolvedSystemPrompt && resolvedAppendPrompt) {\n options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, resolvedAppendPrompt, ...defaultPrompt.slice(1)];\n} else if (resolvedSystemPrompt) {\n options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, ...defaultPrompt.slice(1)];\n} else if (resolvedAppendPrompt) {\n options.systemPrompt = defaultPrompt => [...defaultPrompt, resolvedAppendPrompt];\n}\n```\n\nThe default blocks come from `buildSystemPrompt`:\n\n- block 0: `system-prompt.md` — the stable default instructions (staff-engineer preamble, tool inventory, exploration rules, workflow rules, etc.);\n- block 1, when non-empty: `project-prompt.md` — dynamic project/environment context (workstation info, context files, dir-context list, workspace tree, current date/cwd, and other project footer content).\n\nConsequences for normal CLI use:\n\n- Providing `--system-prompt` or `SYSTEM.md` replaces only block 0. The stable default instructions are removed, but the dynamic project/environment footer from `project-prompt.md` remains as `defaultPrompt.slice(1)`.\n- Providing `--append-system-prompt` or `APPEND_SYSTEM.md` without a custom system prompt appends a new block after all default blocks.\n- Providing both a custom system prompt and an append prompt produces: custom system prompt block, append prompt block, then the preserved dynamic project/environment footer.\n\nIf you want to keep both default blocks and add to them, use `--append-system-prompt` / `APPEND_SYSTEM.md` without `--system-prompt` / `SYSTEM.md`. If you want to replace the stable default instructions while keeping the dynamic footer, use `--system-prompt` / `SYSTEM.md`.\n\n---\n\n## 3) Templating contract\n\n**Contents of `SYSTEM.md`, `APPEND_SYSTEM.md`, `--system-prompt`, and `--append-system-prompt` are treated as plain text.** They are resolved before prompt-block replacement and are not rendered as Handlebars templates.\n\nThe built-in prompt templates are Handlebars (`packages/utils/src/prompt.ts`), but user-provided strings are not compiled with that renderer. The secondary capability path can insert `systemPromptCustomization` into a Handlebars parent template, but a `{{value}}` reference in Handlebars still does not recursively render its substituted contents — the value is emitted as a string. Concretely:\n```handlebars\n{{! parent template — handled by Handlebars }}\n{{#if systemPromptCustomization}}\n{{systemPromptCustomization}}\n{{/if}}\n```\n\nIf `SYSTEM.md` contains:\n\n```handlebars\nWorking in {{cwd}} on {{date}}.\n{{#if hasMemoryRoot}}Memory enabled.{{/if}}\n```\n\nthe rendered output contains those characters verbatim — `{{cwd}}`, `{{#if hasMemoryRoot}}`, etc. are NOT substituted. They will be shown to the model as literal Handlebars syntax.\n\nThis is by design. The internal template variables (`cwd`, `date`, `environment`, `workspaceTree`, `skills`, `rules`, `toolRefs`, `hasMemoryRoot`, `hasObsidian`, `mcpDiscoveryServerSummaries`, ...) are not a supported public surface — they change between releases as the prompt is rewritten, and they would couple user configs to internals. Treat them as private.\n\nIf a future release exposes a templating surface for `SYSTEM.md`, it will be opt-in (e.g. via a settings flag or a different filename) and documented here.\n\n---\n\n## 4) Recommended patterns\n\n### \"Tweak the default\" — keep default, add a few rules\n\nUse `APPEND_SYSTEM.md` (or `--append-system-prompt`) without `SYSTEM.md`. The default stable instructions and the dynamic project/environment footer stay intact; your text is appended as an additional block.\n\n```text\n# ~/.omp/agent/APPEND_SYSTEM.md\nPrefer Bun APIs over Node APIs in this project.\nWhen you change a public function, run `bun check` before yielding.\n```\n\n### \"Replace the stable default instructions\" — bring your own base prompt\n\nUse `SYSTEM.md` (or `--system-prompt`). You replace the stable default instructions in block 0, but normal CLI startup still preserves the dynamic project/environment footer block (`project-prompt.md`): workstation info, context files, dir-context list, workspace tree, current date, cwd, and related project context.\n\n```text\n# ~/.omp/agent/SYSTEM.md\nYou are a code reviewer. Read diffs, surface issues, never edit files.\n- Cite paths with backticks.\n- Prefer concrete fixes over abstract advice.\n```\n\nIf you do this and want default tool guidance, exploration rules, or workflow rules, copy what you need from `packages/coding-agent/src/prompts/system/system-prompt.md` and maintain it yourself — there is currently no way to inherit selected sections from that stable default instruction block.\n\n### \"Customize while keeping generated skills/rules/tool guidance\"\n\nUse `APPEND_SYSTEM.md`, not `SYSTEM.md`. Skills, rulebook summaries, always-apply rules, the tool inventory, and the built-in guidance that tells the model when to read `skill://<name>` are part of block 0 (`system-prompt.md`). Because `SYSTEM.md` replaces block 0, those generated lists are not available to the model in a custom system prompt.\n\nThe dynamic project/environment footer that remains after `SYSTEM.md` is only block 1 (`project-prompt.md`): workstation info, AGENTS.md context files, dir-context list, workspace tree, current date, cwd, and related project context. It does not include discovered skills.\n\nThere is currently no supported CLI mode for \"replace the stable default instructions but keep the generated skills/rules/tool guidance.\" If you need automatic skills loading, keep the default block and add your customization via `APPEND_SYSTEM.md`. If you fully replace with `SYSTEM.md`, you must hard-code any skill names/instructions you want the model to know about, and those will not track discovery automatically.\n\n### \"Customize automatic session titles\"\n\n`SYSTEM.md` and `APPEND_SYSTEM.md` do not affect the model call that names a new session. Create the title-specific prompt file instead:\n\n```text\n# ~/.omp/agent/TITLE_SYSTEM.md\nGenerate a session name using lowercase `<type>:<primary-objective>`.\nIf the message carries no concrete task, output exactly `none`.\n```\n\n`TITLE_SYSTEM.md` is discovered with the same project-then-user config-directory pattern as `SYSTEM.md` / `APPEND_SYSTEM.md`. When absent, OMP uses the bundled `title-system.md` / `tiny-title-system.md` prompts. When present, the online title path still forces the `set_title` tool call, and the local tiny-model path keeps the `<title>...</title>` wrapper while using this file as the system turn.\n\n### \"Replace everything, including project context\" — SDK-only\n\nThe normal CLI file/flag path intentionally preserves `defaultPrompt.slice(1)`. Code using `CreateAgentSessionOptions.systemPrompt` directly can return a full replacement array and omit the project footer, but that is not what `.omp/SYSTEM.md`, `~/.omp/agent/SYSTEM.md`, or `--system-prompt` do.\n\n### \"Replace, but keep one section of the default instructions\" — not directly supported\n\nThere is no built-in way to inherit specific sections from `system-prompt.md` while replacing the rest. The supported CLI modes are: append to the default prompt, or replace block 0 and keep the dynamic footer.\n\n---\n\n## 5) Deduplication\n\nThe CLI path avoids double-injecting discovered `SYSTEM.md` by replacing block 0 after the default prompt blocks are rendered. Any `systemPromptCustomization` from the secondary capability path would have been rendered into block 0, and that block is discarded when `main.ts` applies `[resolvedSystemPrompt, ...defaultPrompt.slice(1)]`.\n\nInside `buildSystemPrompt` itself, secondary customization and always-apply rules are still deduplicated:\n\n- `dedupePromptSource` drops a `systemPromptCustomization` block when it already appears in an internally supplied `customPrompt` or append prompt.\n- `dedupeAlwaysApplyRules` omits always-apply rules whose body appears verbatim in any of `{customPrompt, appendPrompt, systemPromptCustomization}`.\n\n---\n\n## 6) Discovery paths\n\nOnly one path actually drives the customization a CLI user sees: the primary CLI path. The capability layer exists but its `SYSTEM.md` output never reaches the rendered prompt under normal CLI startup.\n\n- The primary CLI path (`discoverSystemPromptFile` / `discoverAppendSystemPromptFile` in `main.ts`, which feeds `resolvedSystemPrompt` / `resolvedAppendPrompt`) calls `findConfigFile`. `findConfigFile` checks only `<cwd>/.omp`, `<cwd>/.claude`, `<cwd>/.codex`, `<cwd>/.gemini`, and the user-level equivalents — it does **not** walk up ancestors. Files in `<ancestor>/.omp/SYSTEM.md` are ignored when `omp` is started from a subdirectory.\n- The secondary capability path (`loadSystemPromptFiles` → builtin discovery) does walk up via `findNearestProjectConfigDir` and requires the project `.omp/` directory to be non-empty. Its result is rendered into the template variable `systemPromptCustomization`. Under normal CLI startup the default template (`system-prompt.md`) never references that variable, so ancestor-walk capability content has no user-visible effect.\n\nNet effect for CLI users: put `SYSTEM.md` / `APPEND_SYSTEM.md` directly under `<cwd>/.omp` (or another supported config base under cwd) or in the user-level location (`~/.omp/agent/SYSTEM.md` etc.). Ancestor paths are not searched.\n\n---\n\n## 7) Quick reference\n\n| Goal | Use |\n|---|---|\n| Add an instruction on top of the full default prompt | `APPEND_SYSTEM.md` or `--append-system-prompt` |\n| Replace the stable default instructions but keep project/environment context | `SYSTEM.md` or `--system-prompt` |\n| Preserve generated skills/rules/tool guidance while customizing | `APPEND_SYSTEM.md`; `SYSTEM.md` replaces that generated block |\n| Customize automatic session titles | `TITLE_SYSTEM.md`; chat-turn `SYSTEM.md` / `APPEND_SYSTEM.md` do not affect title generation |\n| Use `{{cwd}}` / `{{date}}` / other internals in my file | Not supported. Files are inserted verbatim. |\n| Inherit specific sections from `system-prompt.md` | Not supported; use append, or copy what you need into `SYSTEM.md`. |\n| Override at a per-repo level | Project `.omp/SYSTEM.md` under the cwd you launch `omp` from |\n| Override globally | `~/.omp/agent/SYSTEM.md` or `~/.omp/agent/APPEND_SYSTEM.md` |\n",
|
|
70
|
+
"task-agent-discovery.md": "# Task Agent Discovery and Selection\n\nThis document describes how the task subsystem discovers agent definitions, merges multiple sources, and resolves a requested agent at execution time.\n\nIt covers runtime behavior as implemented today, including precedence, invalid-definition handling, and spawn/depth constraints that can make an agent effectively unavailable.\n\n## Implementation files\n\n- [`src/task/discovery.ts`](../packages/coding-agent/src/task/discovery.ts)\n- [`src/task/agents.ts`](../packages/coding-agent/src/task/agents.ts)\n- [`src/task/types.ts`](../packages/coding-agent/src/task/types.ts)\n- [`src/task/index.ts`](../packages/coding-agent/src/task/index.ts)\n- [`src/task/commands.ts`](../packages/coding-agent/src/task/commands.ts)\n- [`src/prompts/agents/task.md`](../packages/coding-agent/src/prompts/agents/task.md)\n- [`src/prompts/tools/task.md`](../packages/coding-agent/src/prompts/tools/task.md)\n- [`src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`src/config.ts`](../packages/coding-agent/src/config.ts)\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts)\n\n---\n\n## Agent definition shape\n\nTask agents normalize into `AgentDefinition` (`src/task/types.ts`):\n\n- `name`, `description`, `systemPrompt` (required for a valid loaded agent)\n- optional `tools`, `spawns`, `model`, `thinkingLevel`, `output`, `blocking`, `autoloadSkills`, `readSummarize`\n- `source`: `\"bundled\" | \"user\" | \"project\"`\n- optional `filePath`\n\nParsing comes from frontmatter via `parseAgentFields()` (`src/discovery/helpers.ts`):\n\n- missing `name` or `description` => invalid (`null`), caller treats as parse failure\n- `tools` accepts CSV or array; if provided, `yield` is auto-added\n- `spawns` accepts `*`, CSV, or array\n- backward-compat behavior: if `spawns` missing but `tools` includes `task`, `spawns` becomes `*`\n- `output` is passed through as opaque schema data\n- `read-summarize: false` (parsed as `readSummarize`) forces the subagent's `read` tool to return verbatim file content instead of structural summaries — `runSubprocess` applies it as a `read.summarize.enabled: false` override on the subagent's isolated settings (`src/task/executor.ts`). `explore` and `librarian` ship with it disabled. Defaults to enabled when the field is absent.\n\n## Bundled agents\n\nBundled agents are embedded at build time (`src/task/agents.ts`) using text imports.\n\n`EMBEDDED_AGENT_DEFS` defines:\n\n- `explore`, `plan`, `designer`, `reviewer` from prompt files\n- `task` and `quick_task` from shared `task.md` body plus injected frontmatter\n\nLoading path:\n\n1. `loadBundledAgents()` parses embedded markdown with `parseAgent(..., \"bundled\", \"fatal\")`\n2. results are cached in-memory (`bundledAgentsCache`)\n3. `clearBundledAgentsCache()` is test-only cache reset\n\nBecause bundled parsing uses `level: \"fatal\"`, malformed bundled frontmatter throws and can fail discovery entirely.\n\n## Filesystem and plugin discovery\n\n`discoverAgents(cwd, home)` (`src/task/discovery.ts`) merges agents from multiple places before appending bundled definitions.\n\n### Discovery inputs\n\n1. User config agent dirs from `getConfigDirs(\"agents\", { project: false })`\n2. Nearest project agent dirs from `findAllNearestProjectConfigDirs(\"agents\", cwd)`\n3. Claude plugin roots (`listClaudePluginRoots(home)`) with `agents/` subdirs\n4. Bundled agents (`loadBundledAgents()`)\n\n### Actual source order\n\nSource-family order comes from `getConfigDirs(\"\", { project: false })`, which is derived from `priorityList` in `src/config.ts`:\n\n1. `.omp`\n2. `.claude`\n3. `.codex`\n4. `.gemini`\n\nFor each source family, discovery order is:\n\n1. nearest project dir for that source (if found)\n2. user dir for that source\n\nAfter all source-family dirs, plugin `agents/` dirs are appended (project-scope plugins first, then user-scope).\n\nBundled agents are appended last.\n\n### Important caveat: stale comments vs current code\n\n`discovery.ts` header comments still mention `.pi` and do not mention `.codex`/`.gemini`. Actual runtime order is driven by `src/config.ts` and currently uses `.omp`, `.claude`, `.codex`, `.gemini`.\n\n## Merge and collision rules\n\nDiscovery uses first-wins dedup by exact `agent.name`:\n\n- A `Set<string>` tracks seen names.\n- Loaded agents are flattened in directory order and kept only if name unseen.\n- Bundled agents are filtered against the same set and only added if still unseen.\n\nImplications:\n\n- Project overrides user for same source family.\n- Higher-priority source family overrides lower (`.omp` before `.claude`, etc.).\n- Non-bundled agents override bundled agents with the same name.\n- Name matching is case-sensitive (`Task` and `task` are distinct).\n- Within one directory, markdown files are read in lexicographic filename order before dedup.\n\n## Invalid/missing agent file behavior\n\nPer directory (`loadAgentsFromDir`):\n\n- unreadable/missing directory: treated as empty (`readdir(...).catch(() => [])`)\n- file read or parse failure: warning logged, file skipped\n- parse path uses `parseAgent(..., level: \"warn\")`\n\nFrontmatter failure behavior comes from `parseFrontmatter`:\n\n- parse error at `warn` level logs warning\n- parser falls back to a simple `key: value` line parser\n- if required fields are still missing, `parseAgentFields` fails, then `AgentParsingError` is thrown and caught by caller (file skipped)\n\nNet effect: one bad custom agent file does not abort discovery of other files.\n\n## Agent lookup and selection\n\nLookup is exact-name linear search:\n\n- `getAgent(agents, name)` => `agents.find(a => a.name === name)`\n\nIn synchronous task execution (`TaskTool.#executeSync`):\n\n1. agents are rediscovered at execution time (`discoverAgents(this.session.cwd)`)\n2. requested `params.agent` is resolved through `getAgent`\n3. missing agent returns immediate tool response:\n - `Unknown agent \"...\". Available: ...`\n - no subprocess runs\n\n### Description vs execution-time discovery\n\n`TaskTool.create()` builds the tool description from discovery results at initialization time. `#executeSync` rediscovers agents, so the runtime set can differ from what was listed in the earlier tool description if agent files changed mid-session. The async entry path still uses the initialization-time list to decide whether an agent is marked `blocking` before scheduling.\n\n## Structured-output guardrails and schema precedence\n\nRuntime output schema precedence in `TaskTool.execute`:\n\n1. agent frontmatter `output`\n2. parent session `outputSchema`\n\n(`effectiveOutputSchema = effectiveAgent.output ?? this.session.outputSchema` — the task call itself never carries a schema; ad-hoc structured workflows go through the eval bridge's `agent(prompt, schema)`.)\n\nPrompt-time guardrail text in `src/prompts/tools/task.md` warns about mismatch behavior for structured-output agents (`explore`, `reviewer`): output-format instructions in prose can conflict with built-in schema and produce `null` outputs.\n\nThis is guidance, not hard runtime validation logic in `discoverAgents`.\n\n## Command discovery interaction\n\n`src/task/commands.ts` is parallel infrastructure for workflow commands (not agent definitions), but it follows the same overall pattern:\n\n- discover from capability providers first\n- deduplicate by name with first-wins\n- append bundled commands if still unseen\n- exact-name lookup via `getCommand`\n\nIn `src/task/index.ts`, command helpers are re-exported with agent discovery helpers. Agent discovery itself does not depend on command discovery at runtime.\n\n## Availability constraints beyond discovery\n\nAn agent can be discoverable but still unavailable to run because of execution guardrails.\n\n### Disabled-agent settings\n\n`TaskTool.#executeSync` checks `task.disabledAgents` after resolving the agent. If the requested name is disabled, execution returns an immediate error listing enabled alternatives when available.\n\n### Parent spawn policy\n\n`TaskTool.#executeSync` checks `session.getSessionSpawns()`:\n\n- `\"*\"` => allow any\n- `\"\"` => deny all\n- CSV list => allow only listed names\n\nIf denied: immediate `Cannot spawn '...'. Allowed: ...` response.\n\n### Blocked self-recursion env guard\n\n`PI_BLOCKED_AGENT` is read at tool construction. If request matches, execution is rejected with recursion-prevention message.\n\n### Recursion-depth gating (task tool availability inside child sessions)\n\nIn `runSubprocess` (`src/task/executor.ts`):\n\n- depth computed from `taskDepth`\n- `task.maxRecursionDepth` controls cutoff\n- when at max depth:\n - `task` tool is removed from child tool list\n - child `spawns` env is set to empty\n\nSo deeper levels cannot spawn further tasks even if the agent definition includes `spawns`.\n\n## Plan mode behavior\n\nWhen parent plan mode is enabled, `TaskTool.execute` builds an `effectiveAgent` before launching subprocesses:\n\n- prepends the plan-mode subagent system prompt\n- restricts tools to `read`, `search`, `find`, `lsp`, and `web_search`\n- clears child spawns\n\nThe same `effectiveAgent` is used for subprocess launch, model/thinking overrides, and output-schema selection.\n",
|
|
71
|
+
"theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@oh-my-pi/pi-natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`ThemeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (14)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n- `symbols.spinnerFrames` overrides the loading spinner frames. Accepts either a flat `string[]` (applied to both spinner types) or an object `{ \"status\"?: string[], \"activity\"?: string[] }` to override each type independently. Any type not specified falls back to the symbol preset's default frames. `status` drives the ~12.5fps spinner used by loaders and tool-execution indicators; `activity` drives the ~30fps spinner used by markdown progress bars and similar high-frequency UI.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. built-in embedded themes (`dark.json`, `light.json`, and all `defaults/*.json` compiled into `defaultThemes`)\n2. custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.omp/agent/themes`\n- overridden by `PI_CODING_AGENT_DIR` (`$PI_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. dark slot fallback\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"titanium\"`\n- `theme.light = \"light\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nSettings UI uses this for live preview and restores prior theme on cancel.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme settings are persisted\n\nTheme-related settings are persisted by `Settings` to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.omp/agent`\n- effective default file: `~/.omp/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- `colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.omp/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Display -> Dark theme` or `Display -> Light theme`) depending on which auto slot you want.\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n \"toolDiffAdded\": \"#9ece6a\",\n \"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n \"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and preview theme values (live `previewTheme`).\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
|
|
72
|
+
"tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more option-picker or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Option labels for the picker. The schema does not require a minimum length; the UI always appends `Other (type your own)`, and callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. `AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds (`0` disables timeout), and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `0` seconds, which disables timeout (`packages/coding-agent/src/config/settings-schema.ts`). Configured non-zero values are seconds.\n- Prompt guidance says provide 2-5 options, but code only requires the `options` array field and does not enforce a minimum or maximum length (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
|
|
73
|
+
"tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/hashline/src/format.ts` — stable hashline header formatting for preview anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines. Hashline mode uses `-LINE:before` / `+LINE:after` under a `¶PATH#TAG` header; plain mode uses `-LINE:COLUMN before` / `+LINE:COLUMN after`.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `PI_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and returns an error result if the live result no longer matches the preview (`stalePreview`). The current implementation compares replacement totals and per-file counts after the rerun; if the new run has already written different counts, the result is marked error.\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `PI_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than silently reporting success for a mismatched preview.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated by totals and per-file counts after the apply rerun, not by a byte-for-byte diff of every replacement payload.",
|
|
74
|
+
"tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — hashline match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. |\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-ast/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered under `¶PATH#HASH` as `*LINE:text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and non-raw, mutable sources.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
|
|
75
|
+
"tools/bash.md": "# bash\n\n> Execute a shell command in the session workspace, with optional PTY or background-job handling.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/bash.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/bash.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/bash-interactive.ts` — PTY/TUI execution path.\n - `packages/coding-agent/src/tools/bash-interceptor.ts` — blocks tool-better shell patterns.\n - `packages/coding-agent/src/tools/bash-skill-urls.ts` — expands internal URLs to paths.\n - `packages/coding-agent/src/exec/bash-executor.ts` — non-PTY shell execution.\n - `packages/coding-agent/src/session/streaming-output.ts` — tail buffer, truncation, artifact spill.\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp bounds.\n - `packages/coding-agent/src/config/settings-schema.ts` — default interceptor rules.\n - `docs/bash-tool-runtime.md` — deeper executor/runtime notes; use as the companion doc for shell-session internals.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command text to execute. A leading `cd <path> && ...` is rewritten into `cwd` only when `cwd` was omitted. |\n| `env` | `Record<string, string>` | No | Extra environment variables. Keys must match `^[A-Za-z_][A-Za-z0-9_]*$` or the tool throws. Values go through internal-URL expansion and are passed as environment values, not shell text. |\n| `timeout` | `number` | No | Timeout in seconds. Default `300`; clamped to `1..3600` by `clampTimeout(\"bash\", ...)`. |\n| `cwd` | `string` | No | Working directory, resolved against `session.cwd` via `resolveToCwd`. Must exist and be a directory. |\n| `pty` | `boolean` | No | Request PTY mode. Default `false`. PTY is used only when `pty: true`, `PI_NO_PTY !== \"1\"`, and the tool context has a UI. |\n| `async` | `boolean` | No | Background execution request. Present only when `async.enabled` is true for the session. Returns immediately with a job id instead of waiting. |\n\n## Outputs\nThe tool returns a single `text` content block plus optional `details`.\n\n- Success, foreground:\n - `content[0].text`: command output, or `(no output)` when the command produced nothing.\n - `details.timeoutSeconds`: effective timeout after clamping.\n - `details.requestedTimeoutSeconds`: present when the requested timeout differed from the effective timeout.\n - `details.wallTimeMs`: elapsed wall-clock milliseconds for completed local/client-terminal runs.\n - `details.terminalId`: present when execution was routed through a client terminal bridge.\n - `details.exitCode`: present when the command completed with a non-zero exit code.\n - `details.meta.truncation`: present when output was truncated in memory; includes `artifactId` when full output spilled to an artifact.\n - non-zero exits return a tool result marked `isError` with output plus `Command exited with code <n>`; they are not thrown.\n- Success, background start (`async: true` or auto-background):\n - `content[0].text`: optional preview tail, timeout notice if any, then `Background job <id> started: <label>` with follow-up instructions.\n - `details.async`: `{ state: \"running\", jobId, type: \"bash\" }`.\n- Background progress / completion:\n - delivered through `onUpdate` / async job manager, not the initial return.\n - running updates contain tail text and `details.async.state: \"running\"` only after the job is considered backgrounded.\n - completion/failure updates carry final text and `details.async.state: \"completed\" | \"failed\"`. A non-zero exit is recorded as a failed background job.\n- Failure:\n - unfinished execution (`cancelled`, timeout, missing exit status), validation failures, and intercepted commands throw `ToolError` / `ToolAbortError`.\n\nStdout and stderr are merged before the model sees them. Definite non-zero exit codes are appended to the returned error result text as `Command exited with code <n>`.\n\n## Flow\n1. `BashTool.execute()` in `packages/coding-agent/src/tools/bash.ts` reads `command`, normalizes `env`, and defaults `timeout` to `300`.\n2. If `cwd` is absent, it rewrites a leading `cd <path> && ...` into the structured `cwd` field and strips that prefix from `command`.\n3. If `async: true` is requested while `async.enabled` is off, it throws `ToolError` before any execution.\n4. If `bashInterceptor.enabled` is on, `checkBashInterception()` runs against both the original command and the `cd`-stripped command. A matching enabled rule throws before URL expansion or execution.\n5. `expandInternalUrls()` rewrites supported internal URLs inside `command`, each `env` value, and protocol-looking `cwd` values. Command replacements are shell-escaped; `env` and `cwd` replacements use raw filesystem/string values because they are not interpolated into shell text.\n6. `resolveToCwd()` resolves `cwd` against `session.cwd`; `fs.stat()` verifies that the target exists and is a directory.\n7. `clampTimeout(\"bash\", requestedTimeoutSec)` enforces `TOOL_TIMEOUTS.bash` (`default: 300`, `min: 1`, `max: 3600`). When clamped, `#buildCompletedResult()` / `#buildBackgroundStartResult()` append a notice line.\n8. Execution path splits:\n 1. `async: true` -> `#startManagedBashJob()` registers a session async job and returns immediately.\n 2. Non-PTY with `bash.autoBackground.enabled` and an async job manager -> starts a managed job, waits up to `min(thresholdMs, timeoutMs - 1000)`, and either returns the completed result or converts the run into a background job.\n 3. Non-PTY client-terminal bridge, when the session advertises terminal capability and `pty` is false -> creates a remote terminal, streams/polls current output, and releases the terminal after completion.\n 4. Otherwise runs foreground execution.\n9. Foreground non-PTY without client terminal calls `executeBash()` from `packages/coding-agent/src/exec/bash-executor.ts`.\n10. Foreground PTY calls `runInteractiveBashPty()` from `packages/coding-agent/src/tools/bash-interactive.ts`.\n11. Local non-PTY and PTY paths allocate an output artifact first when `session.allocateOutputArtifact` is available. The artifact path/id are passed into the sink so large output can spill to disk.\n12. `executeBash()` loads shell settings, optional shell snapshot, and shell minimizer settings, then runs via a persistent native `Shell` session or one-shot `executeShell()`. `docs/bash-tool-runtime.md` covers that path in detail.\n13. `runInteractiveBashPty()` creates a `PtySession`, overlays an xterm-backed console UI, forwards user key input into the PTY, captures output through `OutputSink`, and kills the PTY on dismiss/dispose.\n14. Client-terminal bridge mode calls `session.getClientBridge().createTerminal(...)`, emits `terminalId` updates, polls output until exit/timeout/abort, maps signal exits to `137`, and releases the handle in `finally`.\n15. On completion, `#buildCompletedResult()` formats `(no output)` when needed, attaches truncation metadata from the output summary, appends wall-time/timeout/exit notices, and re-checks unfinished status before returning.\n16. On timeout, missing exit status, or cancellation, the tool throws with captured output included when available.\n\n## Modes / Variants\n1. Foreground non-PTY local\n - Default path when no client terminal bridge is available.\n - Uses `executeBash()`.\n - Streams tail-only updates through `streamTailUpdates()` and `TailBuffer(DEFAULT_MAX_BYTES)`.\n2. Foreground non-PTY client terminal\n - Used when `session.getClientBridge()?.capabilities.terminal` is true, `createTerminal` exists, and `pty` is false.\n - Streams current terminal output via polling updates with `details.terminalId`.\n - Enforces the same timeout and abort behavior, then releases the terminal handle.\n3. Foreground PTY\n - Requires `pty: true`, UI context, and `PI_NO_PTY !== \"1\"`.\n - Uses `runInteractiveBashPty()` and a `PtySession` overlay.\n - Supports interactive input; `Esc` kills the session from the overlay.\n4. Explicit background job\n - Requires `async: true` and `async.enabled`.\n - Registers a job with `session.asyncJobManager` and returns `{ state: \"running\", jobId }` immediately.\n5. Auto-backgrounded non-PTY job\n - Requires `bash.autoBackground.enabled`, no PTY, and an async job manager.\n - Starts like a foreground managed job, then backgrounds it when it outlives the wait window.\n6. Intercepted command\n - No subprocess created.\n - Returns a `ToolError` pointing the model at `read`, `search`, `find`, `edit`, or `write`.\n\n## Side Effects\n- Filesystem\n - Validates `cwd` with `fs.stat()`.\n - May allocate and write artifact files for full local output (`bash`) and minimizer-preserved raw output (`bash-original`).\n - `expandInternalUrls(..., { ensureLocalParentDirs: true })` creates parent directories for `local://` paths before execution.\n- Subprocesses / native bindings / client terminal\n - Non-PTY local execution uses native shell execution via `@oh-my-pi/pi-natives` (`Shell.run()` or `executeShell()`).\n - PTY uses native `PtySession.start()`.\n - Client-terminal mode delegates process execution to the connected client terminal capability.\n- Session state\n - Reads session settings for async, auto-background, interceptor, tool availability, and shell configuration.\n - Registers jobs with `session.asyncJobManager` for explicit/auto background runs.\n - Uses `session.getSessionId()` to isolate shell reuse and async session keys.\n - Uses `session.allocateOutputArtifact()` for spill files.\n- User-visible prompts / interactive UI\n - PTY mode opens a TUI overlay titled `Console` and forwards input to the PTY.\n - Background start messages direct the agent to the `job` tool (use `list: true` for a snapshot, or pass `poll: [id]` to wait).\n- Background work / cancellation\n - Async and auto-background jobs continue after the initial tool return.\n - Cancellation aborts the native run; PTY overlay dismissal also kills the PTY.\n\n## Limits & Caps\n- Default timeout: `300s` (`TOOL_TIMEOUTS.bash.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Timeout clamp: `1..3600s` (`TOOL_TIMEOUTS.bash.min/max`).\n- Auto-background default threshold: `60_000ms` (`DEFAULT_AUTO_BACKGROUND_THRESHOLD_MS` in `packages/coding-agent/src/tools/bash.ts`), further capped to `timeoutMs - 1000` by `#resolveAutoBackgroundWaitMs()`.\n- Hard kill grace beyond requested timeout in non-PTY executor: `5_000ms` (`HARD_TIMEOUT_GRACE_MS` in `packages/coding-agent/src/exec/bash-executor.ts`).\n- In-memory output tail cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). Once exceeded, the sink keeps only the tail window in memory.\n- Streaming callback throttle in `executeBash()`: `50ms` between `onChunk` calls when streaming is enabled.\n- TUI collapsed preview: `10` visual lines (`BASH_DEFAULT_PREVIEW_LINES`) when rendered inline in the agent UI; this is a renderer cap, not a tool output cap.\n\n## Errors\n- Input validation:\n - invalid env key -> `ToolError(\"Invalid bash env name: <key>\")`.\n - async requested while disabled -> `ToolError(\"Async bash execution is disabled...\")`.\n - missing async job manager -> `ToolError(\"Async job manager unavailable for this session.\")`.\n - missing/bad `cwd` -> `ToolError(\"Working directory does not exist: ...\")` or `ToolError(\"Working directory is not a directory: ...\")`.\n- Interceptor:\n - matched command -> `ToolError` with `Blocked: <rule.message>` and the original command.\n - invalid interceptor regexes are silently skipped by `compileRules()`.\n- Internal URL expansion:\n - unsupported scheme, unknown skill, path traversal, missing router support, or router resolution failures all throw `ToolError` from `packages/coding-agent/src/tools/bash-skill-urls.ts`.\n- Execution:\n - non-zero exit -> returned tool result marked `isError`, with `details.exitCode` and text ending in `Command exited with code <n>`.\n - missing exit code -> thrown `ToolError` with `Command failed: missing exit status`.\n - timeout -> thrown `ToolError`; PTY/client-terminal modes use `Command timed out after <n> seconds`, non-PTY executor returns cancelled output that `BashTool` converts to an error.\n - user abort -> `ToolAbortError` when the caller signal is aborted.\n- Artifact allocation / artifact save failures are swallowed in `saveBashOriginalArtifact()` and `OutputSink.#createFileSink()`; execution continues without that artifact.\n\n## Notes\n- `strict = true` and `concurrency = \"exclusive\"` are set on `BashTool`; the tool does not run concurrently with another bash tool call in the same session.\n- `command` URL expansions shell-escape replacements; `env` and `cwd` expansion use `noEscape: true` because they become environment values / filesystem paths, not shell text.\n- `checkBashInterception()` blocks only when the matching rule's `tool` name is present in `ctx.toolNames`; missing tools disable their corresponding rule.\n- Default interceptor rules come from `DEFAULT_BASH_INTERCEPTOR_RULES` in `packages/coding-agent/src/config/settings-schema.ts`:\n - `cat|head|tail|less|more` -> `read`\n - `grep|rg|ripgrep|ag|ack` -> `search`\n - `find|fd|locate` with name/type/glob flags -> `find`\n - `sed -i`, `perl -i`, `awk -i inplace` -> `edit`\n - `echo|printf|cat <<` with redirection -> `write`\n- PTY mode is ignored in non-UI contexts and when `PI_NO_PTY=1`; the tool silently falls back to non-PTY execution.\n- Non-PTY runs merge `NON_INTERACTIVE_ENV` with `env`; PTY runs also prepend `NON_INTERACTIVE_ENV` before custom env values.\n- When the shell minimizer rewrites output inside `executeBash()`, the visible output is replaced with minimized text and a `[raw output: artifact://<id>]` footer may be appended if `onMinimizedSave` persisted the original text.\n- The TUI renderer parses partial JSON to recover `env` assignments early in streaming previews; that behavior is display-only.\n- For executor internals that are not tool-specific — shell session reuse keys, snapshots, prefix handling, and native timeout behavior — see `docs/bash-tool-runtime.md`.\n",
|
|
76
|
+
"tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - `packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"load\"` where omitted, including `open` navigation and later `tab.goto(...)`. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. `details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. `open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker, defaulting to `waitUntil: \"load\"` when `wait_until` is omitted.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout; the worker defaults missing `waitUntil` to `\"load\"`.\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. `tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error after cache invalidation if the DOM changed or the cache was cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops on user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@oh-my-pi/pi-natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `300` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: 150 ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path after cwd resolution, not an app bundle directory path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
|
|
77
|
+
"tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n",
|
|
78
|
+
"tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| \"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. `attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. `disassemble` uses this when provided; otherwise it falls back to the current stopped location's instruction-pointer reference if the adapter supplied one. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. Not used by `disassemble`. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. |\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`, and either `memory_reference` or a current stopped location with `instructionPointerReference`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- Approval is action-sensitive: read-only actions (`output`, `threads`, `stack_trace`, `scopes`, `variables`, `disassemble`, `read_memory`, `loaded_sources`, `modules`, `sessions`) request read approval; all other actions request exec approval.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `omp-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around `memory_reference`, or around the current stopped instruction pointer when no memory reference is supplied.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- **Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@oh-my-pi/pi-natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@oh-my-pi/pi-natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and SVGs.\n - Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; older text is trimmed in ~1 KiB slices and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in `packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: omp-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `instruction_count is required for disassemble`\n - `disassemble requires memory_reference unless the current stop location has an instruction pointer reference`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; that is not advisory, it is enforced in code.\n- `configurationDone` is sent automatically both during launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then the current stopped session's `instructionPointerReference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
|
|
79
|
+
"tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/hashline/src/prompt.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/hashline/src/grammar.lark` — canonical constrained-decoding grammar\n - `packages/hashline/src/format.ts` — sigils and header constants (`¶`, `#`, `+`, `replace`, `delete`, `insert`)\n - `packages/hashline/src/input.ts` — parses `¶PATH#TAG` sections\n - `packages/hashline/src/tokenizer.ts` / `packages/hashline/src/parser.ts` — tokenizes and parses ops\n - `packages/hashline/src/apply.ts` — applies parsed edits to file text\n - `packages/hashline/src/mismatch.ts` — stale-anchor mismatch formatting\n - `packages/hashline/src/recovery.ts` — snapshot-based stale-anchor recovery\n - `packages/hashline/src/snapshots.ts` — mints and resolves per-path opaque snapshot tags\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more file sections. Anchored sections must start with `¶PATH#TAG`; `TAG` is the four-hex snapshot tag emitted by the latest `read`/`search`/`write`/successful `edit`. Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. |\n\nPatch language inside `input`:\n\n- **File header**: `¶PATH#TAG`. `TAG` is four uppercase-hex chars minted by the session snapshot store.\n- **Operations**:\n - `replace N..M:` — replace original lines N..M with the body rows below.\n - `replace block N:` — replace the whole tree-sitter block beginning on line N (its header line through its closing line) with the body rows. The line span is resolved at apply time from the file's parse tree; point N at the line that opens the construct. The resolved span is exactly the node that begins on line N — a leading decorator, attribute, or doc-comment is a separate node and is not included; point N at the first decorator line (Python wraps `@dec` + `def` as one block) or fall back to `replace N..M:` to take a leading line-comment that parses as its own node (e.g. Rust `///`). On success the result echoes the matched span (`replace block N → resolved lines A-B`). Errors (and steers to `replace N..M:`) when the language is unsupported, line N is blank or a closing delimiter, no node begins there, or the resolved block has a syntax error.\n - `delete N..M` — delete original lines N..M. No body.\n - `delete block N` — delete the whole tree-sitter block beginning on line N (resolved like `replace block N`, with the same decorator/comment caveat). No body. On success the result echoes the matched span (`delete block N → resolved lines A-B`). Same resolution failure modes and `delete N..M` fallback.\n - `insert before N:` — insert body rows immediately before line N.\n - `insert after N:` — insert body rows immediately after line N.\n - `insert head:` — insert body rows at the start of the file.\n - `insert tail:` — insert body rows at the end of the file.\n- **Body rows**:\n - Only body-bearing headers end in `:`.\n - Every body row is `+TEXT`; `+` alone adds a blank line.\n - `delete` never has body rows.\n - There is no repeat row kind. To keep a line, leave it out of every range; split edits into multiple hunks when needed.\n - `-` rows are invalid. Literal text beginning with `-` or `+` must be written as `+-text` / `++text`.\n\nAnchors come from `read`/`search` output. `read` emits a `¶PATH#TAG` header from the session snapshot store and lines as `LINE:TEXT`; copy the header into the edit section and copy only the line number into hunk headers.\n\n### Tolerated input shapes (lenient parsing)\n\nThe canonical grammar is strict, but the hand parser accepts a few non-dangerous variants:\n\n- `replace N:` — accepted as `replace N..N:`.\n- `delete N` — accepted as single-line delete.\n- Missing trailing colon on `replace` or `insert` — accepted.\n- `replace N-M:`, `replace N…M:`, and `replace N M:` — accepted as `replace N..M:`.\n- Bare body rows with no `+` prefix are auto-prepended with `+` and a `BARE_BODY_AUTO_PIPED_WARNING` is appended.\n- `*** Begin Patch` / `*** End Patch` envelopes are silently consumed. `*** Abort` terminates parsing silently — ops parsed before the marker still apply, no warning surfaced.\n- Some malformed `¶` headers are recovered after stripping apply-patch path noise such as `Update File:` / `Add File:` and extra `***`, but the recovered header still needs a valid four-hex tag for the patcher to apply it.\n- `*** Update File:` / `*** Add File:` / `*** Delete File:` / `*** Move to:` apply_patch sentinels inside the diff body throw an `apply_patch sentinel … is not valid in hashline` error.\n- `@@`-bracketed hunk headers are rejected with guidance to write a verb header.\n- Bare `N` and bare `N M` / `N..M` headers are rejected with guidance to write `replace` or `delete`.\n- `delete N..M:` and any body rows under `delete` / `delete block` are rejected.\n- Empty `replace` / `insert` / `replace block` hunks are rejected.\n- `-` body rows are rejected with `MINUS_ROW_REJECTED`.\n- `replace block N:` / `delete block N` require a wired tree-sitter resolver; `replace block` additionally needs at least one `+TEXT` body row, while `delete block` takes none. An unresolvable block (unsupported language, blank/closing-delimiter line, no node beginning on N, or a syntax error in the resolved block) is rejected on the apply/final-preview path; the streaming preview silently drops it instead.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/hashline/src/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- When the patch used `replace block`/`delete block` ops (and the apply matched the tagged content), one `replace block N → resolved lines A-B (K lines)` line per block op is inserted between the `¶PATH#TAG` header and the diff preview, so the caller can confirm tree-sitter resolved the construct it intended.\n- Parse, apply, or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n\n## Worked examples\n\nReference file (the exact shape `read` returns):\n\n```text\n¶a.ts#0A3B\n1:const X = \"a\";\n2:const Y = X;\n3:\n4:console.log(X);\n5:console.log(Y);\n6:export { X, Y };\n```\n\nReplace line 1 with two lines:\n\n```text\n¶a.ts#0A3B\nreplace 1..1:\n+const X = \"b\";\n+export const Y = X;\n```\n\nInsert below line 5:\n\n```text\n¶a.ts#0A3B\ninsert after 5:\n+console.log(X + Y);\n```\n\nInsert above line 5:\n\n```text\n¶a.ts#0A3B\ninsert before 5:\n+console.log(X + Y);\n```\n\nDelete lines 4..5 entirely:\n\n```text\n¶a.ts#0A3B\ndelete 4..5\n```\n\nInsert at start and end of file:\n\n```text\n¶a.ts#0A3B\ninsert head:\n+// header\ninsert tail:\n+// trailer\n```\n\nMulti-file:\n\n```text\n¶src/a.ts#0A3B\nreplace 4..4:\n+const enabled = true;\n¶src/b.ts#1F7C\ndelete 20\n```\n\n## Limits & Caps\n- File snapshot tags are exactly four uppercase-hex chars minted by the per-session snapshot store.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/hashline/src/messages.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 0` in `packages/hashline/src/recovery.ts`.\n- `HL_FILE_PREFIX` is `¶`, `HL_PAYLOAD_REPLACE` is `+`, `HL_RANGE_SEP` is `..`, `HL_FILE_HASH_SEP` is `#`, and hunk keyword constants are `replace` / `delete` / `insert` (`packages/hashline/src/format.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"¶PATH#HASH\" on the first non-blank line for anchored edits; got: ...`\n- Missing tag for any section:\n - `Missing hashline snapshot tag for anchored edit to <path>; use ¶<path>#tag from your latest read/search output.`\n- Stray payload line:\n - `line N: payload line has no preceding hunk header. Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` above the body. Got \"...\".`\n- Minus row:\n - ``line N: `-` rows are not valid; hashline ranges already name the lines being changed. To insert a literal line starting with `-`, write `+-…`.``\n- Empty body-bearing hunk:\n - `line N: \\`replace N..M:\\` needs at least one \\`+TEXT\\` body row. To delete lines, use \\`delete N..M\\`.`\n - `line N: \\`insert\\` needs at least one \\`+TEXT\\` body row.`\n - `line N: \\`replace block N:\\` needs at least one \\`+TEXT\\` body row. To delete a block, use \\`delete N..M\\` with the block's line range.`\n- Unresolvable `replace block N:` (apply / final-preview path only):\n - `line N: \\`replace block X:\\` could not resolve a syntactic block beginning on line X. The language may be unsupported, the line may be blank or a closing delimiter, or the block may not parse. Use \\`replace X..M:\\` with the block's explicit end line instead.` — followed by a blank line and numbered `*`-marked context rows around line X (same shape as the mismatch preview).\n- Delete with body:\n - `line N: \\`delete N..M\\` does not take body rows. Remove the body, or use \\`replace N..M:\\`.`\n - `line N: \\`delete block N\\` does not take body rows. Remove the body, or use \\`replace block N:\\` to replace the block.`\n- Range out of order:\n - `line N: range A..B ends before it starts.`\n- Overlapping hunks on the same anchor:\n - `line N: anchor line X is already targeted by another hunk on line Y. Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.`\n- apply_patch / unified-diff contamination:\n - `line N: apply_patch sentinel \"*** …\" is not valid in hashline. File sections start with \\`¶path#HASH\\` (no \\`Update File:\\` / \\`Add File:\\` keyword). Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` ops.`\n - `line N: unified-diff hunk header (\\`@@ -N,M +N,M @@\\`) is not valid in hashline. Use \\`replace N..M:\\`, \\`delete N..M\\`, or \\`insert before|after|head|tail:\\` ops.`\n - `line N: \\`@@\\`-bracketed hunk header \"@@ …\" is not valid in hashline. Drop the \\`@@ ... @@\\` brackets and write a verb header such as \\`replace N..M:\\`.`\n - `line N: hunk headers need a verb. Use \\`replace N..N:\\` to replace, or \\`delete N\\` to delete.`\n - `line N: bare range hunk header \"N M\" is not valid. Hunk headers need a verb: write \\`replace N..M:\\` or \\`delete N..M\\`.`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale snapshot tag: the `Patcher` first attempts snapshot-based recovery. When recovery cannot prove a valid result it throws `MismatchError`, which distinguishes recognized-but-drifted hashes from never-recorded hashes. The error includes the current file hash plus context around each anchor.\n- No-op edit:\n - `Edits to <path> parsed and applied cleanly, but produced no change: your body row(s) are byte-identical to the file at the targeted lines. The bug is somewhere else — re-read the file before issuing another edit. Do NOT widen the payload or add lines; verify the anchor first.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the mismatch error is surfaced unchanged.\n\n## Warnings\n- `Auto-prefixed bare body row(s) with +. Body rows must be +TEXT literal lines …` (`BARE_BODY_AUTO_PIPED_WARNING`)\n- Recovery banners: `RECOVERY_EXTERNAL_WARNING`, `RECOVERY_SESSION_CHAIN_WARNING`, `RECOVERY_SESSION_REPLAY_WARNING` (`packages/hashline/src/messages.ts`).\n",
|
|
80
|
+
"tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`/`python -e`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/agent-bridge.ts` — host-side `agent()` bridge into the subagent executor\n - `packages/coding-agent/src/eval/js/executor.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/worker-core.ts` — JS execution, VM context, display/log capture\n - `packages/coding-agent/src/eval/js/shared/prelude.txt` — JS global helper installer\n - `packages/coding-agent/src/eval/js/shared/helpers.ts` — JS filesystem/text/env helper implementations\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\nTool parameters are a JSON object with a single `cells` field — an ordered array of cell objects. Each cell is a structured record; there is no `*** Cell` header parsing, no language sniffing, and no implicit single-cell fallback. Cells run in array order; state persists within each language across cells and across tool calls.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `cells` | `EvalCellInput[]` | Yes | Cells executed in order. At least one cell is required (`.min(1)`). |\n\nEach `EvalCellInput` (from `evalCellSchema` in `packages/coding-agent/src/tools/eval.ts`):\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `language` | `\"py\" \\| \"js\"` | Yes | Backend selector. `\"py\"` maps to the IPython/Jupyter kernel (`python` backend); `\"js\"` maps to the persistent JavaScript VM. |\n| `code` | `string` | Yes | Cell body, verbatim. JSON-encoded — embed newlines, quotes, and indentation directly; no fences, no headers. |\n| `title` | `string` | No | Short label rendered in the transcript (e.g. `\"imports\"`, `\"load config\"`). |\n| `timeout` | `integer` | No | Per-cell timeout in seconds, clamped to `1..600`. Defaults to 30 when omitted. |\n| `reset` | `boolean` | No | Wipe this cell's language kernel before running. Reset is per-language: a `py` cell's reset does not touch the JS VM and vice versa. Defaults to `false`. |\n\nMinimal example matching the live schema:\n\n```json\n{\n \"cells\": [\n { \"language\": \"py\", \"title\": \"imports\", \"timeout\": 10, \"code\": \"import json\\nfrom pathlib import Path\" },\n { \"language\": \"py\", \"title\": \"load config\", \"code\": \"data = json.loads(read('package.json'))\\ndisplay(data)\" },\n { \"language\": \"js\", \"title\": \"summary\", \"reset\": true, \"code\": \"const data = JSON.parse(await read('package.json'));\\ndisplay(data);\\nreturn data.name;\" }\n ]\n}\n```\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, `(displayed N image(s); no text output)` when only images exist, or `(no output)` when nothing visible was produced; image outputs are appended as additional image content blocks.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice (currently unused; reserved for future per-cell notices)\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders each cell's `code` with syntax highlighting based on its declared `language`\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / truncation notices render as dim metadata lines\n- images are returned as content image blocks; live updates may also carry `details.images` while execution is in progress\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` receives `params.cells` already validated by the Zod schema — no string parsing step.\n2. For each cell, `execute()` maps `cell.language` to an `EvalLanguage` (`\"py\"` → `\"python\"`, `\"js\"` → `\"js\"`) and calls `resolveBackend(session, language)`:\n - `python` is gated on `eval.py !== false` and `pythonBackend.isAvailable(session)`.\n - `js` is gated on `eval.js !== false`.\n - A disabled or unavailable requested backend throws `ToolError`; there is no auto-fallback or sniffing.\n3. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n4. It resolves the executor session id from `session.getEvalSessionId?.()`, falling back to `defaultEvalSessionId(session)`. Subagents inherit the parent's id so both sides share the same JS VM and Python kernel for each backend.\n5. Cells execute sequentially within one eval tool call. For each cell, `execute()`:\n - clamps `cell.timeout ?? 30` seconds through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset` (defaults to `false`), artifact info, and chunk callback\n6. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n7. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n8. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n9. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n10. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Backend selection\n\nBackend choice is **explicit per cell** — there is no auto-detection.\n\n- `language: \"py\"` → Python (IPython/Jupyter) backend\n- `language: \"js\"` → JavaScript VM backend\n\nIf the requested backend is disabled or unavailable, the tool throws `ToolError` for that cell. The caller chooses; the tool does not silently substitute.\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/worker-core.ts`, `packages/coding-agent/src/eval/js/shared/prelude.txt`, and `packages/coding-agent/src/eval/js/shared/helpers.ts`.\n\n- Persistent worker-backed VM sessions keyed by `js:${sessionId}`\n- `reset: true` calls `resetVmContext(sessionKey)` before the cell executes; reset is destructive for all live runs on that JS session\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__omp_import__` so the specifier resolves against the session cwd. Dynamic-import call sites are swapped for a guarded shim (`typeof __omp_import__ === \"function\" ? __omp_import__ : (s, o) => import(s, o)`) rather than the bare helper identifier: functions handed to puppeteer (`tab.evaluate`, `page.evaluate`, ...) are serialized with `Function.prototype.toString()` and re-evaluated inside the browser page, where the worker-injected helper does not exist, so the shim falls back to native dynamic import there\n- Module cache is busted for **local** imports between cells so edits to source files are picked up without restarting the runtime. `__omp_import__` deletes `require.cache[absPath]` before re-importing whenever the original specifier is a filesystem path: relative (`./x`, `../x`, `.`, `..`), POSIX-absolute (`/...`), home-prefixed (`~/...`), or Windows drive-letter (`C:\\...` / `C:/...`). Bare specifiers (`react`, `lodash/x`) and URL/scheme specifiers (`node:fs`, `file://...`, `https://...`) are left in cache so package identity stays stable across cells. The cache-bust only fires when the resolved target is an absolute path — unresolved bare-package fallbacks (`resolveImportSpecifier()` returning the original specifier) skip it.\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n - `completion(prompt, opts?)` for oneshot, stateless model calls (see _Oneshot completion helper_ below)\n - `agent(prompt, opts?)` for a single subagent call, plus `parallel()` / `pipeline()` bounded-pool helpers (see _Subagent helper_ below)\n- JS helpers that touch the host/runtime boundary are async and `await`able; pure text helpers (`sort`, `uniq`, `counter`) return synchronously but may still be safely awaited.\n- JS helper signatures use a trailing options object rather than Python keyword arguments:\n - `await read(path, { offset?, limit? })`\n - `await tree(path = \".\", { maxDepth?, hidden? })`\n - `sort(text, { reverse?, unique? })`, `uniq(text, { count? })`, `counter(items, { limit?, reverse? })`\n - `await agent(prompt, { agentType?, model?, label?, schema? })`\n - `await parallel([() => agent(\"a\"), () => agent(\"b\")])`\n - `await pipeline(items, stage1, stage2)`\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Concurrent runs on the same VM are not queued end-to-end. Synchronous JS still runs on the single event loop; awaited regions can interleave with sibling runs.\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `reset: true` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run in the runner's persistent asyncio event loop, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines helpers with the same surface as JS where practical, including `tool.<name>(args)`, `completion(...)`, and `agent(...)` through a per-run loopback bridge\n- Synchronous statement blocks run in the default executor with ContextVar state copied in; the GIL still serializes bytecode execution, but awaited regions can interleave with sibling cells\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-omp-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Oneshot completion helper (`completion`)\n\nBoth runtimes expose `completion()` — a single stateless completion against a model tier. It is intentionally minimal: no conversation history, no agent-visible tools, pure text in / text (or object) out. Implemented host-side in `packages/coding-agent/src/eval/completion-bridge.ts` and routed through the existing tool bridge under the reserved name `__completion__`.\n\n- Signatures:\n - JS: `await completion(prompt, { model?, system?, schema? })`\n - Python: `completion(prompt, *, model=\"default\", system=None, schema=None)`\n- `model` selects a tier (default `\"default\"`):\n - `\"smol\"` → `pi/smol` role (fast / cheap)\n - `\"default\"` → the session's active model, falling back to the `pi/default` role\n - `\"slow\"` → `pi/slow` role; requests high reasoning effort only on reasoning-capable models\n- `system` (optional) supplies a system prompt.\n- `schema` (optional) is a plain JSON-Schema object. When present, the model is forced to call a single synthetic `respond` tool with that schema (loose, non-strict), and the helper returns the parsed object. When absent, the helper returns the completion string.\n- Errors surface as exceptions: unresolved tier, missing API key, an `error`/`aborted` stop reason, or empty output each raise.\n\n### Subagent helper (`agent`)\n\nBoth runtimes expose `agent()` — a single subagent invocation routed through `packages/coding-agent/src/eval/agent-bridge.ts` into the same `runSubprocess(...)` path used by the `task` tool. It uses the current eval session's spawn policy and inherits the parent eval executor id, so parent and subagent code share JS/Python runtime state.\n\n- Signatures:\n - JS: `await agent(prompt, { agentType?, model?, label?, schema? })`\n - Python: `agent(prompt, *, agent_type=\"task\", model=None, label=None, schema=None)`\n- `agentType` / `agent_type` defaults to the bundled `task` agent and resolves through normal agent discovery, so project and user agents work.\n- `model` overrides the selected agent's model. Without it, normal per-agent settings and the agent frontmatter model apply.\n- Shared background is passed via files: write a `local://` file and reference it in the prompt. `label` controls the `agent://<id>` output label prefix.\n- `schema` passes a JSON Schema to the subagent structured-output path. When present, the helper parses the final JSON text and returns an object.\n- Spawn restrictions use `session.getSessionSpawns()` exactly like the `task` tool. Eval-driven subagent recursion is capped at depth 3.\n- JS and Python both expose `parallel(thunks)` and `pipeline(items, ...stages)`; both use a bounded async/threaded pool whose width tracks the `task.maxConcurrency` setting (the same ceiling the `task` tool uses; `0` = run every item at once), preserve item order, and propagate rejections. The width is fetched live from the host via the `__concurrency__` bridge, so the helpers no longer take a `concurrency` argument.\n- Errors surface as exceptions: unknown or disabled agent, disallowed spawn, recursion cap, subagent failure, or invalid structured output all fail the eval cell.\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. Persistence is per language runtime:\n\n- `reset: true` on a Python cell does not touch JS state\n- `reset: true` on a JS cell does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse filesystem paths under the session cwd or absolute paths.\n - JS helper `read()` rejects protocol URIs (`://`) and directory paths; use `tool.read(...)` for internal URLs or reader-mode behavior.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n - `agent()` runs one in-process subagent via the task executor; that subagent may use its configured tools.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()`, `session.getEvalSessionId?.()`, and `session.getEvalKernelOwnerId?.()` influence VM/kernel reuse and artifact lookup.\n - JS VM contexts persist across eval calls until reset/disposal.\n - Python retained kernels persist until reset, owner cleanup, or process exit.\n - `agent()` allocates `agent://<id>` output artifacts and reuses the parent's eval executor id.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation hard-kills/resets the shared executor for that backend: JS terminates the worker, Python sends SIGINT and may escalate to subprocess shutdown.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (applied when `timeout` is omitted in `EvalTool.execute()`; clamped through `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Schema-level `timeout` range: integer `1..600` seconds (enforced by Zod on the cell schema)\n- Timeout clamp at runtime: 1s minimum, 600s maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- JS/Python `parallel()` / `pipeline()` helper pool width: the `task.maxConcurrency` setting (default 32; `0` = unbounded), resolved live via the `__concurrency__` bridge (`packages/coding-agent/src/eval/concurrency-bridge.ts`)\n- Eval-driven `agent()` recursion cap: task depth 3 (`EVAL_AGENT_MAX_DEPTH`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Zod validation rejects malformed `cells` arrays before `execute()` runs (missing `language`/`code`, out-of-range `timeout`, empty `cells`).\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false` and a `py` cell is requested\n - `eval.js = false` and a `js` cell is requested\n - Python kernel unavailable and a `py` cell is requested\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Shared executor trade-offs\n\n- Parent agents and subagents share eval state bidirectionally when a subagent inherits the parent's executor id. Mutations in either direction are visible to the other participant.\n- Async regions of concurrent runs can interleave. Synchronous JS still blocks the VM event loop; synchronous Python still contends on the GIL.\n- Cancelling one run is destructive to the shared backend executor. This is intentional: JS worker termination and Python SIGINT/subprocess shutdown are the only reliable way to interrupt arbitrary user code.\n- `reset: true` is destructive for every live run on that backend session id. New starts on that backend are rejected while reset is in flight.\n\n## Notes\n\n- Backend selection is strictly explicit per cell: `language` must be `\"py\"` or `\"js\"`. The previous `*** Cell` header parser, the `eval.lark` constrained grammar, and the sniffer-based fallback have all been removed.\n- `EvalTool.customFormat` no longer exists. Tool calls flow through the standard JSON schema; there is no Lark-constrained sampling path.\n- `tool.<name>()` exists in both JS and Python. Python calls route through a per-run loopback bridge keyed by the current cell id.\n- JS helper paths reject protocol URIs (`://`) in `resolveRegularFile()` for `read()`, and resolve other paths against the session cwd or absolute filesystem path. Use `tool.read(...)` or another tool explicitly for internal URLs.\n- Python helper `output(...)` depends on `PI_ARTIFACTS_DIR` or `PI_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"` within one agent session, but parent and subagent sessions can run eval concurrently when they share an inherited executor id.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
|
|
81
|
+
"tools/find.md": "# find\n\n> Find filesystem paths by glob; use `search` when you need content matches instead of path matches.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/find.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/find.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — normalize inputs; split base path vs glob.\n - `packages/coding-agent/src/tools/list-limit.ts` — apply result-count caps.\n - `packages/coding-agent/src/session/streaming-output.ts` — truncate text output at byte cap.\n - `packages/coding-agent/src/tools/tool-result.ts` — build `content` and `details.meta`.\n - `packages/coding-agent/src/tools/output-meta.ts` — encode limit / truncation metadata.\n - `packages/coding-agent/src/tools/tool-errors.ts` — map user-facing tool errors.\n - `packages/coding-agent/src/tools/index.ts` — register the built-in local implementation.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `paths` | `string[]` | Yes | One or more globs, files, directories, or internal URLs with backing files. Empty strings are rejected. Single entries accidentally joined with comma, semicolon, or whitespace are expanded only after existence validation; existing paths containing delimiters stay intact. Multiple entries may be merged into one brace-union search when their base paths can be resolved together. |\n| `hidden` | `boolean` | No | Whether hidden files are included. Defaults to `true` (`hidden ?? true`). |\n| `gitignore` | `boolean` | No | Whether `.gitignore` is respected during local native globbing. Defaults to `true`; set `false` to include gitignored files. |\n| `limit` | `number` | No | Max returned paths. Defaults to `200`; finite positive inputs are floored then clamped to `1..200`. |\n| `timeout` | `number` | No | Timeout in seconds. Defaults to `5`; clamped to `0.5..60`. On timeout, returns partial matches collected so far with a timeout notice and `truncated: true`. |\n\n## Outputs\nThe tool returns a single text block plus structured `details`.\n\n- Success text: matching paths grouped by directory. Each non-root group starts with `# <dir>/` and then lists basenames; root-level matches are listed without a header. Directory matches carry a trailing `/`. Exact file inputs return that file path as one line.\n- Empty result text: `No files found matching pattern`, optionally followed by a timeout or missing-path notice.\n- Multi-path partial miss: appends `Skipped missing paths: ...` after the result block, or after the empty-result line.\n- `details` may include:\n - `scopePath`: display form of the searched root or merged roots.\n - `fileCount`: number of paths returned after result limiting.\n - `files`: returned paths as an array.\n - `truncated`: whether result count or byte truncation occurred.\n - `resultLimitReached`: reached result limit.\n - `missingPaths`: skipped missing inputs in multi-path calls.\n - `truncation` / `meta.limits`: structured truncation and limit metadata for renderers.\n- Streaming: when the runtime supplies `onUpdate`, the local implementation emits incremental newline-delimited text snapshots during globbing, throttled to 200 ms. Final output is grouped; streaming snapshots are not.\n\n## Flow\n\n1. `FindTool.execute()` expands delimiter-flattened local `paths` entries with `expandDelimitedPathEntries(..., parseFindPattern)` unless custom operations are injected. The splitter validates candidate parts by statting their parsed base paths, keeps existing delimiter-containing paths intact, accepts comma/semicolon splits when at least one part resolves, and accepts whitespace splits only when every part resolves.\n2. The tool normalizes each resulting entry with `normalizePathLikeInput()` and `/\\\\/g -> \"/\"` (`packages/coding-agent/src/tools/find.ts`). Empty normalized entries fail with `` `paths` must contain non-empty globs or paths ``.\n3. For multi-path local calls, `partitionExistingPaths(..., parseFindPattern)` (`packages/coding-agent/src/tools/path-utils.ts`) stats each base path. Missing entries are skipped; if all are missing, the tool throws `Path not found: ...`. Single missing paths still hard-fail.\n4. The tool tries `resolveExplicitFindPatterns()` to merge multiple inputs into one search rooted at a common base path. If that does not apply, it parses one input with `parseFindPattern()`.\n5. `parseFindPattern()` determines `(basePath, globPattern, hasGlob)`:\n - no glob chars (`*`, `?`, `[`, `{`) => search that path with implicit `**/*`.\n - glob in the first segment => search from `.` and, unless the pattern already starts with `**/`, prefix it with `**/`.\n - glob later in the path => split at the first glob-bearing segment.\n6. `resolveToCwd()` converts the base path to an absolute path under the session cwd. A resolved `/` is rejected with `Searching from root directory '/' is not allowed`.\n7. `limit` defaults to `DEFAULT_LIMIT` (`200`), must be positive and finite, is floored, then clamped to `MAX_LIMIT` (`200`). `hidden` and `gitignore` both default to `true`. `timeout` is converted to milliseconds and clamped to `500..60_000` before building an `AbortSignal.timeout(...)`.\n8. Execution then branches:\n - **Custom operations branch**: if `FindToolOptions.operations.glob` exists, the tool checks existence with `operations.exists()`, short-circuits exact-file inputs via `operations.stat()` when available, then calls `operations.glob(globPattern, searchPath, { ignore: [\"**/node_modules/**\", \"**/.git/**\"], limit })`.\n - **Built-in local branch**: the tool stats `searchPath`. Exact-file inputs return immediately. Directory inputs call `natives.glob()` with `hidden`, `maxResults: effectiveLimit`, `sortByMtime: true`, `gitignore: useGitignore`, and the combined abort signal.\n9. In the local branch, optional `onMatch` callbacks convert each match to a cwd-relative display path and emit throttled progress updates.\n10. After native glob returns, JS sorts `result.matches` by `mtime` descending (`(b.mtime ?? 0) - (a.mtime ?? 0)`) before formatting paths.\n11. `buildResult()` applies `applyListLimit()` to cap the array again at `effectiveLimit`, formats paths with `formatFindGroupedOutput()`, appends notices, then runs `truncateHead()` with `maxLines: Number.MAX_SAFE_INTEGER`. In practice this leaves the 50 KB byte cap in place while disabling the default 3000-line cap.\n12. `toolResult()` packages text plus `details`, and records result-limit / truncation metadata for renderers.\n\n## Modes / Variants\n- **Exact file path**: if the parsed input has no glob and the resolved path stats as a file, output is that one path.\n- **Directory path**: if the parsed input has no glob and stats as a directory, the tool searches it with implicit `**/*`.\n- **Single glob path**: one input parsed by `parseFindPattern()`.\n- **Merged multi-path search**: multiple inputs resolved by `resolveExplicitFindPatterns()` into one brace-union glob rooted at a common base path.\n- **Partial multi-path search with missing inputs**: local multi-path calls skip missing base paths and surface them as `missingPaths` / `Skipped missing paths: ...`.\n- **Internal URL input**: supported when the internal router resolves the URL to a backing file. Internal URL globs are rejected.\n- **Custom delegated search**: uses injected `FindOperations` instead of local fs + native glob.\n\n## Side Effects\n- Filesystem\n - Stats the resolved base path, and in local multi-path mode stats every candidate base path up front.\n - Does not write files.\n- Subprocesses / native bindings\n - Built-in local mode calls the native `@oh-my-pi/pi-natives` glob implementation.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Emits structured progress updates when `onUpdate` is provided.\n - Adds truncation / limit metadata to the tool result.\n- Background work / cancellation\n - Local globbing is cancellable through the caller abort signal plus the configured internal timeout.\n\n## Limits & Caps\n- Default result limit: `200` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/find.ts`).\n- Maximum result limit: `200` (`MAX_LIMIT`); larger inputs are clamped.\n- Local glob timeout: default `5000` ms, clamped to `500..60_000` ms.\n- Output byte cap: `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`).\n- Default generic line cap in `truncateHead()` is `3000`, but `find` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so byte size — not line count — is the practical output truncation cap.\n- Streaming update throttle: `200` ms between `onUpdate` emissions.\n- Sort order: most recent `mtime` first in the built-in local branch and promised in the prompt. The tool re-sorts in JS even though native glob receives `sortByMtime: true` so native code can still stop early at `maxResults`.\n\n## Errors\n- User-facing `ToolError`s from `FindTool.execute()` include:\n - `` `paths` must contain non-empty globs or paths ``\n - `Path not found: ...`\n - `Searching from root directory '/' is not allowed`\n - `Limit must be a positive number`\n - `Path is not a directory: ...`\n - timeout result text is `find timed out after <seconds>s; returning <N> partial matches — increase timeout or narrow pattern` and is returned as a successful, truncated partial result rather than an error.\n- If the caller aborts, the local branch converts `AbortError` into `ToolAbortError`.\n- Non-`ENOENT` stat failures and other unexpected errors are rethrown.\n- Empty matches are not errors; they return the no-files text result.\n\n## Notes\n- Reach for `find` for filename / path discovery. Reach for `search` when the selection criterion is file contents or regex matches; `search` takes a `pattern` and returns anchored content matches, while `find` only returns matching paths (`packages/coding-agent/src/prompts/tools/find.md`, `packages/coding-agent/src/prompts/tools/search.md`).\n- Bare top-level globs are made recursive. `*.ts` is parsed as base `.` plus glob `**/*.ts`; `src/*.ts` stays rooted at `src` with a non-recursive `*.ts` segment; `src/**/*.ts` preserves explicit recursion.\n- `.gitignore` defaults to enabled in the built-in local branch. Use `gitignore: false` to disable it for native traversal.\n- `hidden` defaults to `true`; hidden-file exclusion is opt-out, not opt-in.\n- Multi-path missing-input tolerance only applies in the built-in local branch. The custom-operations branch hard-fails the first missing `searchPath` it checks.\n- The custom `FindOperations.glob()` hook receives `ignore` and `limit`, but not the `hidden` flag or an explicit `.gitignore` toggle. A remote delegate must account for that itself if it wants parity with the local branch.\n- Built-in local globbing does not force `fileType: File`; it can return files and directories from native glob. Directory outputs also occur through exact-path passthrough or custom delegates that return them.",
|
|
82
|
+
"tools/github.md": "# github\n\n> Dispatch GitHub CLI operations for repositories, issues, pull requests, search, and Actions run watching.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/gh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/github.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/gh-format.ts` — shorten commit SHAs for summaries.\n - `packages/coding-agent/src/tools/gh-renderer.ts` — TUI rendering, especially `run_watch` live/result views.\n - `packages/coding-agent/src/utils/git.ts` — `gh`/`git` process wrappers, repo locking, branch config writes.\n - `packages/utils/src/dirs.ts` — base directory for dedicated PR worktrees.\n - `packages/coding-agent/src/sdk.ts` — session artifact allocation hook.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename format `<id>.<toolType>.log`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"repo_view\" \\| \"pr_create\" \\| \"pr_checkout\" \\| \"pr_push\" \\| \"search_issues\" \\| \"search_prs\" \\| \"search_code\" \\| \"search_commits\" \\| \"search_repos\" \\| \"run_watch\"` | Yes | Dispatch selector. `GithubTool.execute()` switches only on this field. |\n| `repo` | `string` | No | `owner/repo` override. Ignored when the identifier argument is already a full GitHub URL. For `search_issues`/`search_prs`/`search_code`/`search_commits`, defaults to the current checkout's `owner/repo` when omitted (skipped when the query already contains a `repo:`/`org:`/`user:`/`owner:` qualifier or when current-repo resolution fails). Required in practice when `gh` cannot infer repo context from the current checkout. |\n| `branch` | `string` | No | Used by `repo_view`, `pr_push`, and `run_watch`. `run_watch` falls back to current git branch when `run` is omitted; `pr_push` falls back to current branch. |\n| `pr` | `string \\| string[]` | No | Used by `pr_checkout`. Each item may be a PR number, branch name, or GitHub PR URL. Array form enables batching. Omitted means current branch PR. |\n| `force` | `boolean` | No | Used only by `pr_checkout`. Defaults to `false`; allows resetting an existing `pr-<number>` local branch to the PR head commit. |\n| `forceWithLease` | `boolean` | No | Used only by `pr_push`; passed through to git push. |\n| `title` | `string` | No | Used only by `pr_create`. Required unless `fill` is `true`. |\n| `body` | `string` | No | Used only by `pr_create`. Mutually exclusive with `fill`. Empty/omitted body becomes `--body \"\"` to suppress the interactive editor. Non-empty body is written to a temp file and passed as `--body-file`. |\n| `base` | `string` | No | Used only by `pr_create`; passed as `--base`. |\n| `head` | `string` | No | Used only by `pr_create`; passed as `--head`. |\n| `draft` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. |\n| `fill` | `boolean` | No | Used only by `pr_create`. Defaults to `false`. Mutually exclusive with `title` and `body`. |\n| `reviewer` | `string[]` | No | Used only by `pr_create`; each entry becomes `--reviewer`. |\n| `assignee` | `string[]` | No | Used only by `pr_create`; each entry becomes `--assignee`. |\n| `label` | `string[]` | No | Used only by `pr_create`; each entry becomes `--label`. |\n| `query` | `string` | No | Used by all `search_*` ops. Required by local validation only for `search_code`; the other search ops compose it with optional date/repo/type qualifiers and send the result to GitHub. |\n| `since` | `string` | No | Lower date bound for `search_issues`, `search_prs`, `search_commits`, and `search_repos`. Accepts relative durations (`3d`, `12h`, `2w`, `2mo`, `1y`), `YYYY-MM-DD`, or an ISO datetime. Rejected for `search_code`. |\n| `until` | `string` | No | Upper date bound for `search_issues`, `search_prs`, `search_commits`, and `search_repos`. Same formats as `since`. Rejected for `search_code`. |\n| `dateField` | `\"created\" \\| \"updated\"` | No | Date qualifier field for issue/PR/repo search. Defaults to `created`; repo search maps `updated` to GitHub's `pushed:` qualifier. Ignored for commit search, which always uses `committer-date:`. |\n| `limit` | `number` | No | Used by all `search_*` ops. Defaults to `10`, floored, clamped to `50`, and must be `> 0`. |\n| `run` | `string` | No | Used only by `run_watch`. Must be a numeric run ID or full GitHub Actions run URL. |\n| `tail` | `number` | No | Used only by `run_watch`. Defaults to `15`, floored, clamped to `200`, and must be `> 0`. |\n\n## Outputs\nThe tool returns a single text result built by `buildTextResult()` in `packages/coding-agent/src/tools/gh.ts`.\n\n- `content`: one text block. Multi-item ops join sections with blank lines and `---` separators.\n- `sourceUrl`: set for single repo/PR/run results when a canonical URL is known.\n- `details`: optional structured metadata used by the TUI renderer.\n - Common fields: `artifactId`, `repo`, `branch`, `worktreePath`, `remote`, `remoteBranch`, `headSha`, `runId`, `runIds`, `status`, `conclusion`, `failedJobs`.\n - `pr_checkout` adds `checkouts: GhPrCheckoutSummary[]`.\n - `run_watch` adds `watch: GhRunWatchViewDetails`, which drives the custom live/result renderer in `packages/coding-agent/src/tools/gh-renderer.ts`.\n- Artifact trailer: when `artifactId` is present, the text body gets an appended line like `Full failed-job logs: artifact://<id>`.\n - `run_watch` allocates artifacts with `session.allocateOutputArtifact(\"github\")`; persistent sessions therefore save failed-log bodies as `<artifact-dir>/<id>.github.log`.\n\n`run_watch` is the only streaming op. It emits `onUpdate` snapshots while polling, then returns one final text result.\n\n## Flow\n1. `GithubTool.createIf()` exposes the tool only when `git.github.available()` finds `gh` on `PATH`.\n2. `GithubTool.execute()` wraps dispatch in `untilAborted()` and switches on `params.op`.\n3. Each op normalizes optional strings, arrays, booleans, and numeric caps locally in `packages/coding-agent/src/tools/gh.ts`.\n4. CLI execution goes through `git.github.run/json/text()` in `packages/coding-agent/src/utils/git.ts`:\n - spawns `gh ...` with `Bun.spawn()`;\n - trims stdout/stderr unless `trimOutput: false`;\n - maps common auth/repo-context failures into tool-facing `ToolError` messages;\n - `json()` rejects empty or invalid JSON.\n5. Read-style ops (`repo_view`, `search_*`) fetch JSON and format Markdown-like text summaries. Single-issue and single-PR views were moved out of the tool and now resolve through the `issue://` / `pr://` internal URL schemes, which share the same SQLite cache.\n6. PR diffs moved out of the tool. `pr://<N>/diff` lists changed files, `pr://<N>/diff/<i>` slices a single file, and `pr://<N>/diff/all` returns the full unified diff — see `docs/tools/read.md`. All three variants share one `gh pr diff` invocation through the `pr-diff` cache row.\n7. `pr_checkout` resolves PR metadata first, then enters `git.withRepoLock()` before any git mutation so parallel checkout calls for the same primary repo do not race on shared `.git` state.\n8. `pr_push` reads PR head metadata back from git branch config, derives a refspec, then pushes with `git.push()`.\n9. `pr_create` shells out once, then best-effort re-reads the created PR for a richer summary.\n10. `run_watch` chooses either run mode (`run` supplied) or commit mode (`run` omitted), polls GitHub Actions APIs every 3 seconds, emits streaming updates, and may save a full failed-log artifact before returning.\n11. Final text goes through `toolResult().text(...)`; if `session.allocateOutputArtifact()` returns a slot, failed-log text is persisted with `Bun.write()`.\n\n## Modes / Variants\n\n### `repo_view`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch` |\n| `gh` command | `gh repo view [<repo>] [--branch <branch>] --json <GH_REPO_FIELDS>` |\n| Batching | None |\n| Output | `# <owner/repo>` header, description, URL, default branch, requested branch, visibility, permission, primary language, stars, forks, archive/fork flags, updated timestamp, homepage, topics. `sourceUrl = data.url`. |\n\nIf `repo` is omitted, `gh` repository resolution is used.\n\nSingle-issue and single-PR reads live in the `issue://<N>` / `pr://<N>` URL schemes (see `docs/tools/read.md`). They share `~/.omp/cache/github-cache.db` (override via `OMP_GITHUB_CACHE_DB`) and the `github.cache.softTtlSec` / `github.cache.hardTtlSec` / `github.cache.enabled` settings. The cache retains rendered Markdown plus the raw JSON payload returned by `gh`, including private bodies, comments, reviews, and review comments when comments are enabled; rows are scoped by the local GitHub credential fingerprint. Root and repo-scoped reads (`issue://`, `pr://owner/repo`) issue a live `gh issue list` / `gh pr list` for browsing; query params `state`, `limit`, `author`, `label` pass through to `gh` (`issue://` accepts `state=open|closed|all`; `pr://` also accepts `merged`). PR diffs ride the same cache under `pr://<N>/diff[/…]`: the listing, full diff, and per-file slices all share one `pr-diff` row keyed by repo and PR number.\n\n### `pr_create`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` plus either `fill=true` or `title` |\n| Optional fields | `repo`, `title`, `body`, `base`, `head`, `draft`, `fill`, `reviewer[]`, `assignee[]`, `label[]` |\n| `gh` command | `gh pr create ...` with flags assembled from provided fields |\n| Batching | None |\n| Output | `# Created Pull Request ...` summary with URL, state, draft flag, base/head, author, created time, labels, optional body. `sourceUrl` is the created PR URL. |\n\nBranches:\n- `fill && (title || body !== undefined)` throws.\n- Non-empty `body` is written under a temp dir `gh-pr-body-*` in `os.tmpdir()`, passed as `--body-file`, then removed in `finally`.\n- After creation, the tool parses the returned URL and best-effort runs `gh pr view <number> --repo <repo> --json <GH_PR_FIELDS_NO_COMMENTS>`; failures there are swallowed.\n\n### `pr_checkout`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `pr`, `force` |\n| `gh` command | For each requested PR: `gh pr view [<pr>] [--repo <repo>] --json <GH_PR_CHECKOUT_FIELDS>`; cross-repo PRs may also call `gh repo view <headRepository> --json <GH_REPO_CLONE_FIELDS>`. |\n| Batching | Yes. `pr` may be `string[]`; each PR is resolved in parallel, but git mutations are serialized per primary repo by `git.withRepoLock()`. |\n| Output | Single PR: checkout/worktree summary plus `details.repo`, `details.branch`, `details.worktreePath`, `details.remote`, `details.remoteBranch`, `details.checkouts`. Batched: `# <n> Pull Request Worktrees (...)` plus one section per PR and aggregated `details.checkouts`. |\n\nWorktree and metadata behavior:\n- Local branch name is always `pr-<number>`.\n- Worktree path is `path.join(getWorktreesDir(), encodeRepoPathForFilesystem(primaryRepoRoot), localBranch)`, where `getWorktreesDir()` is `~/.omp/wt`; effective path is `~/.omp/wt/<encoded-primary-repo-root>/pr-<number>`.\n- Existing worktree detection is by branch ref `refs/heads/pr-<number>` from `git.worktree.list()`.\n- New worktree creation calls `git.worktree.add(repoRoot, finalWorktreePath, localBranch, { signal })` after verifying the path is neither already registered nor already present on disk.\n- For same-repo PRs, remote is `origin`. For cross-repo PRs, the tool resolves a clone URL for the head repo, reuses an existing remote with the same URL when possible, or creates `fork-<owner>` / `fork-<owner>-<n>`.\n- The branch push metadata is persisted with `git config` under the repository's shared `.git/config` as:\n - `branch.pr-<number>.remote`\n - `branch.pr-<number>.merge`\n - `branch.pr-<number>.pushRemote`\n - `branch.pr-<number>.ompPrHeadRef`\n - `branch.pr-<number>.ompPrUrl`\n - `branch.pr-<number>.ompPrIsCrossRepository`\n - `branch.pr-<number>.ompPrMaintainerCanModify`\n- If `refs/heads/pr-<number>` already exists at a different commit, checkout fails unless `force=true`, in which case `git branch --force` resets it to the fetched PR head.\n- If a matching worktree already exists, the tool reuses it and reports `reused: true`.\n\n### `pr_push`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `branch`, `forceWithLease` |\n| `gh` command | None. This path uses git, not `gh`. |\n| Batching | None |\n| Output | `# Pushed Pull Request Branch` summary with local branch, remote, remote branch, remote URL, PR URL, and force-with-lease flag. `sourceUrl = prUrl` when known. |\n\nPush target resolution reads the `branch.<name>.ompPrHeadRef`, `pushRemote`/`remote`, `ompPrUrl`, `ompPrMaintainerCanModify`, and `ompPrIsCrossRepository` git-config keys written by `pr_checkout`. If the current checked-out branch matches the target branch, the source ref is `HEAD`; otherwise it pushes `refs/heads/<branch>`. The refspec is `HEAD:refs/heads/<headRef>` or `refs/heads/<branch>:refs/heads/<headRef>`.\n\n### `search_issues`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `query`, `limit`, `since`, `until`, `dateField` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [date qualifier] [repo:<repo>] is:issue\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub issues search`, echoed query, optional repo, result count, then one bullet per issue with repo/state/author/labels/timestamps/URL. |\n\n`repo` defaults to the current checkout's `owner/repo` via `resolveSearchRepoScope()` when omitted. The default is suppressed when the composed query already contains a leading `repo:`/`org:`/`user:`/`owner:` qualifier or when `gh repo view` fails to resolve the current checkout (e.g. outside a github remote).\n\n### `search_prs`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `query`, `limit`, `since`, `until`, `dateField` |\n| `gh` command | `gh api -X GET /search/issues -f q=\"<query> [date qualifier] [repo:<repo>] is:pr\" -F per_page=<limit>` |\n| Batching | None |\n| Output | Same shape as `search_issues`, labeled as pull requests. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_code`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op`, `query` |\n| Optional fields | `repo`, `limit` |\n| `gh` command | `gh api -X GET /search/code -f q=\"<query> [repo:<repo>]\" -F per_page=<limit> -H \"Accept: application/vnd.github.text-match+json\"` |\n| Batching | None |\n| Output | `# GitHub code search`, result count, then one bullet per match with path, repo, short commit SHA, URL, and first normalized text-match fragment line when present. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`. `since` and `until` are explicitly rejected for this op because GitHub code search has no supported date qualifier.\n\n### `search_commits`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `query`, `limit`, `since`, `until`, `dateField` (accepted but ignored; commit searches use `committer-date`) |\n| `gh` command | `gh api -X GET /search/commits -f q=\"<query> [committer-date qualifier] [repo:<repo>]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub commits search`, result count, then one bullet per commit: short SHA + first commit-message line, repo, author, date, URL. |\n\n`repo` defaults to the current checkout's `owner/repo` as in `search_issues`.\n\n### `search_repos`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `query`, `limit`, `since`, `until`, `dateField` |\n| `gh` command | `gh api -X GET /search/repositories -f q=\"<query> [date qualifier]\" -F per_page=<limit>` |\n| Batching | None |\n| Output | `# GitHub repositories search`, result count, then one bullet per repo with first description line, language, stars, forks, open issues, visibility, archive/fork flags, updated time, URL. |\n\n`repo` is intentionally not used for this op. If `query`, `since`, and `until` are all omitted, the tool sends an empty GitHub repository-search query and the GitHub API may reject it.\n\n### `run_watch`\n\n| Aspect | Value |\n| --- | --- |\n| Required fields | `op` |\n| Optional fields | `repo`, `branch`, `run`, `tail` |\n| `gh` command | Repo resolution: `gh repo view --json nameWithOwner -q .nameWithOwner` when `repo` and run URL repo are both absent. Single-run mode uses `gh api --method GET /repos/<repo>/actions/runs/<runId>` and `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`. Commit mode uses `gh api --method GET /repos/<repo>/branches/<branch>`, `gh api --method GET /repos/<repo>/actions/runs`, `gh api --method GET /repos/<repo>/actions/runs/<runId>/jobs`, and `gh api /repos/<repo>/actions/jobs/<jobId>/logs` for failed jobs. |\n| Batching | Implicit batching only in commit mode: all workflow runs for one commit are tracked together. |\n| Output | Streaming watch snapshots via `onUpdate`, then a final text report. On failure, appends `Full failed-job logs: artifact://<id>` and sets `details.artifactId`. |\n\nWatch flow:\n- `run` parsing accepts either a decimal run ID or a full run URL. URL repo must match explicit `repo` when both are given.\n- Poll interval is fixed at 3 seconds (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Failure grace period is fixed at 5 seconds (`RUN_WATCH_GRACE_DEFAULT`). When any failed job appears before completion, the tool emits a note, waits once, re-fetches state, then collects logs so concurrent failures are included.\n- Failed-job logs are fetched with `gh api /repos/<repo>/actions/jobs/<jobId>/logs` via `git.github.run()`, not `json()`. Non-zero exit leaves `available: false` instead of failing the whole watch.\n- Inline result includes only the last `tail` lines per failed job. The saved artifact contains full logs (`mode: \"full\"`).\n- In commit mode, success is intentionally double-checked: once all known runs are successful, the tool waits one more poll interval and succeeds only if the set of run IDs is unchanged. This avoids returning before late workflow runs appear for the same commit.\n- `details.watch` drives a specialized renderer in `packages/coding-agent/src/tools/gh-renderer.ts`; non-watch results fall back to generic text rendering.\n\n## Side Effects\n- Filesystem\n - `pr_create` may create a temp dir under `os.tmpdir()` named `gh-pr-body-*`, write `body.md`, then remove the dir in `finally`.\n - `pr_checkout` may create directories under `~/.omp/wt/<encoded-primary-repo-root>/` and add git worktrees there.\n - `run_watch` may write a session artifact with full failed-job logs.\n- Network\n - Every op shells out to `gh`, which then talks to GitHub APIs except `pr_push`.\n - `pr_push` uses git network transport to the configured remote.\n- Subprocesses / native bindings\n - All `gh` calls use `Bun.spawn([\"gh\", ...args])`.\n - `pr_checkout` and `pr_push` also invoke git helpers from `packages/coding-agent/src/utils/git.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `run_watch` consumes `session.allocateOutputArtifact()` when failed-job logs are persisted.\n - Returned `details` objects carry run/checkouts metadata for the renderer/UI.\n- User-visible prompts / interactive UI\n - `gh` interactive editor fallback is suppressed for `pr_create` by forcing either `--body-file` or `--body \"\"`.\n - `gh-renderer` provides compact headers for all ops and a custom live watch view for `run_watch`.\n- Background work / cancellation\n - `run_watch` loops until success/failure and uses `scheduler.wait()` between polls.\n - `GithubTool.execute()` is wrapped in `untilAborted()`; `git.github.run()` forwards the abort signal into `Bun.spawn()`.\n\n## Limits & Caps\n- Search result default: `10` (`SEARCH_LIMIT_DEFAULT` in `packages/coding-agent/src/tools/gh.ts`).\n- Search result max: `50` (`SEARCH_LIMIT_MAX`).\n- PR file preview inside the `pr://` view: first `50` files only (`FILE_PREVIEW_LIMIT` in `gh.ts`).\n- Run-watch poll interval: `3s` (`RUN_WATCH_INTERVAL_DEFAULT`).\n- Run-watch failure grace period: `5s` (`RUN_WATCH_GRACE_DEFAULT`).\n- Run-watch failed-log tail default: `15` lines (`RUN_WATCH_TAIL_DEFAULT`).\n- Run-watch failed-log tail max: `200` lines (`RUN_WATCH_TAIL_MAX`).\n- PR review comments page size: `100` (`REVIEW_COMMENTS_PAGE_SIZE`).\n- Actions jobs page size: `100` (`RUN_JOBS_PAGE_SIZE`).\n- Search and tail numeric inputs are floored with `Math.floor()`, clamped to the max, and rejected when non-finite or `<= 0`.\n- `pr_checkout` batch fan-out is unbounded in tool code; all requested PRs are launched with `Promise.all()`.\n\n## Errors\n- Tool creation is skipped entirely when `gh` is not installed.\n- `git.github.run()` throws `ToolError(\"GitHub CLI (gh) is not installed...\")` if `gh` is missing at execution time.\n- `git.github.text/json()` map common failures to model-facing messages:\n - not authenticated → `GitHub CLI is not authenticated. Run \\`gh auth login\\`.`\n - missing repo context without explicit `repo` → `GitHub repository context is unavailable. Pass \\`repo\\` explicitly or run the tool inside a GitHub checkout.`\n - otherwise stderr/stdout text, or fallback `GitHub CLI command failed: gh ...`\n- `json()` also throws on empty stdout or invalid JSON.\n- Local validation errors throw `ToolError`, including:\n - missing required per-op fields (`query` for `search_code`, `title unless fill=true`)\n - invalid numeric `limit` / `tail`\n - invalid `since` / `until` date bound\n - invalid `run` format\n - `fill` combined with `title` or `body`\n - missing git repo / branch / HEAD context for checkout, push, or watch\n - `pr_push` on a branch without `ompPrHeadRef` metadata\n - conflicting existing worktree path or branch without `force`\n- `run_watch` treats failed-job log fetches specially: missing log content does not fail the watch; it marks that log `available: false` and prints `Log tail unavailable.` / `Full log unavailable.`.\n- `pr_create` swallows only the post-create best-effort `gh pr view` refresh; the create step itself still fails normally.\n\n## Notes\n- `appendRepoFlag()` intentionally skips `--repo` when the identifier argument is already a full GitHub URL; that lets `gh` derive repo/number from the URL.\n- `normalizePrIdentifierList()` accepts `reviewer`, `assignee`, and `label` arrays too; the helper name is broader than its callers.\n- `pr_push` depends on `pr_checkout` having run first for that local branch; there is no alternate metadata source.\n- `pr_checkout` stores push metadata in branch config, not in the worktree directory. Reusing the same `pr-<number>` branch reuses those config keys.\n- Worktree write serialization is keyed by the primary repo root, not the current worktree path, because git worktrees share `.git/config`, `packed-refs`, commit-graph, and worktree metadata files.\n- `search_repos` is the only search op that never forwards `repo`; repository scoping must be expressed in the query itself.\n- `run_watch` success on commit mode means “all observed runs succeeded and no additional runs appeared one poll later”, not merely “latest poll looked green”.\n- The TUI renderer collapses failed log previews unless the result view is expanded; the underlying text result still contains the same tailed lines plus any artifact reference.\n",
|
|
83
|
+
"tools/inspect_image.md": "# inspect_image\n\n> Send a local image file to a vision-capable model and return text analysis.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/inspect-image.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/inspect-image.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/inspect-image-renderer.ts` — TUI call/result rendering.\n - `packages/coding-agent/src/utils/image-loading.ts` — path resolution, type detection, size gate, optional resize.\n - `packages/coding-agent/src/utils/image-resize.ts` — downscale and recompress oversized images.\n - `packages/coding-agent/src/tools/path-utils.ts` — resolve input path relative to session cwd.\n - `packages/utils/src/mime.ts` — detect supported image formats from file bytes.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Image path passed to `loadImageInput`; resolved relative to `session.cwd` by `resolveReadPath(...)`. |\n| `question` | `string` | Yes | User prompt sent as a text content block alongside the image. |\n\n## Outputs\nThe tool returns a single `AgentToolResult`:\n\n- `content`: one text block, `[{ type: \"text\", text }]`, where `text` is the concatenated assistant text content from the model response.\n- `details`:\n - `model`: `<provider>/<id>` of the selected model.\n - `imagePath`: resolved filesystem path returned by `loadImageInput(...)`.\n - `mimeType`: MIME type actually sent to the model after optional resize/re-encode.\n\nModel-visible output is single-shot, not streamed by this tool.\n\nTUI rendering adds presentation-only truncation from `packages/coding-agent/src/tools/inspect-image-renderer.ts`:\n\n- call preview truncates `question` to 100 columns,\n- result view shows 4 lines collapsed or 16 lines expanded,\n- each rendered output line is truncated to 120 columns,\n- footer metadata shows `model · mimeType` when present.\n\n## Flow\n1. `InspectImageTool.execute(...)` rejects immediately if `images.blockImages` is enabled in session settings.\n2. It reads `session.modelRegistry`; missing registry, empty registry, missing API key, or unresolved model each raise `ToolError` from `packages/coding-agent/src/tools/inspect-image.ts`.\n3. Model selection tries, in order, `pi/vision`, `pi/default`, the active model string from the session, then `availableModels[0]`. `expandRoleAlias(...)` and `resolveModelFromString(...)` handle each lookup.\n4. The chosen model must advertise `input.includes(\"image\")`; otherwise execution fails before reading the file.\n5. `loadImageInput(...)` in `packages/coding-agent/src/utils/image-loading.ts` resolves the path with `resolveReadPath(...)`, detects MIME type with `readImageMetadata(...)`, and rejects files larger than `MAX_IMAGE_INPUT_BYTES` (`20 * 1024 * 1024`, 20 MiB) using `ImageInputTooLargeError`.\n6. `readImageMetadata(...)` in `packages/utils/src/mime.ts` inspects file headers only. Supported detected MIME types are `image/png`, `image/jpeg`, `image/gif`, and `image/webp`.\n7. If `images.autoResize` is true, `loadImageInput(...)` calls `resizeImage(...)`. Resize failures are swallowed there and the original bytes are kept.\n8. If MIME detection returned no supported image type, `execute(...)` throws `ToolError(\"inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.\")`.\n9. The tool calls `instrumentedCompleteSimple(...)` with one user message containing two content parts in order:\n - `{ type: \"image\", data: imageInput.data, mimeType: imageInput.mimeType }`\n - `{ type: \"text\", text: params.question }`\n10. `systemPrompt` is a one-element array rendered from `packages/coding-agent/src/prompts/tools/inspect-image-system.md`; telemetry is tagged with oneshot kind `inspect_image`.\n11. If the model response stop reason is `error` or `aborted`, the tool maps that to `ToolError`.\n12. `extractResponseText(...)` concatenates only `text` content blocks from the assistant message, trims the result, and fails if nothing remains.\n13. Success returns the text plus `details`; `inspectImageToolRenderer` formats the result for the TUI.\n\n## Modes / Variants\n- **Original image path**: `images.autoResize` disabled. The original file bytes are base64-encoded and sent with the detected MIME type.\n- **Auto-resized path**: `images.autoResize` enabled. `resizeImage(...)` may downscale and re-encode the image before upload.\n- **Unsupported image path**: file exists but header sniffing does not identify PNG/JPEG/GIF/WEBP. The tool returns a `ToolError` before any model call.\n- **Oversize image path**: file size exceeds 20 MiB before upload. The tool returns a `ToolError` before any model call.\n\n## Side Effects\n- Filesystem\n - Resolves and reads the target image from disk.\n - Stats the file once with `Bun.file(...).stat()` and reads it fully with `fs.readFile(...)`.\n- Network\n - Sends the final base64 image payload plus question text to the selected model through `instrumentedCompleteSimple(...)` / the configured simple completion implementation.\n- Session state\n - Reads session settings, active model preferences, cwd, and model registry.\n- Background work / cancellation\n - Passes the caller `AbortSignal` into `instrumentedCompleteSimple(...)` and the configured simple completion implementation.\n - Image preprocessing is local and not cancellation-aware in these helpers.\n\n## Limits & Caps\n- Supported detected input formats: `image/png`, `image/jpeg`, `image/gif`, `image/webp` (`SUPPORTED_IMAGE_MIME_TYPES` in `packages/utils/src/mime.ts`).\n- Metadata sniff cap: `DEFAULT_IMAGE_METADATA_HEADER_BYTES = 256 * 1024` bytes. Format detection only reads up to 256 KiB from the file header.\n- Upload input cap: `MAX_IMAGE_INPUT_BYTES = 20 * 1024 * 1024` bytes (20 MiB) in `packages/coding-agent/src/utils/image-loading.ts`.\n- Auto-resize defaults in `packages/coding-agent/src/utils/image-resize.ts`:\n - `maxWidth: 1568`\n - `maxHeight: 1568`\n - `maxBytes: 500 * 1024` bytes (500 KiB target)\n - `jpegQuality: 75`\n- Resize fast path: if the original image is already within `1568x1568` and within `maxBytes / 4` (125 KiB by default), `resizeImage(...)` returns the original bytes unchanged.\n- Resize quality ladder: after the first encode pass, lossy retries use qualities `[70, 60, 50, 40]`.\n- Resize dimension ladder: if quality reduction still misses the byte target, retries scale dimensions by `[1.0, 0.75, 0.5, 0.35, 0.25]` and stop if either dimension would fall below `100` pixels.\n- First resize pass encodes PNG, JPEG, and WebP, then keeps the smallest encoded buffer. Fallback passes encode JPEG and WebP only, again keeping the smaller output.\n- Renderer caps:\n - `INSPECT_QUESTION_PREVIEW_WIDTH = 100`\n - `INSPECT_OUTPUT_COLLAPSED_LINES = 4`\n - `INSPECT_OUTPUT_EXPANDED_LINES = 16`\n - `INSPECT_OUTPUT_LINE_WIDTH = 120`\n\n## Errors\n- Settings gate:\n - `Image submission is disabled by settings (images.blockImages=true). Disable it to use inspect_image.`\n- Model resolution / capability:\n - `Model registry is unavailable for inspect_image.`\n - `No models available for inspect_image.`\n - `Unable to resolve a model for inspect_image.`\n - `Resolved model <provider>/<id> does not support image input. Configure a vision-capable model for modelRoles.vision.`\n - `No API key available for <provider>/<id>. Configure credentials for this provider or choose another vision-capable model.`\n- Input file:\n - `Image file too large: <size> exceeds <limit> limit.` from `ImageInputTooLargeError`, remapped to `ToolError`.\n - `inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.` when header sniffing fails.\n- Model call:\n - `inspect_image request failed.` if the response stop reason is `error` without a provider message.\n - Provider `errorMessage` is passed through when present.\n - `inspect_image request aborted.` on aborted responses.\n - `inspect_image model returned no text output.` when the assistant message contains no text blocks after filtering.\n\nFailures surface as thrown `ToolError`s from `execute(...)`; the normal success return shape is not used for error reporting.\n\n## Notes\n- The tool schema is not marked strict in `InspectImageTool`; callers should still treat only `path` and `question` as supported inputs because the implementation reads no other fields.\n- The model-facing prompt path on disk is `packages/coding-agent/src/prompts/tools/inspect-image.md`; the assignment's underscore form does not exist.\n- Format support is based on file content, not filename extension. Renaming a non-image file to `.png` does not make it valid.\n- `resolveReadPath(...)` tries macOS-specific path variants: shell-unescaped spaces, AM/PM narrow no-break-space filenames, NFD normalization, and curly-quote variants.\n- `loadImageInput(...)` also computes `textNote`, `dimensionNote`, and final `bytes`, but `inspect_image` does not include those in tool output.\n- Auto-resize can change the MIME type sent to the model. A JPEG or GIF input may be uploaded as PNG, JPEG, or WebP depending on which encoder output is smallest.\n- If `resizeImage(...)` throws or cannot decode the image, `loadImageInput(...)` silently keeps the original base64 payload instead of failing.\n",
|
|
84
|
+
"tools/irc.md": "# irc\n\n> Send and receive messages between agents over a process-global mailbox bus.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/irc.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/irc.md`\n- Key collaborators:\n - `packages/coding-agent/src/irc/bus.ts` — process-global `IrcBus`: per-agent mailboxes, delivery, waiter matching.\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global agent directory and status.\n - `packages/coding-agent/src/registry/agent-lifecycle.ts` — revival of parked recipients on direct send.\n - `packages/coding-agent/src/session/agent-session.ts` — `deliverIrcMessage(...)`: recipient-side injection and wake turns.\n - `packages/coding-agent/src/prompts/system/irc-incoming.md` — incoming-message rendering for the recipient.\n - `packages/coding-agent/src/config/settings-schema.ts` — `irc.timeoutMs`.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — renders IRC events into chat UI.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"send\" \\| \"wait\" \\| \"inbox\" \\| \"list\"` | Yes | Operation. |\n| `to` | `string` | `send` | Recipient agent id, or `\"all\"` for broadcast. Whitespace trimmed; self-send rejected. |\n| `message` | `string` | `send` | Message body. Empty-after-trim is rejected. |\n| `replyTo` | `string` | No | `send`: message id being answered. |\n| `await` | `boolean` | No | `send`: after delivery, block until the next message from that peer arrives (round-trip sugar). Invalid with `to: \"all\"`. |\n| `from` | `string` | No | `wait`: only accept a message from this agent id. |\n| `timeoutMs` | `number` | No | `wait` / `send await:true`: timeout in milliseconds; `0` waits indefinitely. Defaults to `irc.timeoutMs`. |\n| `peek` | `boolean` | No | `inbox`: list messages without consuming them. |\n\n## Outputs\n- Single-shot `AgentToolResult`; no streaming updates.\n- `content` is one text block:\n - `list`: `No other agents.` or `<n> peer(s):` bullets — `id [displayName · kind · status]` plus unread count, parent, and last-activity age; a footer notes that parked agents are revived automatically when messaged.\n - `send`: per-recipient delivery receipts (`injected` / `woken` / `revived` / `failed — <error>`); with `await: true`, the reply body or a clean no-reply timeout note.\n - `wait`: the consumed message as `[<msgId>] <from>: <body>` (with a reply-to tag), or `No message within <duration>.`\n - `inbox`: `Inbox empty.` or `<n> message(s):` bullets.\n- `details: IrcDetails`: `{ op, from?, to?, receipts?, waited?, inbox?, peers? }`. `waited` is `null` when a wait timed out; `receipts` carry `{ to, outcome, error? }`.\n\n## Flow\n1. `IrcTool.createIf` constructs the tool only when `isIrcEnabled` passes and the session has both an `AgentRegistry` and `getAgentId`. There is no `irc.enabled` setting: availability is derived — true for every subagent (`taskDepth > 0`; a parent always exists) and for any session that can still spawn subagents through the task tool. Only a top-level session with task spawning unavailable has no peers, hence no irc.\n2. `execute` resolves the registry and sender id; missing either returns a text error result instead of throwing.\n3. `op: \"list\"`: `registry.list()` minus self and minus `aborted` agents — `parked` peers ARE listed. Each row includes the unread count from `IrcBus.unreadCount(...)` and last activity.\n4. `op: \"send\"` validates `to`/`message`, rejects self-sends, and rejects `await` with `to: \"all\"`.\n5. Target resolution: broadcasts fan out to `registry.listVisibleTo(senderId)` (live peers only — `running`/`idle`; reviving every parked agent on a broadcast would be a stampede). Direct sends go through the bus unfiltered, so a parked recipient is revived.\n6. `IrcBus.send(...)` is fire-and-forget — it never blocks on the recipient generating anything. Delivery by recipient status:\n - `running` → message enqueued and injected as a non-interrupting aside at the recipient's next step boundary (`AgentSession.deliverIrcMessage`, rendered from `irc-incoming.md`, persisted as an `irc:incoming` custom message) — receipt `injected`;\n - `idle` (live session) → enqueued and a real turn is started — the message wakes the agent — receipt `woken`;\n - `parked` → `AgentLifecycleManager.global().ensureLive(to)` revives the session first, then the wake path — receipt `revived`;\n - resolution/revival failure → receipt `failed` with the error; other recipients still complete.\n7. `send` with `await: true` then calls `IrcBus.wait(senderId, { from: to }, timeoutMs, signal)` and appends the reply (or a no-reply note suggesting `inbox`/`wait`) to the result.\n8. `op: \"wait\"` blocks until a message for the caller (optionally filtered by `from`) arrives, consumes it, and returns it. Timeout returns a clean \"no message\" result, not an error.\n9. `op: \"inbox\"` drains pending messages (or peeks with `peek: true`) without blocking.\n10. Timeouts resolve as `params.timeoutMs ?? irc.timeoutMs`, normalized: `0` disables the timeout, negative/non-finite values fall back to the default `120_000`, positive values are truncated and clamped to ≥ 1 ms.\n\n## Modes / Variants\n- `list`: enumerate peers with status (`running`/`idle`/`parked`), unread counts, and last activity.\n- `send` direct: one exact peer id; wakes idle peers, revives parked ones.\n- `send` broadcast: `to: \"all\"` to every live peer; parked peers are skipped.\n- `send` + `await: true`: round-trip convenience — send, then wait for the next message from that peer. Replaces the old `awaitReply` auto-reply semantics without a fake reply.\n- `wait`: block for an incoming message, optionally filtered by sender.\n- `inbox`: non-blocking drain or peek.\n\n## Side Effects\n- Session state\n - Reads the process-global `AgentRegistry`; direct sends to parked agents revive their sessions through the lifecycle manager.\n - Persists `irc:incoming` custom messages into recipient history; replies are ordinary turns in the recipient's own session.\n - Waking an idle/parked recipient starts a real agent turn (model requests, tool use) in that recipient.\n- User-visible prompts / interactive UI\n - IRC events render as transcript cards in the TUI; the Agent Hub shows per-agent unread counts.\n- Background work / cancellation\n - `send` itself never blocks on reply generation; only `wait` (and `await: true`) blocks, bounded by the resolved timeout and the caller's `AbortSignal`.\n- Network\n - No IRC server connection. Woken recipients make their own model-provider calls as part of their turn.\n- Filesystem\n - No direct filesystem writes in the tool itself; recipient turns persist to their session JSONL as usual.\n\n## Limits & Caps\n- Availability gates: `isIrcEnabled` (running as a subagent, or task spawning available — there is no `irc.enabled` setting), an `AgentRegistry`, and a caller agent id.\n- Mailboxes are bounded at 100 messages per agent (`MAILBOX_CAP` in `packages/coding-agent/src/irc/bus.ts`); oldest messages are dropped beyond the cap.\n- `irc.timeoutMs` defaults to `120_000` and is the default `wait` / `send await:true` timeout; `0` disables the timeout, non-finite or negative values fall back to the default, positive values are truncated and clamped to at least `1` ms.\n- Broadcast scope: live peers only (`running`/`idle`) via `listVisibleTo`; direct sends address any non-aborted agent, including parked ones.\n\n## Errors\n- The tool returns text errors (with `isError: true`), not thrown exceptions, for:\n - missing registry: `IRC is unavailable in this session.`\n - missing sender id: `IRC is unavailable: caller has no agent id.`\n - missing `to` / `message` on `send`\n - self-send: `Cannot send an IRC message to yourself.`\n - `await` with `to: \"all\"`\n - unknown op\n- Per-recipient delivery failures surface as `failed` receipts with the error message; `send` is marked `isError` only when no recipient received the message.\n- `wait` timeout is a normal result (`waited: null`), not an error.\n\n## Notes\n- This is IRC-like naming only: no servers, sockets, channels, or join/part state. Addressing is by exact registry agent id.\n- Replies are real turns by the recipient — the old ephemeral no-tools auto-reply (`awaitReply` / `respondAsBackground`) no longer exists. A recipient may keep working before answering; check `inbox` or `wait` again rather than re-sending.\n- Wake-on-message is the only resume primitive: messaging a parked agent revives it (same `ensureLive` path as the Agent Hub). The task tool has no `resume` parameter.\n- Message ids are Snowflakes; pass them as `replyTo` to thread an answer to a specific message.\n- Persistence is per recipient history: the sender gets receipts in the tool result; the recipient sees the injected `irc:incoming` message in its own transcript (visible via `history://<id>`).\n",
|
|
85
|
+
"tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. Cannot be combined with `list`. If omitted (and `cancel` is also omitted), the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. |\n| `list` | `boolean` | No | Return an immediate snapshot of every job spawned by the calling agent (running + completed within retention) without waiting. Read-only — cannot be combined with `poll` or `cancel`. |\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRead-only snapshot path:\n- Calling `job` with `list: true` returns a markdown summary of every job spawned by the calling agent (running + completed within retention) without waiting.\n\n## Flow\n1. `JobTool` is registered unconditionally in `packages/coding-agent/src/tools/index.ts`; there is no `async.enabled` gate (the `task` tool always schedules background jobs).\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection: call with `list: true` for the same snapshot data without waiting on completion.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - every `task` call registers one `type: \"task\"` job, unless the session has no job manager or the agent definition declares `blocking: true` (sync fallback).\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and mutates `session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Calling `list: true` against an empty manager returns a normal empty-list result rather than throwing; missing ids passed to `poll` are silently filtered.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job` calls with `poll` or `list: true`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` calls and `list: true` snapshots behave as if the id never existed.\n",
|
|
86
|
+
"tools/lsp.md": "# lsp\n\n> Query language servers for diagnostics, navigation, symbols, renames, code actions, capabilities, and raw requests.\n\n## Source\n- Entry: `packages/coding-agent/src/lsp/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/lsp.md`\n- Key collaborators:\n - `packages/coding-agent/src/lsp/client.ts` — client process lifecycle and JSON-RPC\n - `packages/coding-agent/src/lsp/config.ts` — config loading, auto-detect, server selection\n - `packages/coding-agent/src/lsp/lspmux.ts` — optional `lspmux` command wrapping\n - `packages/coding-agent/src/lsp/edits.ts` — apply `WorkspaceEdit` and text edits\n - `packages/coding-agent/src/lsp/utils.ts` — URI conversion, symbol resolution, formatting, glob expansion\n - `packages/coding-agent/src/lsp/types.ts` — tool schema and protocol types\n - `packages/coding-agent/src/lsp/clients/index.ts` — custom linter client cache/factory\n - `packages/coding-agent/src/lsp/clients/lsp-linter-client.ts` — LSP-backed linter adapter\n - `packages/coding-agent/src/lsp/clients/biome-client.ts` — Biome CLI diagnostics/formatting adapter\n - `packages/coding-agent/src/lsp/clients/swiftlint-client.ts` — SwiftLint CLI diagnostics adapter\n - `packages/coding-agent/src/tools/index.ts` — tool registration and `lsp.enabled` gating\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout defaults and clamping\n - `packages/coding-agent/src/lsp/defaults.json` — built-in server definitions for auto-detect\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | string enum | Yes | One of `diagnostics`, `definition`, `references`, `hover`, `symbols`, `rename`, `rename_file`, `code_actions`, `type_definition`, `implementation`, `status`, `reload`, `capabilities`, `request`. |\n| `file` | string | No | File path; for `diagnostics` also a glob; for workspace forms use `\"*\"`; for `rename_file` this is the source path. |\n| `line` | number | No | 1-indexed line number for position-based actions. Defaults to `1` on the single-file action path. |\n| `symbol` | string | No | Substring used to resolve the column on `line`. Supports `name#N` occurrence selectors; `N` is 1-indexed and defaults to `1`. |\n| `query` | string | No | Workspace symbol query, code-action selector/filter, or LSP method name for `action=request`. |\n| `new_name` | string | No | Required for `rename` and `rename_file`. |\n| `apply` | boolean | No | For `rename`/`rename_file`, apply unless explicitly `false`. For `code_actions`, list unless explicitly `true`. |\n| `timeout` | number | No | Seconds, clamped by `clampTimeout(\"lsp\", ...)` to `5..60`, default `20`. |\n| `payload` | string | No | JSON string for `action=request`; overrides auto-built params. |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- `content` is always one text block: `[{ type: \"text\", text: string }]`.\n- `details` is `LspToolDetails`: `action`, `success`, optional `serverName`, optional original `request`.\n- No streaming updates.\n- No artifact URIs or background jobs.\n- Many validation failures are returned as ordinary text results with `details.success: false`; aborts throw `ToolAbortError` instead.\n\n## Flow\n1. `packages/coding-agent/src/tools/index.ts` registers `lsp: LspTool.createIf`; session creation also gates it behind `session.enableLsp !== false` and `settings.get(\"lsp.enabled\")`.\n2. `LspTool.execute()` in `packages/coding-agent/src/lsp/index.ts` clamps `timeout` with `clampTimeout(\"lsp\", ...)`, builds an `AbortSignal.timeout(...)`, and combines it with the caller signal.\n3. `getConfig()` loads and caches `LspConfig` per cwd, applies idle-timeout config via `setIdleTimeout()`, and reuses the cached config on later calls.\n4. Config loading in `packages/coding-agent/src/lsp/config.ts` merges `defaults.json` with JSON/YAML overrides from project, project config dirs, user config dirs, plugin roots, and home; if there are no overrides it auto-detects servers from root markers plus executable discovery.\n5. Server routing uses `getServersForFile()` / `getServerForFile()` from `config.ts`: extension or basename match, then sort primary servers before linters. `index.ts` further filters custom linter clients out of navigation/refactor paths with `getLspServersForFile()` / `getLspServerForFile()`.\n6. `getOrCreateClient()` in `client.ts` creates one process per `command:cwd`, optionally wraps supported commands with `lspmux`, spawns the server, starts the background message reader, sends `initialize`, stores server capabilities, then sends `initialized`.\n7. The message reader in `client.ts` parses LSP frames, resolves pending requests, caches `publishDiagnostics`, tracks `$/progress` tokens for project-load completion, answers `workspace/configuration`, and applies `workspace/applyEdit` requests through `applyWorkspaceEdit()`.\n8. File-scoped actions call `ensureFileOpen()` before requests. Column resolution uses `resolveSymbolColumn()` from `utils.ts`: read the target file, pick first non-whitespace when `symbol` is omitted, otherwise find the exact or case-insensitive match on the target line and honor `#N` occurrence selectors.\n9. Actions dispatch in `LspTool.execute()` through dedicated branches: workspace-only branches (`status`, some `diagnostics`, workspace `symbols`, workspace `reload`, `capabilities`, `request`) run before the single-file switch; all other single-file actions share one client lookup and `switch(action)`.\n10. Requests go through `sendRequest()` in `client.ts`, which allocates an incrementing JSON-RPC id, installs abort and timeout handling, sends `$/cancelRequest` on abort, and rejects on timeout or process exit.\n11. Actions that return edits either preview with `formatWorkspaceEdit()` or apply with `applyWorkspaceEdit()` from `edits.ts`; `rename_file` also performs the filesystem rename and then sends `workspace/didRenameFiles`.\n12. Non-abort failures inside the single-file action block are converted to `LSP error: ...`; many precondition failures return explicit text without throwing.\n\n## Modes / Variants\n### Routing and workspace scope\n- `file: \"*\"` is only special for `diagnostics`, `symbols`, and `reload`.\n- `status` ignores `file`.\n- `capabilities` with omitted `file` or `\"*\"` inspects all non-custom LSP servers; with a concrete file it scopes to matching non-custom servers.\n- `request` with omitted `file` or `\"*\"` chooses the first available non-custom LSP server; with a concrete file it chooses that file's primary non-linter server.\n- `rename_file` sends `workspace/willRenameFiles` and `workspace/didRenameFiles` to every non-custom LSP server from `getLspServers(config)`, not just one file-scoped server.\n- Diagnostics are the only tool action that queries both normal LSP servers and custom linter clients (`BiomeClient`, `SwiftLintClient`, or `LspLinterClient`).\n\n### `diagnostics`\n**Inputs**\n- Required: `file`, unless using workspace mode with `file: \"*\"`.\n- Optional: `timeout`.\n\n**Execution**\n- `file: \"*\"`: `runWorkspaceDiagnostics()` detects project type from root markers and runs one subprocess command: Rust `cargo check --message-format=short`, TypeScript `npx tsc --noEmit`, Go `go build ./...`, Python `pyright`.\n- Concrete file or glob: `resolveDiagnosticTargets()` treats non-globs as one target, otherwise expands a `Bun.Glob` up to `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Per file, every matching server runs: custom clients call `lint(file)`; real LSP servers optionally wait for project load, capture `diagnosticsVersion`, `refreshFile()`, then `waitForDiagnostics()` for fresh `publishDiagnostics` (settles on the latest publish; exact-version match accepted immediately).\n- Results are deduplicated by range+message and severity-sorted.\n\n**Output text**\n- Single target with no issues: `OK`.\n- Single target with issues: `<summary>:\\n<grouped diagnostics>`.\n- Batch/glob target: one section per file, plus an initial truncation warning when the glob exceeds the file cap.\n- Workspace mode: `Workspace diagnostics (<detected description>):\\n<command output>`.\n\n### `definition`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/definition` with `{ textDocument, position }`.\n- Accepts `Location`, `Location[]`, `LocationLink`, or `LocationLink[]`; `normalizeLocationResult()` converts `LocationLink` to `targetSelectionRange ?? targetRange`.\n- Waits for project load before the request.\n\n**Output text**\n- `No definition found` or `Found N definition(s):` followed by `file:line:col` and one context line above/below each location.\n\n### `type_definition`\nSame as `definition`, but sends `textDocument/typeDefinition` and reports `type definition(s)`.\n\n### `implementation`\nSame as `definition`, but sends `textDocument/implementation` and reports `implementation(s)`.\n\n### `references`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/references` with `includeDeclaration: true`.\n- For project-aware servers, retries up to `REFERENCES_RETRY_COUNT` times when the only hit is the queried declaration; between retries it waits for project load and sleeps `REFERENCES_RETRY_DELAY_MS`.\n- First `REFERENCE_CONTEXT_LIMIT` references include surrounding context; the rest are location-only.\n\n**Output text**\n- `No references found` or `Found N reference(s):` with contextual entries first, then `... M additional reference(s) shown without context` when truncated.\n\n### `hover`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/hover`.\n- `extractHoverText()` flattens strings, markup content, marked-string objects, or arrays into plain text.\n\n**Output text**\n- `No hover information` or the extracted hover text.\n\n### `symbols`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted file on the early workspace branch, plus required `query`.\n- Document mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode sends `workspace/symbol` to every non-custom LSP server, post-filters matches with `filterWorkspaceSymbols()`, deduplicates with `dedupeWorkspaceSymbols()`, then truncates to `WORKSPACE_SYMBOL_LIMIT`.\n- Document mode sends `textDocument/documentSymbol` to the primary server. If the first item has `selectionRange`, it formats hierarchical `DocumentSymbol`s; otherwise it formats flat `SymbolInformation`s.\n\n**Output text**\n- Workspace mode: `Found N symbol(s) matching \"query\":` plus formatted `name @ file:line:col`, with an omission line when over the limit.\n- Document mode: `Symbols in <file>:` plus hierarchical or flat symbol lines.\n\n### `rename`\n**Inputs**\n- Required: `file`, `new_name`.\n- Optional: `line`, `symbol`, `apply`, `timeout`.\n\n**Execution**\n- Waits for project load, sends `textDocument/rename`, receives a `WorkspaceEdit`.\n- `apply !== false` applies edits immediately with `applyWorkspaceEdit()`.\n- `apply === false` renders a preview with `formatWorkspaceEdit()`.\n\n**Output text**\n- `Rename returned no edits`, `Applied rename:` plus applied change lines, or `Rename preview:` plus summarized edits.\n\n### `rename_file`\n**Inputs**\n- Required: `file` source path, `new_name` destination path.\n- Optional: `apply`, `timeout`.\n\n**Execution**\n- Resolves absolute source and destination, rejects identical paths, missing source, existing destination, empty rename set, or directories with more than `MAX_RENAME_PAIRS` files.\n- `enumerateRenamePairs()` returns one `{oldUri,newUri}` pair for a file or walks every regular file in a directory tree.\n- Sends `workspace/willRenameFiles` with `{ files: pairs }` to every non-custom LSP server; collects returned `WorkspaceEdit`s and server notes.\n- Preview mode (`apply === false`) only formats those edits.\n- Apply mode runs each returned `WorkspaceEdit`, renames the source path on disk, sends `textDocument/didClose` for every renamed open file, deletes those `openFiles` entries, then sends `workspace/didRenameFiles`.\n\n**Output text**\n- Preview: `Rename preview: <file-count label> → <dest>` plus per-server edit summaries and optional server notes.\n- Apply: `Renamed <file-count label> → <dest>` plus applied edit summaries, filesystem rename line, and optional server notes.\n\n### `code_actions`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `query`, `apply`, `timeout`.\n\n**Execution**\n- Reads cached diagnostics for the open URI from `client.diagnostics` and sends `textDocument/codeAction` for a zero-width range at the resolved position.\n- When `apply !== true`, `query` is passed as `context.only: [query]`; this is a server-side kind filter.\n- When `apply === true`, `query` becomes a required client-side selector: either a zero-based numeric index or a case-insensitive substring of the action title.\n- Applying a `CodeAction` uses `applyCodeAction()`: optionally `codeAction/resolve`, then `applyWorkspaceEdit(edit)`, then optional `workspace/executeCommand`.\n- Applying a bare `Command` only runs `workspace/executeCommand`.\n\n**Output text**\n- List mode: `N code action(s):` plus `index: [kind] title` lines.\n- Apply mode success: `Applied \"title\":` plus `Workspace edit:` and/or `Executed command(s):` sections.\n- Apply mode miss: `No code action matches \"query\". Available actions:`.\n- Apply mode with no edit/command: `Action \"title\" has no workspace edit or command to apply`.\n\n### `status`\n**Inputs**\n- None.\n\n**Execution**\n- Reads configured servers from cached `LspConfig`, not `getActiveClients()`.\n- Calls `detectLspmux()` and appends status text when `lspmux` is installed.\n\n**Output text**\n- `Active language servers: ...` or `No language servers configured for this project`, optionally followed by `lspmux: active (multiplexing enabled)` or `lspmux: installed but server not running`.\n\n### `reload`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted `file`.\n- Single-file mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode reloads every non-custom LSP server.\n- Single-file mode reloads the primary server for that file.\n- `reloadServer()` tries `rust-analyzer/reloadWorkspace`, then `workspace/didChangeConfiguration` with `{ settings: {} }`; if neither works it kills the process so the next request cold-starts a new client.\n\n**Output text**\n- One line per server: `Reloaded <server>`, `Restarted <server>`, or `Failed to reload <server>: ...`.\n\n### `capabilities`\n**Inputs**\n- Optional: `file`, `timeout`.\n\n**Execution**\n- With a concrete `file`, inspects matching non-custom servers for that file.\n- With omitted `file` or `\"*\"`, inspects every non-custom configured server.\n- Starts servers as needed and dumps `client.serverCapabilities ?? {}` as pretty JSON.\n\n**Output text**\n- Per server: `<server>:` followed by indented `capabilities: { ... }`, or `<server>: failed to start (...)`.\n\n### `request`\n**Inputs**\n- Required: `query` method name.\n- Optional: `file`, `line`, `symbol`, `payload`, `timeout`.\n\n**Execution**\n- Chooses one non-custom server: file-scoped primary server, otherwise the first configured non-custom server.\n- Param building precedence:\n 1. If `payload` is present, parse JSON and use it verbatim.\n 2. Else if `file` is concrete and `line` is present, build `{ textDocument: { uri }, position: { line: line - 1, character } }` using `resolveSymbolColumn()`.\n 3. Else if `file` is concrete, build `{ textDocument: { uri } }`.\n 4. Else use `{}`.\n- Opens the file first when `file` is concrete.\n\n**Output text**\n- Success: `<server> ← <method>:\\n<formatted result>`, where non-string results are `JSON.stringify(..., null, 2)` and nullish values become `null`.\n- Failure: `LSP error from <server> on <method>: ...`.\n\n## Side Effects\n- Filesystem\n - Reads config files, target files, and root markers.\n - `rename` and `code_actions` may edit/create/delete/rename files via `applyWorkspaceEdit()`.\n - `rename_file` always renames the source path on disk in apply mode.\n - Server-initiated `workspace/applyEdit` requests also mutate files through `applyWorkspaceEdit()`.\n- Network\n - None directly; communication is local stdio JSON-RPC to subprocesses.\n- Subprocesses / native bindings\n - Spawns language servers with `ptree.spawn()`.\n - Workspace diagnostics spawns `cargo`, `npx`, `go`, or `pyright`.\n - `BiomeClient` and `SwiftLintClient` spawn CLI tools.\n - Optional `lspmux` detection spawns `lspmux status`; supported servers may be wrapped through `lspmux client`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Caches config per cwd in `configCache`.\n - Caches LSP clients per `command:cwd`, with `pendingRequests`, `diagnostics`, `openFiles`, `serverCapabilities`, and project-load state.\n - Caches custom linter clients by `serverName:cwd`.\n - Updates client `lastActivity`; optional idle-timeout cleanup is driven by `setIdleTimeout()`.\n- Background work / cancellation\n - Every request has an abortable timeout signal.\n - Aborting an in-flight LSP request sends `$/cancelRequest`.\n - Background message readers persist for each live client until process exit/shutdown.\n\n## Limits & Caps\n- Tool timeout clamp: default `20`, min `5`, max `60` seconds — `TOOL_TIMEOUTS.lsp` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- LSP request default timeout inside `sendRequest()`: `30_000ms` — `DEFAULT_REQUEST_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Warmup initialize timeout default: `5_000ms` — `WARMUP_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Project-load wait fallback: `15_000ms` — `PROJECT_LOAD_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Idle-client sweep interval when enabled: `60_000ms` — `IDLE_CHECK_INTERVAL_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Diagnostic message output cap: first `50` messages — `DIAGNOSTIC_MESSAGE_LIMIT` in `packages/coding-agent/src/lsp/index.ts`.\n- Single-file diagnostics wait: `3_000ms` — `SINGLE_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Batch/glob diagnostics wait per file: `400ms` — `BATCH_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Glob diagnostic target cap: first `20` matches — `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Workspace symbol cap: first `200` entries — `WORKSPACE_SYMBOL_LIMIT`.\n- Reference context cap: first `50` references include source context — `REFERENCE_CONTEXT_LIMIT`.\n- References retry count: `2` retries, `250ms` backoff — `REFERENCES_RETRY_COUNT`, `REFERENCES_RETRY_DELAY_MS`.\n- Directory rename cap: `1_000` file pairs — `MAX_RENAME_PAIRS`.\n- `detectLspmux()` state cache TTL: `5 * 60 * 1000ms`; liveness check timeout: `1_000ms` — `STATE_CACHE_TTL_MS`, `LIVENESS_TIMEOUT_MS` in `packages/coding-agent/src/lsp/lspmux.ts`.\n- Workspace diagnostics output cap: first `50` lines from the subprocess.\n\n## Errors\n- Missing or invalid inputs are usually returned as text with `details.success: false`, not thrown:\n - missing `file`/`query`/`new_name`\n - invalid JSON in `payload`\n - no matching server\n - invalid `rename_file` source/destination conditions\n- `resolveSymbolColumn()` throws explicit errors for missing files, missing symbols, and out-of-bounds `#N` selectors; these surface as `LSP error: ...` or request-specific error text.\n- `sendRequest()` rejects on timeout with `LSP request <method> timed out after <ms>ms`.\n- Client process exit rejects all pending requests with an exit-code/stderr error assembled in `getOrCreateClient()`.\n- Single-file action failures inside the main `try` become `LSP error: <message>`.\n- `request` has its own error envelope: `LSP error from <server> on <method>: <message>`.\n- Some server failures are intentionally softened:\n - diagnostics continue when one server fails\n - `rename_file` suppresses `workspace/willRenameFiles` “method not found” errors and records other server errors as notes\n - `code_actions` ignores `codeAction/resolve` failures and applies unresolved actions when possible\n- Aborts are not converted to text: `ToolAbortError` is rethrown.\n\n## Notes\n- `status` reports configured/available servers from `LspConfig`, not currently active client processes from `getActiveClients()`.\n- `getLspServerForFile()` excludes `createClient` adapters and linter-only servers; navigation/refactor actions never target Biome/SwiftLint custom clients.\n- `getServersForFile()` matches both file extensions and exact basenames from `fileTypes`; config can target names like `Dockerfile` if present.\n- `symbol` matching is exact first, then case-insensitive, and falls back to the Nth occurrence on the specified line only; it never scans other lines.\n- `code_actions` uses `query` in two different ways: server-side `context.only` filter in list mode, client-side title/index selector in apply mode.\n- `rename` and `rename_file` default to apply. Preview requires `apply: false`.\n- `request` with `file: \"*\"` is treated the same as omitted `file`: it does not build workspace-specific params.\n- `reload` does not recreate a client immediately after killing it; the next request triggers reinitialization.\n- `workspace/applyEdit` can apply edits initiated by the server outside the direct tool action result path.\n- `detectLspmux()` can be disabled with `PI_DISABLE_LSPMUX=1`; only `rust-analyzer` is in `DEFAULT_SUPPORTED_SERVERS`.\n- Startup LSP discovery (`discoverStartupLspServers(cwd)` in `sdk.ts`) runs for `enableLsp && options.hasUI`; the background warmup additionally requires `!settings.get(\"lsp.lazy\")`. `lsp.lazy` defaults to `true`, so by default discovered servers are surfaced with status `\"available\"` (gray dot in the welcome screen) and cold-start through `getOrCreateClient()` on first use (lsp tool call or edit/write on a matching file type). Print/RPC/ACP/script sessions skip discovery and warmup entirely. See `docs/sdk.md` § Startup performance.\n- `configCache` is per-process and never auto-invalidated; config changes require a fresh process to be observed by `getConfig()` callers.",
|
|
87
|
+
"tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `history://`, `issue://`, `local://`, `mcp://`, `memory://`, `omp://`, `pr://`, `rule://`, `skill://`, and `vault://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-snapshot-store.ts` — stores read lines for later hashline edit verification/recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. |\n| `:conflicts` | Render unresolved Git merge-conflict regions for a local file. |\n| `:N` / `:LN` / `:N-` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:R1,R2,...` | Multiple ranges, sorted and merged before reading (for example `:5-16,960-973`). |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts`, but use the same line-range parser for `:raw`, `:N`, `:A-B`, `:A+C`, `:5-10,20-30`, and `:range:raw` / `:raw:range`. Because URL ports also use `:`, add a trailing slash before a selector on a host/port URL, e.g. `https://example.com/:80`.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`, `elidedLines`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds up to 3 lines of context before/after explicit bounded ranges.\n10. Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...` or merged brace-pair lines containing `..`. When at least one span is elided, the text content ends with a footer like `[NN lines elided; re-read needed ranges, e.g. <path>:5-16,40-80]` using concrete ranges from the actual elisions.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_LEADING_CONTEXT_LINES = 1` / `RANGE_TRAILING_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline numbered output when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is a `¶PATH#TAG` header followed by `LINE:TEXT`, e.g. `¶src/foo.ts#0A1B` and `41:def alpha():`, from the session snapshot store plus `formatNumberedLine()` / `formatHashlineHeader()`.\n- The `edit`/hashline path consumes that header plus bare line numbers later; the four-hex tag is opaque and only meaningful in the session snapshot store that minted it. Immutable sources and `:raw` intentionally suppress hashline headers.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' (...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- `convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `history://`, `issue://`, `local://`, `mcp://`, `memory://`, `omp://`, `pr://`, `rule://`, and `skill://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, skills, memory, and agent outputs\n - sets `ignoreResultLimits: true` for `skill://` so the full skill text is paginated only by explicit selectors, not by the normal default line limit\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C`, and comma-separated multi-ranges do not refetch when cached output is usable. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Only the deterministic disk reads are non-abortable: plain-file line/range reads (`streamLinesFromFile`, multi-range) and directory listings (`#readDirectory`) are called with `undefined` instead of the `AbortSignal`, so an interrupt mid-read can't surface a misleading \"Operation aborted\" on a read that would have finished instantly. Every other branch keeps the signal and its helpers call `throwIfAborted(signal)` to stop promptly: URL/internal-URL reads (network), archive, sqlite, document conversion, image decode, structural summary, conflict scan, and the suffix-glob path resolution.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `1` leading and `3` trailing context lines on the constrained sides (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`).\n- File streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
|
|
88
|
+
"tools/recall.md": "# recall\n\n> Search the active long-term memory backend and return matching memories.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/memory-recall.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recall.md`\n- Hindsight collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — session state, recall query defaults, prompt-side auto-recall.\n - `packages/coding-agent/src/hindsight/content.ts` — result formatting and UTC timestamp formatting.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `recall` call and error mapping.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id and tag-filter scoping.\n- Mnemopi collaborators:\n - `packages/coding-agent/src/mnemopi/state.ts` — scoped local recall and result formatting with ids.\n - `packages/coding-agent/src/mnemopi/config.ts` — local bank scoping and recall limits.\n - `docs/tools/retain.md` — shared backend, storage, scoping, and retention behavior.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Natural-language search query. The tool passes it through unchanged except Mnemopi `per-project-tagged` may run an internal shared-bank fallback query. |\n\n## Outputs\nReturns a single-shot tool result.\n\nWhen matches exist:\n- `content[0].type = \"text\"`\n- `content[0].text = \"Found <n> relevant memory/memories (as of YYYY-MM-DD HH:MM UTC):\\n\\n<bullet list>\"`\n- `details = {}`\n\nHindsight bullet format comes from `formatMemories(...)`:\n- each bullet is `- <text> [<type>] (<mentioned_at>)`; the type and timestamp suffixes appear only when those fields are present.\n\nMnemopi bullet format comes from `formatScopedRecallWithIds(...)`:\n- each bullet is `- <content> (id: <id>|id unavailable) [<source>] (<YYYY-MM-DD>) c:<score>`; optional source, date, and score suffixes appear only when present.\n\nWhen no matches exist:\n- `content[0].text = \"No relevant memories found.\"`\n- `details = {}`\n\n## Flow\n1. `MemoryRecallTool.createIf(...)` exposes the tool when `memory.backend` is either `\"hindsight\"` or `\"mnemopi\"`.\n2. `execute(...)` wraps the operation in `untilAborted(...)`.\n3. If the backend is `mnemopi`:\n - it reads `session.getMnemopiSessionState()` and throws if the backend was not started;\n - it calls `state.recallResultsScoped(params.query)`;\n - scoped recall queries each configured recall bank with `recallEnhanced(query, recallLimit, { includeFacts: true, channelId: bank })`, merges/deduplicates results by id/content, sorts them, and truncates to `recallLimit`;\n - in `per-project-tagged`, the shared bank may receive one extra fallback query with project-bank literal tokens stripped so broad global memories still match;\n - results are formatted with ids for later `memory_edit` use.\n4. If the backend is `hindsight`:\n - it reads `session.getHindsightSessionState()` and throws if the backend was not started;\n - it calls `state.client.recall(...)` with `bankId`, query, configured `budget`, `maxTokens`, `types`, and bank-scope tag filters;\n - `HindsightApi.recall(...)` POSTs `/v1/default/banks/{bank_id}/memories/recall`;\n - results are formatted into a plain-text list with `formatMemories(...)`.\n5. Backend failures are logged with `logger.warn(\"recall failed\", ...)` and rethrown as `Error` instances when needed.\n\n## Modes / Variants\n- Tool path: explicit query-only recall. It does not compose context from recent turns.\n- Backend auto-recall has a richer query-composition path in `HindsightSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)` and `MnemopiSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)`.\n- Hindsight bank scoping:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`, so project-tagged and untagged global memories can both surface.\n- Mnemopi bank scoping:\n - `global` — recall reads the shared bank.\n - `per-project` — recall reads the project bank.\n - `per-project-tagged` — recall reads the project bank and shared bank, then merges results.\n- Session scope: reads cross-session memory data, using the active session's cached config and scope.\n\n## Side Effects\n- Network\n - Hindsight: `POST /v1/default/banks/{bank_id}/memories/recall`.\n - Mnemopi: none unless configured local runtime providers perform embedding/LLM work during recall.\n- Session state\n - None on success for the explicit tool path. Unlike backend auto-recall, this tool does not update `lastRecallSnippet` or refresh the system prompt.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Tool availability requires `memory.backend` to be `\"hindsight\"` or `\"mnemopi\"`; default `memory.backend` is `\"off\"`.\n- Hindsight client default budget for raw `HindsightApi.recall(...)` is `\"mid\"`; this tool overrides from config.\n- Hindsight recall settings:\n - `hindsight.recallBudget = \"mid\"`\n - `hindsight.recallMaxTokens = 1024`\n - `hindsight.recallTypes = [\"world\", \"experience\"]`\n- Mnemopi recall settings:\n - `mnemopi.recallLimit = 8`\n - `mnemopi.scoping` selects which local bank(s) are searched\n- The explicit tool path does not apply `hindsight.recallContextTurns`, `hindsight.recallMaxQueryChars`, `mnemopi.recallContextTurns`, or `mnemopi.recallMaxQueryChars`; those caps only affect backend auto-recall query composition.\n\n## Errors\n- Throws `Mnemopi backend is not initialised for this session.` when `memory.backend == \"mnemopi\"` but no state exists.\n- Throws `Hindsight backend is not initialised for this session.` when `memory.backend == \"hindsight\"` but no state exists.\n- Hindsight HTTP and fetch failures become `HindsightError` with `statusCode` and parsed `details` when available.\n- Mnemopi recall target failures inside `collectScopedRecallResults(...)` are caught per bank and logged only when `mnemopi.debug` is enabled; if all targets fail, the tool can return `No relevant memories found.`\n- Non-`Error` failures caught by the tool are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: storage, subagent aliasing, bank scoping, mission setup, and mental-model behavior.\n- Hindsight mental models are not fetched by this tool. They may already be present in the agent's developer instructions because the backend caches a `<mental_models>` block separately from recall results.\n- Mnemopi developer instructions may include a `<memories>` block from auto-recall; this explicit tool does not update that block.\n- The tool returns memory hits; it does not synthesize across them. Use `reflect` for that path.\n",
|
|
89
|
+
"tools/reflect.md": "# reflect\n\n> Synthesize an answer over the active long-term memory backend.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/memory-reflect.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/reflect.md`\n- Hindsight collaborators:\n - `packages/coding-agent/src/hindsight/bank.ts` — best-effort bank mission initialization.\n - `packages/coding-agent/src/hindsight/state.ts` — session state, shared bank scope, recall/reflect config.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `reflect` call and error mapping.\n- Mnemopi collaborators:\n - `packages/coding-agent/src/mnemopi/state.ts` — scoped local recall and context formatting.\n - `docs/tools/retain.md` — shared backend, storage, scoping, and mental-model behavior.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Question to answer from long-term memory. |\n| `context` | `string` | No | Extra guidance. Hindsight sends it as `context`; Mnemopi appends trimmed context to the recall query under `Additional context:`. |\n\n## Outputs\nReturns a single-shot tool result.\n\nHindsight:\n- `content[0].type = \"text\"`\n- `content[0].text = response.text?.trim() || \"No relevant information found to reflect on.\"`\n- `details = {}`\n- The tool returns the Hindsight server's synthesized text directly; it does not expose raw recall hits.\n\nMnemopi:\n- if no scoped recall results exist: `content[0].text = \"No relevant information found to reflect on.\"`\n- otherwise: `content[0].text = \"Based on recalled memories:\\n\\n<formatted context>\"`\n- `details = {}`\n- The local path performs recall plus formatting; it does not call a separate synthesis endpoint.\n\n## Flow\n1. `MemoryReflectTool.createIf(...)` exposes the tool when `memory.backend` is either `\"hindsight\"` or `\"mnemopi\"`.\n2. `execute(...)` runs under `untilAborted(...)`.\n3. If the backend is `mnemopi`:\n - it reads `session.getMnemopiSessionState()` and throws if the backend was not started;\n - if `context` has non-whitespace content, it recalls with `<query>\\n\\nAdditional context:\\n<context>`; otherwise it recalls with `query`;\n - it calls `state.recallResultsScoped(...)` using the same local scoping and merge behavior as `recall`;\n - if results exist, it renders them through `state.formatContextScoped(...)` and prefixes `Based on recalled memories:`.\n4. If the backend is `hindsight`:\n - it reads `session.getHindsightSessionState()` and throws if the backend was not started;\n - it calls `ensureBankMission(...)` with the current `bankId`, config, and process-local `missionsSet`;\n - `ensureBankMission(...)` best-effort `PUT`s `/v1/default/banks/{bank_id}` with `reflect_mission` and optional `retain_mission` exactly once per bank/process; failures are swallowed;\n - it calls `state.client.reflect(...)` with `query`, optional `context`, configured recall budget, and bank-scope tag filters;\n - `HindsightApi.reflect(...)` POSTs `/v1/default/banks/{bank_id}/reflect` and defaults its own budget to `\"low\"` when callers omit one; this tool always passes the configured budget;\n - blank or whitespace-only responses are replaced with `No relevant information found to reflect on.`\n5. Backend failures are logged with `logger.warn(\"reflect failed\", ...)` and rethrown as `Error` instances when needed.\n\n## Modes / Variants\n- Hindsight tool path: one remote reflect request, optionally focused by `context`.\n- Mnemopi tool path: one local scoped recall followed by context formatting.\n- Hindsight bank scoping:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`.\n- Mnemopi bank scoping:\n - `global` — reads the shared bank.\n - `per-project` — reads the project bank.\n - `per-project-tagged` — reads the project bank and shared bank, then merges results.\n- Session scope: reads cross-session memory data, but does not persist local output.\n\n## Side Effects\n- Network\n - Hindsight: optional `PUT /v1/default/banks/{bank_id}` from `ensureBankMission(...)`, then `POST /v1/default/banks/{bank_id}/reflect`.\n - Mnemopi: none unless configured embedding or LLM providers are used by the local runtime during recall.\n- Session state\n - Reads session-held backend scope and config only. Does not update `lastRecallSnippet`, Hindsight mental-model cache, or retain queues.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Tool availability requires `memory.backend` to be `\"hindsight\"` or `\"mnemopi\"`; default `memory.backend` is `\"off\"`.\n- Tool-level params: only `query` is required; `context` is optional.\n- Hindsight budget setting comes from `hindsight.recallBudget`, default `\"mid\"`.\n- Hindsight `reflect` has no client-side token cap parameter here; unlike `recall`, the tool does not pass `maxTokens`.\n- Hindsight mission initialization tracks up to `MISSION_SET_CAP = 10_000` bank ids, then drops the oldest half of the sorted set.\n- Mnemopi result count is capped by `mnemopi.recallLimit`, default `8`.\n\n## Errors\n- Throws `Mnemopi backend is not initialised for this session.` when `memory.backend == \"mnemopi\"` but no state exists.\n- Throws `Hindsight backend is not initialised for this session.` when `memory.backend == \"hindsight\"` but no state exists.\n- Hindsight HTTP and fetch failures become `HindsightError` with `statusCode` and parsed `details` when available.\n- Hindsight `ensureBankMission(...)` failures are silent to the tool caller; only the later reflect request can fail visibly.\n- Mnemopi recall target failures inside `collectScopedRecallResults(...)` are caught per bank and logged only when `mnemopi.debug` is enabled; if all targets fail, the tool can return the no-information text.\n- Non-`Error` failures caught by the tool are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: storage, subagent aliasing, bank scoping, seed mental models, and prompt injection.\n- Hindsight `reflect` does not read the cached `<mental_models>` block directly. It queries the Hindsight server over the bank contents. The same session may also have separate mental-model context injected into its developer instructions.\n- Hindsight reflect mission and retain mission are bank-level server settings, not per-request payload. The tool just ensures they are present best-effort before reflecting.\n- Mnemopi `reflect` is local recall plus formatting, so its output shape differs from Hindsight's remote synthesized answer.\n",
|
|
90
|
+
"tools/render_mermaid.md": "# render_mermaid\n\n> Convert Mermaid source into terminal-friendly ASCII/Unicode text.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/render-mermaid.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/render-mermaid.md`\n- Key collaborators:\n - `packages/utils/src/mermaid-ascii.ts` — thin wrapper over renderer package.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and enablement gate.\n - `packages/coding-agent/src/sdk.ts` — session-facing artifact allocation hook.\n - `packages/coding-agent/src/session/session-manager.ts` — persistent-session artifact path allocation.\n - `packages/coding-agent/src/session/artifacts.ts` — artifact filename generation and writes.\n- Related user/runtime doc: `docs/render-mermaid.md`\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `mermaid` | `string` | Yes | Mermaid source text. Schema example: `graph TD; A-->B`. |\n| `config` | `object` | No | Optional renderer options. Sanitized before rendering; numeric fields are floored and clamped to `>= 0`. |\n\n`config` fields:\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `useAscii` | `boolean` | No | `true` for plain ASCII, `false`/omitted for Unicode box-drawing output. Passed through unchanged. |\n| `paddingX` | `number` | No | Horizontal spacing. `Math.floor`, then `Math.max(0, value)`. |\n| `paddingY` | `number` | No | Vertical spacing. `Math.floor`, then `Math.max(0, value)`. |\n| `boxBorderPadding` | `number` | No | Inner box padding. `Math.floor`, then `Math.max(0, value)`. |\n\n## Outputs\nThe tool returns a single text content block:\n\n- inline body: rendered diagram text\n- optional trailer: `Saved artifact: artifact://<id>` when artifact storage is available\n\n`details` may include:\n\n- `artifactId?: string`\n\nNo image path, SVG, PNG, or binary payload is returned. Stored artifacts are plain text `.log` files; artifact filenames are allocated as `<id>.render_mermaid.log` by `packages/coding-agent/src/session/artifacts.ts`.\n\n## Flow\n1. `RenderMermaidTool.execute()` in `packages/coding-agent/src/tools/render-mermaid.ts` receives `mermaid` and optional `config`.\n2. `sanitizeRenderConfig()` normalizes `paddingX`, `paddingY`, and `boxBorderPadding` to non-negative integers; `useAscii` is passed through.\n3. The tool calls `renderMermaidAscii()` from `@oh-my-pi/pi-utils`.\n4. `packages/utils/src/mermaid-ascii.ts` forwards directly to `renderMermaidASCII()` from the `beautiful-mermaid` package.\n5. The tool optionally asks the session for an artifact slot with `allocateOutputArtifact(\"render_mermaid\")`.\n6. If a path is returned, `Bun.write()` persists the full rendered text to that file.\n7. The tool returns the rendered text, plus an `artifact://` line and `details.artifactId` when persistence succeeded.\n\n## Modes / Variants\n- Default render: Unicode box-drawing output when `config.useAscii` is omitted or false.\n- ASCII render: plain ASCII output when `config.useAscii` is true.\n- Persistent-session path: artifact text is written when `allocateOutputArtifact()` returns a path.\n- Ephemeral-session path: no artifact is written; the inline text result is still returned.\n\n## Side Effects\n- Filesystem\n - May write one session artifact via `Bun.write()`.\n - Artifact filename format is `<id>.render_mermaid.log`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Consumes the session artifact allocator hook.\n - Returns `details.artifactId` for the tool result.\n\n## Limits & Caps\n- No tool-local timeout, retry, truncation, or streaming path.\n- Numeric config fields are quantized to integers with `Math.floor()` and clamped to `0` minimum in `sanitizeRenderConfig()`.\n- Renderer engine is `beautiful-mermaid@1.1.3` per root `package.json` / `bun.lock`.\n- The tool is registered as discoverable and gated by `renderMermaid.enabled` in `packages/coding-agent/src/tools/index.ts`.\n\n## Errors\n- `renderMermaidAscii()` is not wrapped in a local `try/catch`; renderer exceptions propagate out of `execute()`.\n- Invalid Mermaid syntax therefore fails the tool call rather than returning partial output.\n- Artifact allocation failures inside the SDK hook are swallowed there and converted to `{}` in `packages/coding-agent/src/sdk.ts`; rendering still succeeds, just without a saved artifact.\n- Artifact write failures from `Bun.write()` are not caught in the tool and will fail the call.\n\n## Notes\n- The tool summary string says `Render a Mermaid diagram to an image`, but the implementation and prompt both produce text, not images.\n- Despite the name, this tool does not use Puppeteer, browser rendering, Mermaid CLI, or native bindings; rendering stays in-process through the JS package wrapper.\n- `docs/render-mermaid.md` covers operator-facing behavior and enablement; keep this file focused on the tool contract and runtime path.\n",
|
|
91
|
+
"tools/resolve.md": "# resolve\n\n> Finalizes a pending action by applying or discarding it.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/resolve.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/resolve.md`\n- Key collaborators:\n - `docs/resolve-tool-runtime.md` — preview/apply runtime reference\n - `packages/coding-agent/src/extensibility/custom-tools/loader.ts` — forwards custom pending actions into the queue\n - `packages/coding-agent/src/tools/ast-edit.ts` — built-in preview producer example\n - `packages/coding-agent/src/session/agent-session.ts` — tool-choice queue, standing resolve handler, and invoker access\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"apply\" | \"discard\"` | Yes | Whether to commit or reject the pending action. |\n| `reason` | `string` | Yes | Required explanation passed through to the handler. |\n| `extra` | `Record<string, unknown>` | No | Free-form metadata passed through to the handler. Plan approval uses this for data such as a title slug; preview-style actions usually ignore it. |\n\n## Outputs\n- Single-shot result.\n- `execute()` returns whatever the queued or standing invoker returns, with `details` wrapped/augmented to include:\n - `action`\n - `reason`\n - `extra?`\n - `sourceToolName?`\n - `label?`\n - `sourceResultDetails?` — original `result.details` from the apply/reject callback when present\n- If `discard` has no custom reject callback, or the reject callback returns `undefined`, the default success payload is `Discarded: <label>. Reason: <reason>`.\n- The TUI renderer is inline and merges call+result into one block.\n\n## Flow\n1. Preview-producing code can call `queueResolveHandler(...)` with a label, source tool name, `apply(reason, extra?)` callback, and optional `reject(reason, extra?)` callback.\n2. Modes can also register a standing resolve handler through `session.setStandingResolveHandler(...)`; `resolve.execute()` consults it only when no queued invoker is active.\n3. `queueResolveHandler(...)` asks the session for a forced `resolve` tool choice and pushes it into the tool-choice queue with `pushOnce(...)`.\n4. The queued entry is marked `now: true`; if the model rejects that forced tool choice, `onRejected` returns `requeue`, so the reminder comes back.\n5. `queueResolveHandler(...)` also injects a `resolve-reminder` steering message:\n\n```text\n<system-reminder>\nThis is a preview. Call the `resolve` tool to apply or discard these changes.\n</system-reminder>\n```\n\n6. When `resolve.execute()` runs, it wraps the call in `untilAborted(...)` and fetches `session.peekQueueInvoker?.() ?? session.peekStandingResolveHandler?.()`.\n7. If no invoker exists, it throws `ToolError(\"No pending action to resolve. Nothing to apply or discard.\")`.\n8. Otherwise it invokes the current handler with the full params object.\n9. `runResolveInvocation(...)` builds base details from `action`, `reason`, `extra`, `sourceToolName`, and `label`.\n10. For `apply`, it calls the producer's `apply(reason, extra)` callback.\n11. If `apply` throws, `runResolveInvocation(...)` calls `onApplyError` when present. The queued preview integration uses this to re-push the resolve directive and steering reminder so the action remains pending. Non-`ToolError` exceptions are wrapped as `ToolError(\"Apply failed: <message>\")`.\n12. For `discard`, it calls `reject(reason, extra)` when provided. If no reject callback exists or it returns `undefined`, `resolve` fabricates the default discard message.\n13. Before returning callback results, it merges resolve metadata into `result.details` so renderer/UI code can show the action, label, and originating tool.\n\n## Modes / Variants\n- `apply`: runs the pending action's `apply(reason, extra?)` callback and returns its content.\n- `discard` with reject callback: runs `reject(reason, extra?)` and returns that callback's content when non-`undefined`.\n- `discard` without reject callback, or with a reject callback returning `undefined`: returns the built-in `Discarded: ...` text payload.\n- Queued handler: one in-flight tool-choice queue invoker, used by preview producers such as `ast_edit`.\n- Standing handler: long-lived mode-owned handler, used as a fallback when no queue invoker is active.\n\n## Side Effects\n- Session state\n - Consumes or invokes the current pending action through the session tool-choice queue or standing handler; `resolve` does not maintain its own stack.\n - Adds a `resolve-reminder` steering message when a queued preview is registered.\n - On queued apply failure, requeues the same pending action before rethrowing so the model can discard or retry instead of losing the pending preview.\n- User-visible prompts / interactive UI\n - The visible effect depends on the preview-producing tool and the resolve renderer.\n - Renderer result blocks show `Accept`, `Discard`, or `Failed`, include the pending action label, and display the reason.\n- Background work / cancellation\n - `untilAborted(...)` lets abort signals interrupt resolution before or while the callback awaits.\n\n## Limits & Caps\n- Hidden tool: `ResolveTool.hidden = true`, and normal requested-tool filtering removes `resolve`; `createTools(...)` adds it separately as a hidden tool.\n- Exactly one active queue invoker is consulted per call via `session.peekQueueInvoker()`; if none exists, one standing handler may be consulted via `session.peekStandingResolveHandler()`.\n- There is no independent queue depth cap in this tool; ordering follows the shared tool-choice queue and mode-owned standing handler lifecycle.\n\n## Errors\n- No pending action or standing handler: throws `ToolError(\"No pending action to resolve. Nothing to apply or discard.\")`.\n- `apply` callback throws `ToolError`: the original `ToolError` propagates.\n- `apply` callback throws any other value: `resolve` wraps it as `ToolError(\"Apply failed: <message>\")` after running `onApplyError` when present.\n- `reject` callback exceptions propagate without the apply-specific wrapper.\n- Aborts during `untilAborted(...)` surface as the underlying abort error from the utility.\n\n## Notes\n- `reason` and `extra` are passed through; `resolve` itself does not interpret them.\n- `queueResolveHandler(...)` is the canonical built-in preview integration point; custom tools use `pushPendingAction(...)`, which the loader forwards into the same mechanism.\n- Standing handlers let modes accept `resolve` invocations without forcing the tool choice every turn.\n- `sourceResultDetails` is added only when the apply/reject callback returned a non-null `details` field; custom pending-action `details` are not forwarded automatically by the loader.\n",
|
|
92
|
+
"tools/retain.md": "# retain\n\n> Store durable facts through the active long-term memory backend.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/memory-retain.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/retain.md`\n- Hindsight collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — per-session queue, flush, auto-retain.\n - `packages/coding-agent/src/hindsight/backend.ts` — session bootstrap, prompt injection, subagent aliasing.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id derivation, tag scoping, mission setup.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `retain` / `retainBatch` calls.\n - `packages/coding-agent/src/hindsight/content.ts` — retention transcript shaping, memory-tag stripping.\n - `packages/coding-agent/src/hindsight/mental-models.ts` — bank-scoped mental-model seeding and cache rendering.\n - `packages/coding-agent/src/hindsight/seeds.json` — built-in mental-model seed definitions.\n - `packages/coding-agent/src/hindsight/transcript.ts` — extracts user/assistant turns for auto-retain.\n- Mnemopi collaborators:\n - `packages/coding-agent/src/mnemopi/backend.ts` — local backend bootstrap, prompt injection, subagent aliasing, enqueue/clear.\n - `packages/coding-agent/src/mnemopi/state.ts` — scoped recall/retain state and local writes.\n - `packages/coding-agent/src/mnemopi/config.ts` — local SQLite path, bank, scoping, provider settings.\n - `packages/mnemopi/src/core/memory.ts` — local memory runtime used by `remember(...)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `items` | `Array<{ content: string; context?: string }>` | Yes | One or more memories to store. `minItems: 1`. Each item must be self-contained; `context` is optional per-item provenance. |\n\n## Outputs\nThe output depends on the active `memory.backend`.\n\nHindsight:\n- `content[0].type = \"text\"`\n- `content[0].text = \"<count> memory queued.\"` or `\"<count> memories queued.\"`\n- `details = { count: number }`\n- The write is not confirmed before the tool returns. The queue flushes later; flush failures emit a session warning notice and are not returned to the model.\n\nMnemopi:\n- `content[0].type = \"text\"`\n- `content[0].text = \"<count> memory stored.\"` or `\"<count> memories stored.\"`\n- `details = { count: number }`\n- The tool calls the local backend synchronously, but `rememberScoped(...)` catches per-item write failures and returns `undefined`; the tool still reports the requested count.\n\n## Flow\n1. `MemoryRetainTool.createIf(...)` exposes the tool when `memory.backend` is either `\"hindsight\"` or `\"mnemopi\"`.\n2. `execute(...)` re-reads `memory.backend` and dispatches to the matching session state.\n3. If the backend is `mnemopi`:\n - it fetches `session.getMnemopiSessionState()` and throws if the backend was not started;\n - for each item, it calls `state.rememberScoped(item.content, ...)` with `source: \"coding-agent-retain\"`, `importance: 0.75`, `scope: \"bank\"`, `extract: true`, `extractEntities: true`, `veracity: \"tool\"`, `memoryType: \"fact\"`, and metadata `{ session_id, cwd, context, tool: \"retain\" }`;\n - writes go to the scoped retain bank selected by `packages/coding-agent/src/mnemopi/config.ts`.\n4. If the backend is `hindsight`:\n - it fetches `session.getHindsightSessionState()` and throws if the backend was not started;\n - each input item is handed to `HindsightSessionState.enqueueRetain(...)`;\n - `HindsightRetainQueue.enqueue(...)` appends the item and either flushes immediately when the queue reaches `RETAIN_FLUSH_BATCH_SIZE`, or starts a debounce timer for `RETAIN_FLUSH_INTERVAL_MS`;\n - on flush, `HindsightRetainQueue.#doFlush(...)` verifies ownership, best-effort initializes the bank mission, maps items to `MemoryItemInput` with `context ?? config.retainContext`, `metadata.session_id`, and bank-scope tags, then sends one async `retainBatch(...)` request.\n\n## Modes / Variants\n- Hindsight tool path: queued batch write only.\n- Mnemopi tool path: direct local `remember(...)` into the scoped retain bank.\n- Hindsight bank scoping from `computeBankScope(...)`:\n - `global` — one shared bank, no project tags.\n - `per-project` — bank id gets `-<cwd basename>` appended.\n - `per-project-tagged` — shared bank plus `project:<cwd basename>` tags on retained memories.\n- Mnemopi bank scoping from `resolveBankScope(...)`:\n - `global` — retain and recall use the shared bank.\n - `per-project` — retain and recall use the project bank.\n - `per-project-tagged` — retain writes project-local memories; recall also reads the shared bank.\n- Session scope:\n - tool-called retains are per-session work for the active backend;\n - persisted Hindsight memories are cross-session server-side bank data;\n - persisted Mnemopi memories are local SQLite data;\n - subagents alias parent memory state for both supported backends.\n\n## Side Effects\n- Filesystem\n - Hindsight: none for retained memories. No local memory file is written.\n - Mnemopi: writes to local SQLite under `mnemopi.dbPath`, defaulting beneath the agent memories directory (`mnemopi/mnemopi.db`) with one database file per scoped bank when needed.\n- Network\n - Hindsight: `POST /v1/default/banks/{bank_id}/memories` via `retainBatch(...)`, plus optional `PUT /v1/default/banks/{bank_id}` via `ensureBankMission(...)` before first write per bank/process.\n - Mnemopi: none unless configured embedding or LLM providers make calls during extraction.\n- Session state\n - Hindsight: appends to the in-memory `HindsightRetainQueue`, includes `metadata.session_id`, and shares parent state for subagents.\n - Mnemopi: writes through the session's scoped `Mnemopi` instance, includes `session_id`, `cwd`, and optional `context`, and shares scoped resources with subagents.\n- User-visible prompts / interactive UI\n - Hindsight async flush failures emit `session.emitNotice(\"warning\", ...)`; the model is not told.\n - Mnemopi write failures are logged by `rememberInScope(...)`; the tool response does not expose per-item failures.\n- Background work / cancellation\n - Hindsight flush runs later on timer, queue-size threshold, `agent_end`, backend `enqueue(...)`, or backend `clear(...)`.\n - Mnemopi fact/entity extraction may continue in the Mnemopi runtime; backend `enqueue(...)` calls `flushExtractions()` before sleeping sessions.\n\n## Limits & Caps\n- Input schema requires `items.length >= 1`.\n- Tool availability requires `memory.backend` to be `\"hindsight\"` or `\"mnemopi\"`; default `memory.backend` is `\"off\"`.\n- Hindsight queue flush threshold: `RETAIN_FLUSH_BATCH_SIZE = 16`.\n- Hindsight queue debounce: `RETAIN_FLUSH_INTERVAL_MS = 5_000`.\n- Hindsight queue writes use `retainBatch(..., { async: true })`; the client does not wait for server-side consolidation.\n- Hindsight auto-retain settings:\n - `hindsight.retainEveryNTurns` default `3`\n - `hindsight.retainOverlapTurns` default `2`\n - `hindsight.retainContext` default `\"omp\"`\n - `hindsight.retainMode` default `\"full-session\"`\n- Mnemopi retain settings:\n - `mnemopi.retainEveryNTurns` default `4`\n - `mnemopi.autoRetain` controls automatic retention of completed conversation turns\n - `mnemopi.scoping` selects `global`, `per-project`, or `per-project-tagged`\n\n## Errors\n- Throws `Mnemopi backend is not initialised for this session.` when `memory.backend == \"mnemopi\"` but no state exists.\n- Throws `Hindsight backend is not initialised for this session.` when `memory.backend == \"hindsight\"` but no state exists.\n- Hindsight queue enqueue on disposed state throws `Hindsight retain queue is closed.`\n- Hindsight flush-time API failures are caught in `HindsightRetainQueue.#doFlush(...)`, logged, and converted into a warning notice instead of a tool error.\n- Hindsight mission creation failures are swallowed in `ensureBankMission(...)`; writes continue.\n- Mnemopi `remember(...)` failures are caught in `MnemopiSessionState.rememberInScope(...)`, logged, and not rethrown to the tool caller.\n\n## Notes\n- Hindsight storage is server-side. `hindsightBackend.clear(...)` only clears local cache/state and warns that upstream deletion must happen in Hindsight UI or `deleteBank`.\n- Mnemopi storage is local SQLite. `mnemopiBackend.clear(...)` removes the scoped database files for the active configuration.\n- Hindsight auto-retain uses the same bank but a different path than this tool: `retainSession(...)` extracts plain user/assistant transcript, strips `<memories>` / `<mental_models>` blocks, and calls single-item `retain(...)`.\n- Mnemopi auto-retain stores prepared transcripts with `source: \"coding-agent-transcript\"`, `importance: 0.65`, `veracity: \"unknown\"`, and `memoryType: \"episode\"`.\n- Hindsight mental-model bootstrap lives in the shared backend: `HindsightSessionState.runMentalModelLoad(...)` optionally resolves seeds, creates missing models, then caches a rendered `<mental_models>` block for prompt injection.\n- Built-in Hindsight seeds are `user-preferences`, `project-conventions`, and `project-decisions`. `projectTagged: true` seeds inherit the active scope's retain tags; untagged seeds read the whole bank.\n- Hindsight mental-model defaults: `hindsight.mentalModelsEnabled = true`, `hindsight.mentalModelAutoSeed = true`, `hindsight.mentalModelRefreshIntervalMs = 5 * 60 * 1000`, `hindsight.mentalModelMaxRenderChars = 16_000`. First-turn loading waits up to `MENTAL_MODEL_FIRST_TURN_DEADLINE_MS = 1500`.\n- Hindsight seed lifecycle is create-only. Changing `packages/coding-agent/src/hindsight/seeds.json` does not mutate existing server-side models.\n- `recall.md` and `reflect.md` rely on the same backend selection and scoping behavior.\n",
|
|
93
|
+
"tools/rewind.md": "# rewind\n\n> End an active checkpoint by pruning exploratory context and retaining a concise report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/rewind.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — validates pending rewind state, applies the actual rewind, and injects the retained report.\n - `packages/coding-agent/src/session/session-manager.ts` — branches the persisted session tree and appends persisted summary/report entries.\n - `packages/coding-agent/src/session/messages.ts` — converts persisted `branch_summary` entries into LLM-visible branch-summary messages on rebuilt context.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and shares the `checkpoint.enabled` gate.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `report` | `string` | Yes | Investigation findings. `execute()` trims it and rejects the empty result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Rewind requested.`\n - `Report captured for context replacement.`\n- `details`:\n - `report: string` — trimmed report text\n - `rewound: true`\n\nThe returned tool result is not the final rewind. `AgentSession` waits until `turn_end`, then applies the rewind side effects asynchronously.\n\n## Flow\n1. `RewindTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` hides the tool from subagents.\n2. `RewindTool.execute()` rejects subagent calls with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects calls with no active checkpoint using `ToolError(\"No active checkpoint.\")`.\n4. It trims `params.report`; if empty, it throws `ToolError(\"Report cannot be empty.\")`.\n5. It returns a `toolResult()` with `details.report` and `details.rewound = true`.\n6. On `tool_execution_end`, `AgentSession` extracts the report from `details.report` or the first text content block and stores it in `#pendingRewindReport`.\n7. On `turn_end`, if `#pendingRewindReport` is set, `AgentSession.#applyRewind()` runs.\n8. `#applyRewind()` computes `safeCount = clamp(checkpointMessageCount, 0, agent.state.messages.length)` and calls `agent.replaceMessages(agent.state.messages.slice(0, safeCount))`.\n9. It then calls `sessionManager.branchWithSummary(checkpointEntryId, report, { startedAt })`. That moves the persisted session leaf back to the checkpoint entry and appends a new `branch_summary` entry whose `summary` is the rewind report.\n10. If `checkpointEntryId` no longer resolves, it logs a warning and falls back to `branchWithSummary(null, report, { startedAt })`, branching from root instead.\n11. `#applyRewind()` appends a hidden in-memory custom message `{ customType: \"rewind-report\", content: report, display: false }` and persists the same payload through `sessionManager.appendCustomMessageEntry(\"rewind-report\", ...)` with `details = { startedAt, rewoundAt }`.\n12. Finally it clears `#checkpointState` and `#pendingRewindReport`.\n\n## Modes / Variants\n- Normal rewind: checkpoint entry exists; session history branches from that exact entry.\n- Fallback rewind: checkpoint entry ID is missing from the current session tree; rewind branches from root and logs a warning.\n- Immediate turn-end apply: rewind side effects happen only after the surrounding assistant turn finishes, not inside `RewindTool.execute()`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Replaces in-memory conversation history with the prefix ending at the checkpoint tool result.\n - Adds a hidden custom message `rewind-report` carrying the retained report.\n - Clears the active checkpoint state and pending rewind report.\n - Repositions the persisted session leaf to the checkpoint branch point and appends new session entries.\n- Filesystem\n - Persists the new `branch_summary` and `custom_message` entries into the session `.jsonl` file through normal `SessionManager` append persistence.\n - Session files are named `<ISO-timestamp-with-:-and-.-replaced>_<uuidv7>.jsonl` in the session directory; default directory selection is documented in `SessionManager.create()` as `~/.omp/agent/sessions/<encoded-cwd>/` when no override is passed.\n- User-visible prompts / interactive UI\n - The tool result itself is visible.\n - The persisted `branch_summary` becomes an LLM-visible `branchSummary` message when context is rebuilt from `SessionManager.buildSessionContext()`; `messages.ts` renders it as a user-role text message using `packages/agent/src/compaction/prompts/branch-summary-context.md`.\n - The persisted `rewind-report` custom message also participates in rebuilt LLM context because `custom_message` entries are converted through `createCustomMessage()`.\n- Background work / cancellation\n - Rewind application is deferred to `turn_end`. There is no separate job object or cancel handle.\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- Top-level sessions only.\n- Requires exactly one active checkpoint; there is no path to name or choose among multiple checkpoints.\n- Report text must be non-empty after `trim()`.\n- Rewind restores only the message prefix recorded by `checkpointMessageCount`; there is no file restore, artifact restore, blob restore, or process restore path.\n- Persisted report/summary content is still subject to the global session persistence cap `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"No active checkpoint.\")` — thrown when no checkpoint state is present.\n- `ToolError(\"Report cannot be empty.\")` — thrown when the trimmed report is empty.\n- Missing checkpoint entry IDs during apply do not fail the tool call; `#applyRewind()` catches the error, logs `Rewind branch checkpoint missing, falling back to root`, and branches from root.\n\n## Notes\n- Checkpoint selection is implicit. `rewind` always targets the single `#checkpointState` captured by the last successful `checkpoint`; there is no checkpoint list, label, or ID parameter.\n- Restored state is transcript/session-tree state only:\n - in-memory `agent.state.messages` prefix up to `checkpointMessageCount`\n - persisted session leaf reset to `checkpointEntryId` or root fallback\n - retained rewind report as `branch_summary` and hidden `rewind-report` custom message\n- Not restored:\n - filesystem contents\n - git state\n - artifacts under `packages/coding-agent/src/session/artifacts.ts`\n - blob-store payloads under `packages/coding-agent/src/session/blob-store.ts`\n - prompt history rows in `packages/coding-agent/src/session/history-storage.ts`\n - auth or other agent storage in `packages/coding-agent/src/session/agent-storage.ts`\n- There is no concurrent-edit reconciliation. If code or session-adjacent state changes during the checkpoint window, rewind does not merge or revert them; it only drops conversation context and rewires the session branch.\n- Rewind is not destructive to persisted session history. `branchWithSummary()` appends a new `branch_summary` entry and moves the leaf; it does not delete the abandoned path from the `.jsonl` session log. The active context is cut over to the new branch, but the old entries remain in session storage.\n",
|
|
94
|
+
"tools/search.md": "# search\n\n> Search file contents with a regex across files, directories, globs, and internal URLs.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/match-line-format.ts` — model-facing anchor formatting.\n - `packages/coding-agent/src/tools/path-utils.ts` — path normalization, glob splitting, internal URL resolution.\n - `packages/coding-agent/src/tools/file-recorder.ts` — file ordering for grouped output.\n - `packages/coding-agent/src/tools/grouped-file-output.ts` — grouped per-file text layout.\n - `packages/coding-agent/src/session/streaming-output.ts` — line truncation and final byte truncation.\n - `packages/coding-agent/src/config/settings-schema.ts` — default context lines.\n - `packages/natives/native/index.d.ts` — native `grep()` types exposed to TS.\n - `crates/pi-natives/src/grep.rs` — native regex/file search implementation.\n - `docs/natives-text-search-pipeline.md` — native search pipeline overview.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pattern` | `string` | Yes | Regex pattern. `search.ts` trims it and rejects empty input. The native matcher enables multiline only when the pattern text contains a literal newline or the two-character sequence `\\\\n`. The model prompt explicitly documents literal-brace escaping such as ``interface\\\\{\\\\}``, although the native layer also auto-escapes braces that cannot be valid repetition quantifiers. |\n| `paths` | `string \\| string[]` | Yes | One file path, directory path, glob-like path, archive member, internal URL, or an array of those. Append a line-range selector such as `:50-100` or `:5-16,960-973` to a single file/archive/internal-resource input to constrain matches. Empty strings are rejected after trimming/quote stripping. Single entries accidentally joined with comma, semicolon, or whitespace are expanded only after existence validation; existing paths containing delimiters stay intact. Filesystem-backed internal URLs search their backing file; virtual internal resources search resolved text in memory. Internal URLs cannot contain glob characters. |\n| `i` | `boolean` | No | Case-insensitive search. Defaults to `false`. Passed to native `ignoreCase` or JS `RegExp` flags for virtual resources. |\n| `gitignore` | `boolean` | No | Respect `.gitignore` during directory scans. Defaults to `true`. Passed to native `gitignore`. |\n| `skip` | `number` | No | File-page offset for multi-file results. Defaults to `0`; `search.ts` floors finite numbers and rejects negative or non-finite values. Single-file searches ignore it because they do not paginate by file. |\n\n## Outputs\nThe tool returns a single text block in `content[0].text` plus structured `details`.\n\n- Match lines are formatted by `formatMatchLine()` as `*LINE:content` for matches and ` LINE:content` for context under a `¶PATH#TAG` header in hashline mode.\n - Hashline mode: `¶src/login.ts#1F2A`, `*5:content`, ` 9:content`.\n - Plain mode: `*5|content`, ` 9|content`.\n- Directory and multi-file results are grouped by file, with `# <path>#TAG` headings when editable hashline anchors are available and `# <path>` headings otherwise.\n- `details` may include:\n - `scopePath` — formatted search scope.\n - `matchCount`, `fileCount`, `files`, `fileMatches` — counts for the returned page.\n - `fileLimitReached` — more matching files remain beyond the current 20-file page.\n - `perFileLimitReached` — a hot file was trimmed to the per-file match cap.\n - `linesTruncated` — one or more matched lines were shortened to `512` chars plus `…`.\n - `truncated` and `meta.truncation` — final text output was head-truncated by `truncateHead()`.\n - `displayContent` — TUI-only rendering text with `│` gutters instead of model anchors.\n - `missingPaths` — multi-path entries skipped because their base path did not exist.\n- No-match result text is `No matches found`, optionally followed by skipped missing-path or unreadable-archive notes.\n\n## Flow\n1. `SearchTool.execute()` validates and normalizes input in `packages/coding-agent/src/tools/search.ts`:\n - trims `pattern`, rejects empty patterns;\n - normalizes `skip` to a non-negative integer;\n - expands delimiter-flattened `paths` entries with `expandDelimitedPathEntries()`, keeping existing delimiter-containing paths intact, accepting comma/semicolon splits when at least one part resolves, and accepting whitespace splits only when every part resolves;\n - peels any line-range selector from each resulting entry;\n - reads `search.contextBefore` and `search.contextAfter` from session settings (`1` and `3` by default);\n - enables multiline only when `pattern` contains `\\n` or an actual newline.\n2. Each `paths` entry is normalized with `normalizePathLikeInput()` again during shared scope resolution; this is a no-op for entries already normalized by delimiter expansion.\n3. Archive member paths such as `bundle.zip:src/foo.ts` are materialized to temporary UTF-8 scratch files before native grep. Binary or non-UTF-8 archive members are reported as skipped/unreadable.\n4. Internal URLs are resolved before filesystem scope resolution:\n - glob metacharacters (`*`, `?`, `[`, `{`) are rejected for internal URLs;\n - resources with `sourcePath` are searched through their backing file;\n - resources without `sourcePath` are searched in memory with JavaScript `RegExp`;\n - `omp://` expands to every embedded documentation file via URL completion;\n - immutable sources are tracked so output can suppress editable hashline numbered output per file.\n5. For multi-path calls, `partitionExistingPaths()` skips only ENOENT entries. If every filesystem entry is missing and no virtual internal resources remain, the tool errors.\n6. Path resolution branches:\n - one entry: `parseSearchPath()` splits `basePath` and optional glob;\n - multiple entries: `resolveExplicitSearchPaths()` computes a common base directory, brace-union glob, exact-file list, or degenerate-root target list.\n7. Line-range selectors are validated after path/archive/internal resolution. They are allowed only for single files, archive members, or virtual resources; glob/directory line-range selectors error.\n8. `search.ts` stats the resolved base path to decide file vs directory behavior.\n9. It calls native `grep()` from `@oh-my-pi/pi-natives` with:\n - `pattern`, `ignoreCase`, `multiline`, `gitignore`;\n - `hidden: true`;\n - `cache: false`;\n - `contextBefore` / `contextAfter` from settings;\n - `maxColumns: DEFAULT_MAX_COLUMN` (`512`);\n - `maxCount: INTERNAL_TOTAL_CAP` (`2000`);\n - `mode: content`.\n10. Native execution happens in `crates/pi-natives/src/grep.rs`:\n - `build_matcher()` sanitizes non-quantifier braces before regex compile;\n - if compile fails with unopened/unclosed-group errors, it retries after escaping previously unescaped parentheses;\n - directory scans use the grep pipeline described in `docs/natives-text-search-pipeline.md`.\n11. Search dispatch differs by resolved path set:\n - exact explicit files or degenerate-root multi-targets: JS loops over targets and merges `grep()` results itself;\n - single file/directory base: one `grep()` call handles native scanning.\n12. Virtual internal resources are searched in JS with `RegExp`; archive scratch paths and virtual paths are remapped back to user-facing selectors before rendering.\n13. JS output shaping then:\n - caps multi-file output to 20 files per page (`DEFAULT_FILE_LIMIT`), using `skip` as the next file offset;\n - caps matches per file to 20 for multi-file scopes and 200 for single-file scopes;\n - round-robins selected per-file matches so one file does not monopolize the page;\n - formats lines through `formatMatchLine()` for the model and `formatCodeFrameLine()` for TUI;\n - records non-truncated matched/context lines into the session file-read cache with `recordSparse()`.\n14. Final text is passed through `truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER })`, so the effective cap is the default byte cap from `streaming-output.ts`, not the default line cap.\n15. `toolResult()` attaches text plus limit/truncation metadata.\n\n## Modes / Variants\n1. **Single file path**\n - `grep()` searches one file.\n - Output is a flat list of match/context lines.\n - Visible limit is the first `200` matches after native matching and JS per-file capping.\n2. **Single directory path or single glob-like path**\n - `parseSearchPath()` may split the input into `path` + `glob`.\n - One native `grep()` scans the directory tree with `gitignore` and `hidden:true`.\n - Results are grouped into a 20-file page; use `skip` with the next file offset shown in the limit message.\n - JS round-robins the selected files' matches.\n3. **Multiple explicit paths/globs**\n - `resolveExplicitSearchPaths()` collapses them into a common base and either a brace-union glob, an explicit file list, or per-target searches when the only common base is the filesystem root.\n - Missing entries are skipped non-fatally unless all are missing.\n4. **Archive member paths**\n - Supported for UTF-8 text entries only. The member is extracted to a temporary scratch file for native grep, then displayed as `archive.ext:member`.\n5. **Internal URL paths**\n - Filesystem-backed resources search their resolved `sourcePath`.\n - Virtual resources without `sourcePath` search their resolved content in memory.\n - `omp://` expands to all embedded documentation files so it can be used as a docs search root.\n - No internal-URL globbing.\n - Immutable and virtual sources suppress editable hashline anchors.\n\n## Side Effects\n- Filesystem\n - Stats resolved search roots and input paths.\n - Reads matched files through native `grep()`.\n - Records sparse matched/context lines into the session file-read cache via `getFileReadCache(...).recordSparse(...)`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads session settings for context defaults.\n - Uses `session.internalRouter` to resolve internal URLs.\n - Populates tool `details.meta` with truncation/limit metadata.\n- Background work / cancellation\n - Wrapped in `untilAborted(signal, ...)` at the JS level.\n - `search.ts` does not pass `signal` or `timeoutMs` into native `grep()`, so native grep cancellation/timeouts are not used by this tool.\n\n## Limits & Caps\n- File page limit: `20` files (`DEFAULT_FILE_LIMIT` in `packages/coding-agent/src/tools/search.ts`).\n- Per-file match caps: `20` for multi-file scopes (`MULTI_FILE_PER_FILE_MATCHES`), `200` for single-file scopes (`SINGLE_FILE_MATCHES`).\n- Native/JS preselection cap: `2000` matches (`INTERNAL_TOTAL_CAP`).\n- Line truncation: `512` characters per emitted line (`DEFAULT_MAX_COLUMN` in `packages/coding-agent/src/session/streaming-output.ts`). Native grep marks truncated lines; JS reports `linesTruncated`.\n- Final text truncation: `truncateHead()` default byte cap `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). `search.ts` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so normal search output is byte-capped, not line-capped.\n- Context defaults: `search.contextBefore = 1`, `search.contextAfter = 3` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Pagination: `skip` is a file-page offset for multi-file scopes. The result text says `Use skip=<N> for the next page` when more files remain.\n- Native directory-scan cache: available in `grep.rs`, but this tool always sets `cache: false`.\n\n## Errors\n- `Pattern must not be empty` when trimmed `pattern` is empty.\n- `Skip must be a non-negative number` for negative or non-finite `skip`.\n- `` `paths` must contain non-empty paths or globs `` when any normalized path is empty.\n- `Glob patterns are not supported for internal URLs: ...` for internal URL + glob metacharacters.\n- Line-range selector errors include `Line-range selector requires a single file, not a glob: ...`, `Line-range selector requires a single file: ... is a directory`, and `Path not found for line-range selector: ...`.\n- `Cannot search archive member(s): ...` when all archive selectors are unreadable, binary, or non-UTF-8.\n- `Path not found: ...` when a filesystem-backed resolved base path is missing, or when every multi-path filesystem entry is missing.\n- Virtual internal URL regex compile failures are reported as `Invalid regex: ...` from JavaScript `RegExp`; filesystem-backed regex failures beginning with `regex` or `regex parse error` are normalized to `Invalid regex: ...`.\n- Multi-file native scans skip per-file open/search failures inside `grep.rs`; the scan continues with surviving files.\n\n## Notes\n- The model-facing prompt documents Rust regex syntax for filesystem-backed searches and JavaScript `RegExp` for virtual internal URL content.\n- Native `build_matcher()` already auto-escapes braces that cannot be valid quantifiers, so patterns like `${platform}` become searchable instead of failing. Valid quantifiers like `a{2,4}` remain unchanged.\n- Native compile retry also escapes unescaped literal parentheses only after an unopened/unclosed-group parse error. It is a fallback, not a general parser mode.\n- Internal URLs are resolved before path existence checks. Backed resources become ordinary filesystem paths; virtual resources stay in memory and do not mint editable hashline anchors.\n- `hidden:true` is hard-coded in `search.ts`; there is no model-facing flag to exclude dotfiles.\n- `gitignore:false` only affects native directory traversal. It does not disable the tool's own path normalization or explicit-file handling.\n- When `paths` resolves to multiple exact files, each target uses the `2000` internal cap before JS grouping.\n- The section tag in hashline mode is a four-hex opaque snapshot tag from the session snapshot store; `search` records whole-file snapshots when possible and prints bare line numbers beneath the header.\n",
|
|
95
|
+
"tools/search_tool_bm25.md": "# search_tool_bm25\n\n> Search the hidden tool-discovery index and activate the top matches for the current session.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search-tool-bm25.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search-tool-bm25.md`\n- Key collaborators:\n - `packages/coding-agent/src/tool-discovery/tool-index.ts` — discoverable-tool metadata and BM25 index/search.\n - `packages/coding-agent/src/session/agent-session.ts` — session discovery mode, corpus assembly, activation, cache invalidation.\n - `packages/coding-agent/src/sdk.ts` — initial hiding of discoverable built-ins and prompt-time discoverable summary.\n - `packages/coding-agent/src/tools/index.ts` — tool-session discovery hooks, essential/discoverable load modes, registry wiring.\n - `packages/coding-agent/src/config/settings-schema.ts` — `tools.discoveryMode` and legacy `mcp.discoveryMode` settings.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Natural-language or keyword query. Trimmed before search; empty-after-trim is rejected. |\n| `limit` | `integer` | No | Max matches to return and activate. Minimum `1`. Defaults to `8` (`DEFAULT_LIMIT`). |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- Model-visible `content` is one text part containing JSON with:\n\n```json\n{\"query\":\"...\",\"activated_tools\":[\"...\"],\"match_count\":2,\"total_tools\":17}\n```\n\n- Runtime-only `details` carries the ranked matches used by the TUI renderer:\n - `query`, `limit`, `total_tools`\n - `activated_tools`: tool names activated by this call\n - `active_selected_tools`: cumulative discovered-tool selections still active\n - `tools`: array of match objects with\n - `name`\n - `label`\n - `description` (`tool.summary`; this is the only snippet-like field)\n - optional `server_name`\n - optional `mcp_tool_name`\n - `schema_keys`\n - `score` rounded to 6 decimals\n- The renderer shows a status line plus up to 5 collapsed tree items by default (`COLLAPSED_MATCH_LIMIT`), each with label, optional server name, score to 3 decimals, and truncated description. The ranked match list is not serialized into `content`.\n\n## Flow\n1. `SearchToolBm25Tool.createIf()` in `packages/coding-agent/src/tools/search-tool-bm25.ts` exposes the tool for explicit discovery modes (`\"mcp-only\"` / `\"all\"`) or legacy `mcp.discoveryMode === true`. The default `\"auto\"` mode is resolved later by `createAgentSession()` after MCP/extension tools are registered.\n2. `description` is rendered from `packages/coding-agent/src/prompts/tools/search-tool-bm25.md` via `renderSearchToolBm25Description()`, using the current discoverable-tool list plus per-server summary/count.\n3. `execute()` re-checks capability and settings:\n - missing discovery hooks -> `ToolError(\"Tool discovery is unavailable in this session.\")`\n - discovery disabled -> `ToolError(\"Tool discovery is disabled. Enable tools.discoveryMode or mcp.discoveryMode to use search_tool_bm25.\")`\n4. `query` is trimmed and validated; `limit` is defaulted/validated.\n5. `getDiscoverableToolSearchIndexForExecution()` fetches the cached generic search index from the session when available, otherwise falls back to the legacy MCP cache, otherwise rebuilds an index from the current discoverable-tool list.\n6. `getSelectedToolNames()` reads the current discovered selections so already-selected tools can be excluded from fresh results.\n7. `searchDiscoverableTools()` in `packages/coding-agent/src/tool-discovery/tool-index.ts` tokenizes the query, scores every document with BM25, sorts by descending score then `tool.name`, and returns up to `searchIndex.documents.length` results; `execute()` then filters already-selected names and slices to `limit`.\n8. If any matches remain, `activateTools()` activates all matched tool names through `session.activateDiscoveredTools()` or legacy `activateDiscoveredMCPTools()`.\n9. `details` is assembled from the activated names, current selected names, corpus size, and formatted matches; `content` is reduced to the compact JSON summary from `buildSearchToolBm25Content()`.\n10. `searchToolBm25Renderer` renders either:\n - the structured `details` view, or\n - a fallback text-only warning block if `details` is absent.\n\n## Modes / Variants\n- Discovery-mode gating:\n - `tools.discoveryMode = \"auto\"` (default): when the registered tool set has more than 40 tools, searches hidden MCP tools only; otherwise discovery stays off.\n - `tools.discoveryMode = \"all\"`: searches hidden discoverable built-ins plus hidden MCP tools.\n - `tools.discoveryMode = \"mcp-only\"`: searches hidden MCP tools only.\n - legacy `mcp.discoveryMode = true`: same as MCP-only.\n- Search-index source:\n - generic cached discoverable index from the session\n - legacy cached MCP index, cast to the generic shape\n - rebuilt ad hoc from the current discoverable-tool list if neither cache path works\n- Activation backend:\n - generic `activateDiscoveredTools()`\n - legacy `activateDiscoveredMCPTools()` fallback\n\n## Side Effects\n- Session state\n - Adds matched tools to the active session tool set through `activateDiscoveredTools()` / `activateDiscoveredMCPTools()`.\n - Updates discovered-tool selection state so repeated searches accumulate selections instead of replacing them.\n - Invalidates the cached discoverable search index when newly activated built-ins change the hidden corpus (`packages/coding-agent/src/session/agent-session.ts`).\n - Tool availability changes before the next model call in the same turn; the prompt text says this explicitly.\n- User-visible prompts / interactive UI\n - The tool description includes discoverable server summaries and total discoverable-tool count.\n - The TUI renderer shows ranked matches, but the model-visible text summary does not.\n\n## Limits & Caps\n- Default result cap: `8` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/search-tool-bm25.ts`).\n- `limit` must be a positive integer; no tool-level upper bound beyond corpus size.\n- Renderer collapsed list cap: `5` (`COLLAPSED_MATCH_LIMIT`).\n- Renderer truncation widths:\n - label: `72` chars (`MATCH_LABEL_LEN`)\n - description: `96` chars (`MATCH_DESCRIPTION_LEN`)\n- BM25+ parameters in `packages/coding-agent/src/tool-discovery/tool-index.ts`:\n - `BM25_K1 = 1.2`\n - `BM25_B = 0.75`\n - `BM25_DELTA = 1.0`\n- Weighted corpus fields (`FIELD_WEIGHTS`):\n - `name`: `6`\n - `label`: `4`\n - `mcpToolName`: `4`\n - `serverName`: `2`\n - `summary`: `2`\n - each `schemaKey`: `1`\n- Summary fallback length for discoverable metadata: first `200` chars of `description` when no explicit summary exists (`getDiscoverableTool()` in `packages/coding-agent/src/tool-discovery/tool-index.ts`).\n\n## Errors\n- `execute()` throws `ToolError` for unavailable discovery hooks, disabled discovery mode, empty trimmed query, and non-positive/non-integer `limit`.\n- `searchDiscoverableTools()` throws `Error(\"Query must contain at least one letter or number.\")` if tokenization produces no letter/number tokens; `execute()` catches `Error` and rethrows `ToolError(error.message)`.\n- Empty corpus is not an error; search returns `[]`, activation is skipped, and the renderer message becomes either `No discoverable tools are currently loaded.` or `No matching tools found.`\n- `getDiscoverableToolsForDescription()` and `getDiscoverableToolSearchIndexForExecution()` swallow discovery-hook/cache errors and fall back to an empty corpus or rebuilt index.\n\n## Notes\n- The tool wire name stays `search_tool_bm25` for persisted-session back-compat, even though the source file is `search-tool-bm25.ts`.\n- Corpus composition is session-dependent and excludes already-active tools:\n - MCP entries come from `#discoverableMCPTools`, filtered to names not currently active, mapped with `summary = description`.\n - Built-in entries appear only in `\"all\"` mode and only for registry tools whose `loadMode === \"discoverable\"` and are not currently active.\n - Hidden/internal built-ins are intentionally excluded from the built-in corpus: `resolve`, `yield`, `report_finding`, `report_tool_issue` are called out in the `#collectDiscoverableBuiltinTools()` comment.\n- `DiscoverableToolSource` includes `\"extension\"` and `\"custom\"`, but `AgentSession.getDiscoverableTools()` currently assembles only built-in and MCP sources.\n- On startup, `packages/coding-agent/src/sdk.ts` resolves `\"auto\"` after the full registry exists and injects `search_tool_bm25` when the count exceeds 40. It hides non-essential discoverable built-ins only in `tools.discoveryMode = \"all\"`; defaults are `read`, `bash`, and `edit` unless `tools.essentialOverride` changes them.\n- Query tokenization is simple and deterministic: Unicode is NFKD-normalized, combining marks are dropped, acronym/camelCase and digit-to-capital boundaries are split, non-letter/non-number characters become spaces, tokens are lowercased, and only non-empty tokens survive.\n- Scores are rounded differently by surface: `details.tools[].score` keeps 6 decimals; the TUI line renders 3.\n",
|
|
96
|
+
"tools/ssh.md": "# ssh\n\n> Execute one remote command on a discovered SSH host.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ssh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ssh.md`\n- Key collaborators:\n - `packages/coding-agent/src/ssh/ssh-executor.ts` — runs `ssh`, captures output\n - `packages/coding-agent/src/ssh/connection-manager.ts` — master-connection reuse, host probing\n - `packages/coding-agent/src/ssh/sshfs-mount.ts` — optional `sshfs` mount side effect\n - `packages/coding-agent/src/discovery/ssh.ts` — discovers host configs\n - `packages/coding-agent/src/capability/ssh.ts` — canonical host shape\n - `packages/coding-agent/src/session/streaming-output.ts` — tail streaming, truncation, artifacts\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp rules\n - `packages/utils/src/dirs.ts` — user/project ssh config paths\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `host` | `string` | Yes | Host name key from discovered SSH config entries, not an arbitrary hostname/IP. |\n| `command` | `string` | Yes | Remote command string passed to `ssh` as the remote command. |\n| `cwd` | `string` | No | Remote working directory. The tool prepends a shell-specific `cd`/`Set-Location` wrapper. |\n| `timeout` | `number` | No | Timeout in seconds. Default `60`; clamped to `1..3600`. |\n\n## Outputs\nThe tool returns a standard text tool result built in `packages/coding-agent/src/tools/ssh.ts`:\n\n- `content`: one text block containing combined remote stdout+stderr, or `\"(no output)\"` when empty.\n- `details.meta.truncation`: present when output exceeded the in-memory tail window; derived from the executor summary.\n\nStreaming behavior:\n\n- While the command runs, `onUpdate` receives tail-only text snapshots built from `TailBuffer` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Final output is single-shot after process exit.\n\nSide-channel artifacts:\n\n- When session artifact allocation is available and output exceeds the spill threshold, full output is written to a session artifact file and the returned summary carries its `artifactId` internally.\n- The ssh tool itself does not print the `artifact://...` URI into the result text.\n\nFailure behavior:\n\n- Unknown host, missing host config, timeout, cancellation, SSH startup failure, key validation failure, or non-zero remote exit all surface as thrown `ToolError`s.\n- Non-zero remote exit includes captured output plus `Command exited with code N`.\n\n## Flow\n1. `loadSshTool()` in `packages/coding-agent/src/tools/ssh.ts` calls `loadCapability(sshCapability.id, { cwd: session.cwd })` to discover hosts.\n2. `packages/coding-agent/src/discovery/ssh.ts` loads host entries from, in this order: project managed ssh config, user managed ssh config, `ssh.json` in the repo root, `.ssh.json` in the repo root.\n3. `getSSHConfigPath(\"project\")` and `getSSHConfigPath(\"user\")` in `packages/utils/src/dirs.ts` resolve those managed files to `.omp/ssh.json` in the project and `~/.omp/agent/ssh.json` in the user config dir. This tool does not read `~/.ssh/config`.\n4. Capability loading deduplicates by host name with first item winning; provider order is priority-sorted and the SSH JSON provider registers at priority `5`.\n5. `loadHosts()` in `packages/coding-agent/src/tools/ssh.ts` builds `hostsByName` and drops later duplicates again with `if (!hostsByName.has(host.name))`.\n6. Tool description text is built from `packages/coding-agent/src/prompts/tools/ssh.md` plus an `Available hosts:` list. Each host entry calls `getHostInfoForHost()` to show detected shell/OS when cached; otherwise it renders `detecting...`.\n7. On execute, `SshTool.execute()` rejects any `host` not in the discovered host-name set.\n8. `ensureHostInfo()` in `packages/coding-agent/src/ssh/connection-manager.ts` ensures an SSH master connection exists, loads cached host info from disk if present, and probes remote OS/shell when cache is missing or stale.\n9. `buildRemoteCommand()` in `packages/coding-agent/src/tools/ssh.ts` prepends a cwd change when `cwd` is provided:\n - Unix-like or Windows compat shells: `cd -- '<cwd>' && <command>`\n - Windows PowerShell: `Set-Location -Path '<cwd>'; <command>`\n - Windows cmd: `cd /d \"<cwd>\" && <command>`\n10. `clampTimeout(\"ssh\", rawTimeout)` applies the `1..3600` second clamp from `packages/coding-agent/src/tools/tool-timeouts.ts`.\n11. `executeSSH()` in `packages/coding-agent/src/ssh/ssh-executor.ts` calls `ensureConnection(host)` again, opportunistically mounts the remote host root with `sshfs` if available, optionally wraps the command in `bash -c` or `sh -c` for Windows compat mode, then spawns `ssh` with `ptree.spawn`.\n12. Output from both stdout and stderr is piped into one `OutputSink`; chunks are sanitized and forwarded to streaming updates through `streamTailUpdates()`.\n13. On normal exit, the sink returns combined output plus truncation counters. On timeout or abort, `executeSSH()` returns `cancelled: true` and prefixes the output with a notice line such as `[SSH: ...]` or `[Command aborted: ...]`.\n14. `SshTool.execute()` converts `cancelled: true` into `ToolError`, converts non-zero exit codes into `ToolError`, otherwise returns the text result with truncation metadata.\n\n## Modes / Variants\n- **Tool unavailable**: `loadSshTool()` returns `null` when discovery finds no hosts, so the tool is not registered for that session.\n- **Unix-like target**: remote command is passed through directly, with optional `cd -- ... &&` prefix.\n- **Windows native shell**: cwd wrapper uses PowerShell `Set-Location` or cmd `cd /d`; command otherwise runs in the remote default Windows shell.\n- **Windows compat shell**: if host probing finds `bash` or `sh` on Windows, `executeSSH()` wraps the remote command as `bash -c '...'` or `sh -c '...'`. Host config can force compat on/off with `compat`.\n- **Cached vs probed host info**: shell/OS detection comes from in-memory cache, persisted JSON under the remote-host dir, or a fresh probe over SSH.\n- **Truncated vs untruncated output**: small output stays in memory; large output keeps only the last 50 KiB in memory and may spill full output to an artifact file.\n\n## Side Effects\n- Filesystem\n - Reads managed SSH config JSON plus legacy `ssh.json` / `.ssh.json`.\n - Validates private-key path existence and permissions before connecting.\n - Persists probed host info as JSON under the remote-host cache dir via `persistHostInfo()`.\n - May create the SSH control socket dir and, when `sshfs` exists, remote mount dirs.\n - May write full command output to a session artifact file.\n- Network\n - Opens SSH connections to the selected host.\n - May issue extra probe commands to detect OS/shell and compat shells.\n- Subprocesses / native bindings\n - Requires `ssh` on `PATH`; spawns it for connection checks, master startup, probing, and command execution.\n - May call `sshfs`, `mountpoint`, `fusermount`/`fusermount3`, or `umount`.\n - Sanitizes streamed text with `@oh-my-pi/pi-natives` text sanitization.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses session artifact allocation when available.\n - Registers postmortem cleanup hooks for SSH master connections and sshfs mounts.\n - Tool concurrency is `exclusive`, so the agent scheduler should not run multiple ssh tool calls concurrently.\n- Background work / cancellation\n - Process spawn receives the tool `AbortSignal`.\n - Cancellation/timeout ends the running ssh process and returns a cancelled result that the tool turns into an error.\n\n## Limits & Caps\n- Timeout defaults/clamps: `default=60`, `min=1`, `max=3600` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Output tail window: `DEFAULT_MAX_BYTES = 50 * 1024` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Output sink spill threshold defaults to the same `50 KiB`; once exceeded, only the tail remains in memory.\n- SSH master reuse persistence: `ControlPersist=3600` in `packages/coding-agent/src/ssh/connection-manager.ts` and `packages/coding-agent/src/ssh/sshfs-mount.ts`.\n- SSH host info schema version: `HOST_INFO_VERSION = 2` in `packages/coding-agent/src/ssh/connection-manager.ts`; stale cache entries are reprobed.\n- Streaming tail buffer compacts after more than `10` pending chunks (`MAX_PENDING`) before trimming.\n\n## Errors\n- `Unknown SSH host: ... Available hosts: ...` when the model passes a host name not present in discovery.\n- `SSH host not loaded: ...` if the discovered-name set and `hostsByName` map diverge.\n- `ssh binary not found on PATH` when `ssh` is unavailable.\n- `SSH key not found: ...`, `SSH key is not a file: ...`, or `SSH key permissions must be 600 or stricter: ...` from key validation.\n- `Failed to start SSH master for <target>: <stderr>` when control-master startup fails.\n- Non-zero remote command exit becomes `ToolError` with captured output and `Command exited with code N`.\n- Timeout becomes a cancelled result with output notice `[SSH: <timeout message>]`, then `ToolError`.\n- Abort becomes a cancelled result with output notice `[Command aborted: <message>]`, then `ToolError`.\n- `sshfs` mount failures are logged and ignored in `executeSSH()`; they do not fail the tool call.\n- Discovery parse problems do not fail tool loading; they become capability warnings. If all sources are empty/invalid, the tool simply does not load.\n\n## Notes\n- Host discovery is JSON-based only. The tool does not parse OpenSSH config files.\n- Discovery expands environment variables recursively in the parsed JSON and expands `~` in `key`/`keyPath`.\n- Host names are capability keys; the model must pass the config key, not the raw hostname.\n- Commands run without a PTY. `executeSSH()` uses `ptree.spawn(..., { stdin: \"pipe\", stderr: \"full\" })` and does not request an interactive terminal.\n- The tool exposes `cwd` but no `env`, `pty`, upload, download, or explicit file-transfer fields.\n- Lower layers support an `artifactId` for full output and a `remotePath` mount target, but `SshTool.execute()` does not expose those knobs.\n- Both stdout and stderr are merged into one output stream; ordering is whatever arrives through the two streams.\n- `StrictHostKeyChecking=accept-new` and `BatchMode=yes` are always set for connection checks, master startup, and command runs.\n- Connection reuse is keyed by discovered host name, not by raw target tuple alone.\n- `closeAllConnections()` and sshfs unmount cleanup run through postmortem hooks, not per-call teardown.\n",
|
|
97
|
+
"tools/task.md": "# task\n\n> Spawn subagents to work in the background — one per call, or a `tasks[]` batch per call (`task.batch`, default on).\n\n## Source\n- Entry: `packages/coding-agent/src/task/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/task.md`\n- Key collaborators:\n - `packages/coding-agent/src/task/types.ts` — dynamic schema, progress/result types, output caps.\n - `packages/coding-agent/src/task/discovery.ts` — discover project/user/plugin/bundled agents.\n - `packages/coding-agent/src/task/agents.ts` — bundled agent definitions and frontmatter parsing.\n - `packages/coding-agent/src/task/executor.ts` — create child sessions, run subagents, collect output, hand finished sessions to the lifecycle manager.\n - `packages/coding-agent/src/registry/agent-lifecycle.ts` — idle-TTL parking and revival of finished subagents.\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global agent directory (`running | idle | parked | aborted`).\n - `packages/coding-agent/src/async/job-manager.ts` — background job registration, progress, and result delivery.\n - `packages/coding-agent/src/task/parallel.ts` — `Semaphore` used for the session-scoped concurrency bound.\n - `packages/coding-agent/src/task/isolation-backend.ts` — isolation backend resolution and platform fallback.\n - `packages/coding-agent/src/task/worktree.ts` — worktree / FUSE / ProjFS setup, patch capture, branch merge.\n - `packages/coding-agent/src/task/output-manager.ts` — session-scoped `agent://` id allocation.\n - `packages/coding-agent/src/task/name-generator.ts` — default AdjectiveNoun agent ids.\n - `packages/coding-agent/src/internal-urls/agent-protocol.ts` — resolve `agent://<id>` to saved subagent output.\n - `packages/coding-agent/src/internal-urls/history-protocol.ts` — resolve `history://<id>` to a concise transcript.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and recursion-depth gating.\n - `packages/coding-agent/src/sdk.ts` — child-session router/tool wiring and per-subagent `AgentOutputManager`.\n - `docs/task-agent-discovery.md` — deeper discovery and precedence notes.\n\n## Inputs\n\nThe wire schema is shape-swapped by `task.batch` (default on). One unit of work is the task item `{ id?, description?, assignment, isolated? }` (`isolated` only when `task.isolation.mode` is not `none`):\n\n- **Batch shape** (`task.batch` on): `{ agent, context, tasks: item[] }` — one subagent per item, all spawned in parallel as independent background jobs. `context` is **required** shared background rendered into every spawned subagent's system prompt (`CONTEXT` section); `isolated` is per item.\n- **Flat shape** (`task.batch` off): `{ agent, ...item }` — exactly one spawn per call. Shared background goes into a `local://` file (e.g. `local://ctx.md`) that each assignment references; subagents share the parent's `local://` root.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Agent type to spawn (both shapes). |\n| `context` | `string` | Yes (batch) | Shared background prepended to every spawn of the call via the subagent system prompt. Rejected when `task.batch` is off. |\n| `tasks` | `array` | Yes (batch) | One task item per subagent. Provided ids must be unique within the call (case-insensitive). Rejected when `task.batch` is off. |\n| `id` | `string` | No | Stable agent id, schema max length 48. Defaults to a generated AdjectiveNoun name. Uniquified per session by `AgentOutputManager`. Item field in batch shape, top-level in flat shape. |\n| `description` | `string` | No | UI label only; the subagent never sees it. Item field in batch shape, top-level in flat shape. |\n| `assignment` | `string` | Yes | The work — complete, self-contained instructions. Empty-after-trim is rejected. Item field in batch shape, top-level in flat shape. |\n| `isolated` | `boolean` | No | Run in an isolated workspace and return patches. Exists only when `task.isolation.mode` is not `none`; per item in batch shape, top-level in flat shape. Isolated agents are torn down at completion — not revivable. |\n\nRuntime stays permissive: the flat form is accepted even while `task.batch` is on (internal callers such as the commit flow's `analyze_files`, and stale transcripts). The model only ever sees one shape.\n\nThere is no per-call `schema` parameter. Structured output comes from the agent definition's `output` frontmatter, the inherited parent session schema, or — for ad-hoc workflows — the eval bridge's `agent(prompt, schema)`.\n\n## Outputs\n\nThe tool returns one text block plus `details: TaskToolDetails`.\n\nImmediate (async) response — the normal case:\n- `content`: `` Spawned agent `<id>` (job `<jobId>`). The result will be delivered when it yields. ... `` plus a coordination hint (`irc` DM when enabled, otherwise `job`). A batch call instead returns `` Spawned N background agents using <agent>. ... `` with a per-agent `- `<id>` (job `<jobId>`)` listing.\n- `details`: `{ projectAgentsDir: null, results: [], totalDurationMs: 0, progress: [<seeded AgentProgress per spawn>], async: { state: \"running\", jobId, type: \"task\" } }`. A batch call keeps one shared `progress[]` snapshot; `async.jobId` is the first started job and `async.state` aggregates (\"running\" until every job settles, \"failed\" if any spawn failed).\n- Live progress keeps streaming into the same tool block via `onUpdate(...)`; each final result arrives later as an async-result injection into the parent conversation. The delivery text appends a follow-up hint: `` <id> is now idle — message it via `irc` to follow up; transcript at history://<id> `` (aborted variant points at the transcript only).\n\nSettled (sync-fallback or job-body) response:\n- `content`: summary rendered from `packages/coding-agent/src/prompts/tools/task-summary.md` with a preview capped at 5000 chars; `agent://<id>` holds the full output. A sync batch concatenates the per-spawn summaries.\n- `details.results`: one `SingleResult` per spawn; `usage`, `outputPaths` populated (aggregated across spawns for a sync batch).\n\n`SingleResult` includes:\n- identity: `index`, `id`, `agent`, `agentSource`, `description`, optional `assignment`\n- status: `exitCode`, optional `error`, optional `aborted`, optional `abortReason`, optional `retryFailure`\n- output: `output`, `stderr`, `truncated`, `durationMs`, `tokens`, `requests`, optional `contextTokens`/`contextWindow`\n- artifact metadata: `outputPath?`, `patchPath?`, `branchName?`, `nestedPatches?`, `outputMeta?`\n- extracted tool data: `extractedToolData?` from registered subprocess tool handlers such as `yield` and `report_finding`\n\nArtifacts and side channels:\n- Every subagent with an artifacts dir writes `<id>.md`; `agent://<id>` resolves to that file.\n- If the output file is JSON, `agent://<id>/<path>` and `agent://<id>?q=<query>` perform JSON extraction.\n- Each subagent gets `<id>.jsonl` session history when the parent persists artifacts; `history://<id>` renders it as a concise transcript (works for live and parked agents).\n- Isolated patch mode writes `<id>.patch` before merge.\n\n## Flow\n1. `TaskTool.create(...)` discovers agents once per cwd through a process-level memo (`discoverAgentsForCreate`) to render the dynamic prompt description.\n2. `execute(...)` repairs raw params (`repairTaskParams`), then validates: `schema` is always rejected; `tasks`/`context` are rejected unless `task.batch` is on; batch calls need a non-empty `tasks` (per-item assignments, unique provided ids), a non-empty shared `context`, and no top-level `assignment`; flat calls need `assignment`. The call is then normalized into its spawn list (`resolveSpawnItems`).\n3. Sync fallback only when the session has no `AsyncJobManager` (orphaned host) or the selected agent definition declares `blocking: true`; the call then runs `#executeSync(...)` inline under the session-scoped semaphore.\n4. Otherwise execution is always async:\n - agent ids are allocated up front via `AgentOutputManager.allocate(item.id || generateTaskName())`, one per spawn;\n - one `type: \"task\"` job per spawn is registered with `session.asyncJobManager` (`id` = agent id, `queued: true`, `ownerId` = caller agent id) and the tool returns immediately;\n - each job body acquires the session-scoped `Semaphore` (one per `TaskTool` instance, sized from `task.maxConcurrency` at first use), marks the job running, runs `#executeSync(...)` with that spawn's params, and reports progress through the shared `buildAsyncDetails`/`onUpdate`;\n - a failed or aborted run throws `TaskJobError` so the job lands `failed`, but the agent itself stays registered and interrogable.\n5. `#executeSync(...)` runs the spawn path (`#runSpawn`), which rediscovers agents from disk, so runtime resolution can differ from the create-time description.\n6. It resolves the requested agent, rejects unknown or settings-disabled agents, and enforces parent spawn policy plus `PI_BLOCKED_AGENT` self-recursion prevention.\n7. Output schema priority: agent frontmatter `output` → inherited parent session schema (the call itself never carries one).\n8. Plan mode swaps in an `effectiveAgent` with a read-only tool subset and plan-mode prompt; `runSubprocess(...)` receives the effective agent.\n9. If `isolated`, it requires a git repo (`getRepoRoot(...)` / `captureBaseline(...)`) and resolves the backend through isolation-backend resolution with platform fallback.\n10. Artifacts dir comes from the parent session file when available, otherwise a temp dir. When the session is executing an approved plan, the plan reference is handed to the subagent.\n11. Non-isolated spawns call `runSubprocess(...)` directly with parent cwd; isolated spawns run inside the isolation workspace, then commit to a branch (`mergeMode === \"branch\"`) or capture a patch, and always clean up the workspace.\n12. `runSubprocess(...)` creates a child agent session with an isolated settings snapshot (forcing `async.enabled = false` and `bash.autoBackground.enabled = false` — subagents are internally synchronous), child `agentId` equal to the allocated id, child internal URL router/`AgentOutputManager`, output schema, the shared `context` (batch calls) in the system prompt's `CONTEXT` section, and the IRC peer roster in the system prompt.\n13. Child tool availability: explicit `agent.tools` if provided; auto-add `task` when the agent has `spawns` and depth allows; strip `task` at `task.maxRecursionDepth`; expand `exec` to `eval` + `bash`; strip parent-owned `todo`.\n14. The child must finish through the hidden `yield` tool; up to 3 reminder prompts, the last forcing `toolChoice = yield` when supported. `finalizeSubprocessOutput(...)` reconciles raw text, `yield` payloads, structured schemas, `report_finding` data, and abort states.\n15. End-of-run lifecycle (keep-alive, in `runSubprocess`'s finalizer):\n - hard abort (caller signal / wall-clock / budget) → registry status `aborted`, session disposed — terminal;\n - isolated run → status `parked` without a reviver (workspace is merged + cleaned, so the session is not revivable; transcript stays readable via `history://`), then session disposed and detached;\n - everything else (success and failure alike) → status `idle` with the live session attached, and `AgentLifecycleManager.global().adopt(id, { idleTtlMs, revive })` arms the park timer. The reviver reopens the session JSONL (park closed the writer, so the single-writer lock is taken cleanly).\n16. Lifecycle thereafter: `idle` agents are parked after `task.agentIdleTtlMs` (session disposed; `AgentRef` + session file retained); messaging (`irc`) or the Agent Hub revives them back to `idle`. `\"Main\"` is never parked.\n\n## Modes / Variants\n- Execution mode\n - Always-async background job — default; spawns go through `AsyncJobManager`.\n - Sync inline fallback — only when no job manager exists or the agent definition has `blocking: true`.\n- Batch mode (`task.batch`, default on)\n - on — `{ agent, context, tasks[] }`: one independent background job per item, required `context` shared across the call's spawns, `isolated` per item. Lifecycle, revival, and concurrency semantics are identical to N parallel single calls.\n - off — single spawn per call; `tasks`/`context` are rejected and removed from the schema.\n- Isolation backend: `none`, `worktree`, `fuse-overlay`, `fuse-projfs`.\n- Isolation merge strategy: patch mode (capture/apply root patches) or branch mode (commit to `omp/task/<id>`, cherry-pick into parent).\n- Agent source precedence: project custom agents, then user custom agents, then bundled agents (`explore`, `plan`, `designer`, `reviewer`, `task`, `quick_task`, `librarian`, `oracle`).\n\n## Side Effects\n- Filesystem\n - Writes `<id>.jsonl` and `<id>.md` under the session artifacts dir or a temp task dir; isolated patch mode writes `<id>.patch`.\n - Creates/removes worktrees or overlay mount directories; branch mode creates temporary worktrees and task branches.\n- Network\n - Child sessions may use whichever networked tools/models their active tool set permits.\n - MCP proxy tools can call existing parent MCP connections with a 60_000 ms timeout.\n- Subprocesses / native bindings\n - `fuse-overlayfs` and `fusermount`/`fusermount3` for FUSE isolation; ProjFS native bindings on Windows.\n - Git operations for baseline capture, patch apply, worktrees, branches, stash, cherry-pick, commits.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Creates child `AgentSession` instances with isolated settings snapshots; finished sessions stay registered in the process-global `AgentRegistry` as `idle`/`parked` until process teardown or explicit release.\n - Registers one async job per call in `session.asyncJobManager`; completion is injected into the parent as an async-result message.\n - Arms idle-TTL timers in `AgentLifecycleManager` (unref'd; they never hold the process open).\n - Emits `task:subagent:event`, `task:subagent:progress`, and `task:subagent:lifecycle` on the parent event bus.\n - Allocates session-scoped output ids through `AgentOutputManager` so `agent://` stays unique across invocations.\n - Shares the parent `local://` root and `ArtifactManager` with subagents.\n- Background work / cancellation\n - `job cancel` (or parent tool-call abort) cancels the job; a hard-aborted run lands `aborted` and is torn down.\n - Missing-`yield` recovery sends up to three internal reminder prompts to the child session.\n\n## Limits & Caps\n- Concurrency: one session-scoped `Semaphore` sized from `task.maxConcurrency` at first use (later setting changes do not resize it) bounds concurrent subagents across parallel `task` calls — both async job bodies and the sync fallback acquire it.\n- Idle TTL: `task.agentIdleTtlMs`, default `420_000` ms (7 min); `<= 0` disables parking and keeps idle sessions live until exit.\n- Per-subagent output truncation: `MAX_OUTPUT_BYTES = 500_000` and `MAX_OUTPUT_LINES = 5000` in `packages/coding-agent/src/task/types.ts` (overridable via `PI_TASK_MAX_OUTPUT_BYTES` / `PI_TASK_MAX_OUTPUT_LINES`). Full raw output is still written to `<id>.md`.\n- Progress coalescing: `PROGRESS_COALESCE_MS = 150`; recent-output tail: `RECENT_OUTPUT_TAIL_BYTES = 8 * 1024` (last 8 non-empty lines).\n- Missing-`yield` reminder retries: `MAX_YIELD_RETRIES = 3`; MCP proxy timeout: `MCP_CALL_TIMEOUT_MS = 60_000` — both in `packages/coding-agent/src/task/executor.ts`.\n- Agent id schema cap: `id` `maxLength: 48` in `packages/coding-agent/src/task/types.ts`. Prompt text says ids should be `≤32` chars; this mismatch is real.\n- Soft request budget (`task.softRequestBudget`) and wall clock (`task.maxRuntimeMs`) apply to every spawn.\n- Recursion depth gate: `task.maxRecursionDepth`; `packages/coding-agent/src/tools/index.ts` hides the `task` tool at or beyond the limit, and `runSubprocess(...)` also strips child `task` access at max depth.\n- Final inline summary preview uses `fullOutputThreshold = 5000` chars in `packages/coding-agent/src/task/index.ts`; `agent://<id>` points to the full artifact.\n\n## Errors\n- Parameter validation failures are returned as normal tool text with empty `results`:\n - `schema` (never accepted)\n - `tasks` / `context` while `task.batch` is disabled\n - missing/empty `agent`\n - batch calls: missing/empty `tasks`, an item without `assignment`, duplicate provided ids, missing shared `context`, top-level `assignment` alongside `tasks`\n - flat calls: missing/empty `assignment`\n - unknown or settings-disabled agent, spawn-policy denial, requesting `isolated` while isolation mode is `none`\n- Isolated execution without a git repo returns `Isolated task execution requires a git repository. ...`; backend resolution can hard-error (ProjFS init) or warn and fall back to `worktree`.\n- Job registration failure returns `Failed to start background task job(s): ...`; a batch that schedules only some jobs reports the failed ids in the immediate text and keeps the started ones running.\n- Child failures surface as `SingleResult.exitCode = 1` with `stderr`/`error` populated; the async job is marked failed but the delivery text still carries the output plus a follow-up/transcript hint.\n- If the child omits `yield`, `finalizeSubprocessOutput(...)` injects warnings such as `SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.`\n- `agent://<id>` resolution errors are model-visible when another tool reads them: no session, no artifacts dir, missing id, conflicting extraction syntax, or invalid JSON for extraction.\n\n## Notes\n- Parallelism is parallel `task` calls in one assistant message — or, with `task.batch`, a `tasks[]` batch in one call; either way the session-scoped semaphore bounds the fan-out and each spawn is an independent background job.\n- Shared background convention without batch mode: write it once to a `local://` file and reference that path in each assignment — subagents share the parent's `local://` root. With `task.batch`, the required `context` parameter carries the shared background directly into each spawn's system prompt.\n- Prefer messaging an existing agent (`irc`) over a fresh spawn for follow-up work: it already holds the relevant context. `irc` op:\"list\" shows idle/parked candidates; messaging a parked agent revives it. `history://<id>` shows what an agent has done.\n- `irc` availability is derived, not configured (`isIrcEnabled` in `packages/coding-agent/src/tools/irc.ts`): it exists exactly when there is someone to message — the session can spawn subagents, or it is a subagent itself. Messaging is the only follow-up path to a finished subagent, so task without irc would strand idle agents.\n- Subagents are internally synchronous: the executor forces `async.enabled = false` and `bash.autoBackground.enabled = false` in the child settings snapshot, so there are no fire-and-forget grandchildren.\n- Agent discovery precedence is first-wins by exact name: project dirs before user dirs within a source family, plugin agent dirs after config dirs, bundled agents last. Create-time discovery is memoized per cwd for the prompt description; execution-time discovery stays fresh.\n- Child sessions do not inherit conversation history. Built-in carry-over is the workspace tree/skills/context files, the shared `local://` root, and the approved-plan reference when one exists.\n- When the parent passes `mcpManager`, child sessions disable standalone MCP discovery and get proxy tools that reuse parent connections.\n- Branch-mode merge temporarily stashes the parent repo before cherry-picking; a stash-pop conflict is treated as merge failure and leaves recovery state behind. Patch mode only applies the combined root patch when `git.patch.canApplyText(...)` succeeds; failures leave the `.patch` artifact for manual handling.\n- Nested git repos are diffed independently inside isolated workspaces and merged separately with `applyNestedPatches(...)`.\n- `agent://` ids are name-based (`Task` first, `Task-2`/`Task-3` only when the name repeats, nested like `Parent.Child`) by `AgentOutputManager`; this is what prevents artifact collisions across repeated or nested invocations.\n",
|
|
98
|
+
"tools/todo.md": "# todo\n\n> Applies ordered mutations to the session todo list and returns a text summary plus the full phase/task state.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/todo.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/todo.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/index.ts` — registers tool, exposes session hooks, gates availability.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — updates the visible todo UI on tool completion.\n - `packages/coding-agent/src/session/agent-session.ts` — stores cached phases, auto-clears done/dropped tasks, emits failure reminders.\n - `packages/coding-agent/src/modes/controllers/todo-command-controller.ts` — `/todo` command path, custom-entry persistence, transcript reminder injection.\n - `packages/coding-agent/src/tools/render-utils.ts` — collapsed-preview cap for renderer trees.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `TodoOpEntry[]` | Yes | Ordered operations to apply. `minItems: 1`.\n\n### `TodoOpEntry`\n\n| Op | Required fields | Optional fields | Effect |\n| --- | --- | --- | --- |\n| `init` | `list` | None of the other fields are used | Replaces the entire list with `list`; every new task starts `pending` before normalization. |\n| `start` | `task` | None | Marks one task `in_progress`; any other `in_progress` task is demoted to `pending`. |\n| `done` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `completed`. |\n| `drop` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `abandoned`. |\n| `rm` | `task` or `phase` or neither | None | Removes the target task, clears the phase's task list, or clears all task lists. |\n| `append` | `phase`, `items` | None | Appends new `pending` tasks to a phase; creates the phase if missing. |\n| `note` | `task`, `text` | None | Appends one trimmed note string to the task's `notes` array. |\n\n### Fields used inside ops\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"init\" | \"start\" | \"done\" | \"rm\" | \"drop\" | \"append\" | \"note\"` | Yes | Operation discriminator. |\n| `list` | `{ phase: string; items: string[] }[]` | For `init` | Full replacement payload. Each `items` array has `minItems: 1`. |\n| `task` | `string` | For `start`; for task-targeted `done`/`drop`/`rm`/`note` | Exact task content match. |\n| `phase` | `string` | For `append`; for phase-targeted `done`/`drop`/`rm` | Exact phase name match, except `append` lazily creates a missing phase. |\n| `items` | `string[]` | For `append` | Tasks to append. `minItems: 1`. |\n| `text` | `string` | For `note` | Note text; trailing whitespace is stripped before storing. Empty-after-trim is rejected. |\n\n## Outputs\nThe tool returns a single-shot `AgentToolResult`:\n\n- `content`: one text part containing the summary from `formatSummary(...)`.\n - Empty final state with no errors: `Todo list cleared.`\n - Non-empty final state: remaining-item list, current phase progress, then a per-phase tree.\n - If the active `in_progress` task has notes, the summary includes the note bodies inline.\n - If any op produced validation/runtime errors, the summary starts with `Errors: ...`; the returned tool result is marked `isError: true` and still includes the mutated state.\n- `details`:\n - `phases: TodoPhase[]`\n - `storage: \"session\" | \"memory\"`\n - `completedTasks?: TodoCompletionTransition[]` when a task changed from non-completed to `completed` during the batch\n\n`TodoPhase` / `TodoItem` state model:\n\n- `TodoPhase`: `{ name: string, tasks: TodoItem[] }`\n- `TodoItem`: `{ content: string, status: \"pending\" | \"in_progress\" | \"completed\" | \"abandoned\", notes?: string[] }`\n\nThe TUI renderer (`todoToolRenderer`) merges call and result into one transcript block, renders phases as a tree, shows note counts as superscripts, and renders the note bodies only for the current `in_progress` task. Collapsed transcript previews cap tree items at `PREVIEW_LIMITS.COLLAPSED_ITEMS` (`8`).\n\n## Flow\n1. `TodoTool.execute(...)` clones the current cached phases from `session.getTodoPhases?.() ?? []` (`packages/coding-agent/src/tools/todo.ts`).\n2. `applyParams(...)` walks `params.ops` in order and applies each entry with `applyEntry(...)`.\n3. Each op mutates the working phase array:\n - `initPhases(...)` rebuilds the list from scratch.\n - `start` resolves a task by exact `content`, demotes every other `in_progress` task to `pending`, then marks the target `in_progress`.\n - `done` / `drop` use `getTaskTargets(...)` to target one task, one phase, or every task.\n - `rm` removes one task, clears one phase's `tasks`, or clears all phases' task arrays.\n - `appendItems(...)` resolves or creates the target phase and pushes new `pending` tasks unless the same task content already exists anywhere.\n - `note` trims trailing whitespace, rejects empty text, and appends the note to `task.notes`.\n4. Missing task/phase references are recorded in an `errors` array by `resolveTaskOrError(...)` / `resolvePhaseOrError(...)`; execution continues through the rest of the batch.\n5. After the full batch, `normalizeInProgressTask(...)` enforces the single-active-task invariant:\n - if multiple tasks are `in_progress`, only the first stays active and the rest become `pending`;\n - if none are `in_progress`, the first `pending` task in phase/task order is auto-promoted to `in_progress`.\n6. `execute(...)` stores the normalized phases with `session.setTodoPhases?.(...)` and reports `storage` as `\"session\"` when `session.getSessionFile()` exists, else `\"memory\"`.\n7. `getCompletionTransitions(...)` compares the previous and updated phases; newly completed tasks are returned in `details.completedTasks`.\n8. The agent runtime also watches `todo` tool results in `packages/coding-agent/src/session/agent-session.ts`; successful results refresh cached todos, failed results inject a hidden next-turn reminder telling the model that todo progress is not visible until it retries.\n9. The event controller updates the visible todo UI from `result.details.phases` on success, or shows a warning on error (`packages/coding-agent/src/modes/controllers/event-controller.ts`).\n\n## Modes / Variants\n### State transitions\n\n| Current status | `start` | `done` | `drop` | `rm` | `append` | `note` |\n| --- | --- | --- | --- | --- | --- | --- |\n| `pending` | `in_progress` on target | `completed` | `abandoned` | Removed | New tasks enter as `pending` | No status change |\n| `in_progress` | Target stays `in_progress`; non-target active tasks become `pending` | `completed` | `abandoned` | Removed | No status change | No status change |\n| `completed` | Can be set back to `in_progress` if targeted | Stays `completed` | Becomes `abandoned` if targeted | Removed | No status change | No status change |\n| `abandoned` | Can be set back to `in_progress` if targeted | Becomes `completed` if targeted | Stays `abandoned` | Removed | No status change | No status change |\n\nNormalization then re-applies the single-active-task rule after the full op batch.\n\n### Op targeting rules\n- `done`, `drop`, `rm`:\n - `task` set: affect one exact-content task.\n - else `phase` set: affect every task in that exact-name phase.\n - else: affect every task in every phase.\n- `append` is the only op that creates a missing phase.\n- `note` only targets a single task.\n- `init` discards previous phases entirely.\n\n### Markdown round-trip helpers\nThe same file also exposes non-tool helpers used by `/todo`:\n- `phasesToMarkdown(...)` serializes phases as headings plus checklist items (`[ ]`, `[/]`, `[x]`, `[-]`) with blockquote note bodies.\n- `markdownToPhases(...)` parses that format, defaults orphan tasks into a `Todos` phase, accepts `>` as an `in_progress` marker and `~` as `abandoned`, and runs the same normalization step.\n\n## Side Effects\n- Filesystem\n - None in the tool itself.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Mutates the session todo cache through `setTodoPhases`.\n - `storage` reports whether the session has a backing session file, but the tool does not append a custom session entry itself.\n - Successful tool-result messages carry `details.phases`; `getLatestTodoPhasesFromEntries(...)` can reconstruct state later from those transcript entries.\n - Failed `todo` results cause `agent-session` to enqueue a hidden next-turn reminder (`customType: \"todo-error-reminder\"`).\n- User-visible prompts / interactive UI\n - Transcript block is rendered by `todoToolRenderer` and merged with the call line.\n - `event-controller` updates the visible todo panel from successful results.\n - On error, `event-controller` shows `Todo update failed...`; the visible panel may stay stale until a later successful call.\n- Background work / cancellation\n - `AgentSession.setTodoPhases(...)` schedules auto-clear timers for `completed` / `abandoned` tasks via `tasks.todoClearDelay`.\n\n## Limits & Caps\n- `ops` array: `minItems: 1` (`todoSchema`).\n- `init.list[*].items`: `minItems: 1`.\n- `append.items`: `minItems: 1`.\n- Renderer collapsed preview: `PREVIEW_LIMITS.COLLAPSED_ITEMS = 8` (`packages/coding-agent/src/tools/render-utils.ts`).\n- Auto-clear delay: `tasks.todoClearDelay` default `60` seconds; `< 0` disables auto-clear, `0` clears on the next microtask (`packages/coding-agent/src/session/agent-session.ts`).\n- Tool execution mode: `concurrency = \"exclusive\"`, `strict = true`, `loadMode = \"discoverable\"`.\n\n## Errors\n- Ordinary bad op payloads are accumulated as human-readable strings in `errors`; the tool still returns the mutated state, but marks the result `isError: true`.\n- Error strings come from the helpers in `packages/coding-agent/src/tools/todo.ts`, including:\n - `Missing list for init operation`\n - `Missing task content`\n - `Task \"...\" not found` with an extra empty-list hint when applicable\n - `Missing phase name`\n - `Phase \"...\" not found`\n - `Missing phase name for append operation`\n - `Missing items for append operation`\n - `Task \"...\" already exists`\n - `Missing text for note operation`\n- Because ops are processed in order, earlier errors do not roll back later ops.\n- Runtime-level tool failure is handled outside the tool body: `agent-session` injects a hidden reminder and the event controller warns the user that visible progress may be stale.\n- Idempotency is op-specific:\n - `init` is a full replacement; replaying the same payload yields the same state.\n - `start`, `done`, and `drop` are effectively idempotent on an existing target state, but `start` also demotes any other active task.\n - `rm` is not idempotent for targeted removals: the second call errors because the task or phase is gone.\n - `append` is not idempotent: duplicate task content is rejected with `Task \"...\" already exists`.\n - `note` is append-only and never idempotent; replaying it adds another note entry.\n\n## Notes\n- Task lookup is exact string equality inside the tool. The model-facing prompt says task content and phase names are identifiers and should stay unique; `append` enforces task uniqueness globally, but `init` does not validate duplicate task or phase names.\n- `findTaskByContent(...)` returns the first matching task across phases. Duplicate task contents make later targeted ops ambiguous.\n- `normalizeInProgressTask(...)` runs after the whole batch, not after each op. A single call can intentionally build an intermediate invalid state and rely on final normalization.\n- `storage: \"session\"` means the session has a session-file backing; it does not mean this tool wrote a durable custom entry.\n- Reload persistence differs by path:\n - plain `todo` calls survive in transcript tool-result details;\n - `/todo` command edits additionally append `customType: \"user_todo_edit\"` entries and inject a visible-to-model `<system-reminder>` developer message describing the manual edit.\n- On session resume, `AgentSession.#syncTodoPhasesFromBranch()` strips `completed` and `abandoned` tasks before restoring the cached list. The `/todo` command works around that by reading the latest transcript/custom-entry state so historical done/dropped tasks still appear to the user.\n- Tool availability is gated by `todo.enabled`, and the registry excludes it when `includeYield` is enabled (`packages/coding-agent/src/tools/index.ts`).\n- Subagents do not inherit `todo`; `packages/coding-agent/src/task/executor.ts` filters it out as a parent-owned tool.\n",
|
|
99
|
+
"tools/web_search.md": "# web_search\n\n> Run one web query through the first available search provider and return LLM-formatted answer, source URLs, and optional citations.\n\n## Source\n- Entry: `packages/coding-agent/src/web/search/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/web-search.md`\n- Key collaborators:\n - `packages/coding-agent/src/web/search/provider.ts` — lazy provider registry; availability chain.\n - `packages/coding-agent/src/web/search/types.ts` — unified `SearchResponse` / `SearchProviderError` types.\n - `packages/coding-agent/src/web/search/render.ts` — TUI renderer details type.\n - `packages/coding-agent/src/web/search/providers/base.ts` — provider interface and shared params contract.\n - `packages/coding-agent/src/web/search/providers/utils.ts` — credential lookup; source normalization.\n - `packages/coding-agent/src/web/search/providers/anthropic.ts` — Claude web-search provider.\n - `packages/coding-agent/src/web/search/providers/brave.ts` — Brave Search API adapter.\n - `packages/coding-agent/src/web/search/providers/codex.ts` — OpenAI Codex SSE adapter.\n - `packages/coding-agent/src/web/search/providers/exa.ts` — Exa API or MCP adapter.\n - `packages/coding-agent/src/web/search/providers/gemini.ts` — Gemini grounding SSE adapter.\n - `packages/coding-agent/src/web/search/providers/jina.ts` — Jina Reader search adapter.\n - `packages/coding-agent/src/web/search/providers/kagi.ts` — Kagi provider wrapper.\n - `packages/coding-agent/src/web/search/providers/kimi.ts` — Kimi search adapter.\n - `packages/coding-agent/src/web/search/providers/parallel.ts` — Parallel provider wrapper.\n - `packages/coding-agent/src/web/search/providers/perplexity.ts` — Perplexity API / OAuth adapter.\n - `packages/coding-agent/src/web/search/providers/searxng.ts` — self-hosted SearXNG adapter.\n - `packages/coding-agent/src/web/search/providers/synthetic.ts` — Synthetic search adapter.\n - `packages/coding-agent/src/web/search/providers/tavily.ts` — Tavily search adapter.\n - `packages/coding-agent/src/web/search/providers/zai.ts` — Z.AI remote MCP adapter.\n - `packages/coding-agent/src/web/parallel.ts` — Parallel search/extract HTTP client.\n - `packages/coding-agent/src/web/kagi.ts` — Kagi HTTP client.\n - `packages/coding-agent/src/tools/index.ts` — built-in tool registration and enable flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Search query. `executeSearch()` rewrites any `2020`-`2029` substring to the current year before dispatch. |\n| `recency` | `\"day\" \\| \"week\" \\| \"month\" \\| \"year\"` | No | Time filter. Only providers that implement it use it. Prompt text says Brave and Perplexity; code also maps it for Tavily and SearXNG. |\n| `limit` | `number` | No | Max results to return. Usually becomes the provider request's result-count parameter when `num_search_results` is absent. |\n| `max_tokens` | `number` | No | Passed through as `maxOutputTokens` / `max_tokens` only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `temperature` | `number` | No | Passed through only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `num_search_results` | `number` | No | Requested upstream search breadth. For most providers this is the same count used for returned sources. Perplexity is the only adapter that keeps it distinct from `limit`. |\n\n## Outputs\nThe tool returns a single text content block plus structured `details`.\n\n- `content`: `[{ type: \"text\", text: string }]`\n- `details`: `SearchRenderDetails` from `packages/coding-agent/src/web/search/render.ts`\n - `response: SearchResponse`\n - `error?: string`\n\n`text` is produced by `formatForLLM()` in `packages/coding-agent/src/web/search/index.ts`:\n\n- If `response.answer` exists, it is emitted first.\n- If sources exist, a `## Sources` section follows with a source count, then one entry per source:\n - `[n] <title> (<formatted age or published date>)`\n - ` <url>`\n - optional snippet line truncated to 240 chars.\n- If citations exist, a `## Citations` section follows with URL/title plus optional cited text truncated to 240 chars.\n- If related questions exist, a `## Related` bullet list follows.\n- If search queries exist, a `Search queries: <n>` section follows, capped to the first 3 queries and 120 chars each.\n\nFailure output is not thrown at the tool boundary when providers are unavailable or provider attempts fail. Instead the tool returns:\n\n- `content[0].text = \"Error: ...\"`\n- `details.response.provider = <last attempted provider> | \"none\"`\n- `details.error = ...`\n\nStreaming: none. `WebSearchTool.execute()` forwards its `AbortSignal` into `executeSearch()`, and `executeSearch()` passes it to providers. If the signal is aborted during fallback handling, `throwIfAborted(signal)` rethrows the cancellation instead of returning an `\"Error: ...\"` text result.\n\n## Flow\n1. `WebSearchTool.execute()` in `packages/coding-agent/src/web/search/index.ts` delegates directly to `executeSearch()`.\n2. `executeSearch()` chooses a provider list:\n - if `params.provider` is set and not `\"auto\"`, it loads that provider with `getSearchProvider()`; if `isAvailable()` returns true, the list is `[that provider]`, otherwise it falls back to `resolveProviderChain(\"auto\")`.\n - otherwise it calls `resolveProviderChain()` with the module-global preferred provider from `packages/coding-agent/src/web/search/provider.ts`.\n3. `resolveProviderChain()` lazily loads each provider module on demand, checks `isAvailable()`, and returns only available providers. If a preferred provider is set, it is tried first, then the static `SEARCH_PROVIDER_ORDER` excluding that provider.\n4. If no providers are available, `executeSearch()` returns `Error: No web search provider configured.` with `details.response.provider = \"none\"`.\n5. For each provider in order, `executeSearch()` calls `provider.search()` with:\n - `query` after year-rewrite,\n - `limit`, `recency`, `temperature`, `maxOutputTokens`, `numSearchResults`,\n - `systemPrompt` from `packages/coding-agent/src/prompts/tools/web-search.md`.\n6. On the first successful `SearchResponse`, `formatForLLM()` renders answer/sources/citations/related/search-queries into one text block and returns it with `details.response`.\n7. If a provider throws, `executeSearch()` records the error and tries the next provider. There is no provider-level parallel fan-out; fallback is sequential.\n8. After all candidates fail, `formatProviderError()` normalizes each error:\n - Anthropic `404` becomes `Anthropic web search returned 404 (model or endpoint not found).`\n - `401`/`403` become `<Provider> authorization failed ...` except Z.AI, which preserves its raw message.\n - other `SearchProviderError`s surface `error.message`.\n9. If more than one provider was attempted, the final message is `All web search providers failed: <provider/error>; ...`; otherwise it is just the normalized last error.\n\n## Modes / Variants\n- **Provider selection**\n - **Forced provider**: internal callers may pass `provider`; unavailable forced providers fall back to the auto chain instead of hard-failing (`packages/coding-agent/src/web/search/index.ts`). This field is not in the model-facing schema.\n - **Preferred provider**: `setPreferredSearchProvider()` sets a module-global default used by `resolveProviderChain()`. `packages/coding-agent/src/sdk.ts` and `packages/coding-agent/src/modes/controllers/selector-controller.ts` wire this from settings.\n - **Auto chain order**: `tavily`, `perplexity`, `brave`, `jina`, `kimi`, `anthropic`, `gemini`, `codex`, `zai`, `exa`, `parallel`, `kagi`, `synthetic`, `searxng` (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/types.ts`).\n- **Provider adapters**\n - **Tavily** — `packages/coding-agent/src/web/search/providers/tavily.ts`\n - Availability: API key from env or `agent.db` via `findCredential()`.\n - Querying: POST `https://api.tavily.com/search`.\n - `recency` maps to Tavily `time_range`; code explicitly keeps `topic` at default general scope instead of narrowing to news.\n - `limit` / `num_search_results`: adapter uses `params.numSearchResults ?? params.limit`, clamped to `5..20` with default `5`.\n - Output: `answer`, `sources`, `requestId`, `authMode: \"api_key\"`.\n - **Perplexity** — `packages/coding-agent/src/web/search/providers/perplexity.ts`\n - Availability: auth precedence is `PERPLEXITY_COOKIES` -> OAuth token in `agent.db` -> `PERPLEXITY_API_KEY` / `PPLX_API_KEY`.\n - OAuth/cookie mode: POSTs to `https://www.perplexity.ai/rest/sse/perplexity_ask`, consumes SSE, merges partial events, extracts answer and source URLs, sets `authMode: \"oauth\"`.\n - API-key mode: POSTs to `https://api.perplexity.ai/chat/completions` with `model: \"sonar-pro\"`, `search_mode: \"web\"`, `num_search_results`, optional `search_recency_filter`, `max_tokens`, `temperature`.\n - `num_search_results` controls upstream API breadth only in API-key mode. `limit` is preserved separately as `num_results` and slices returned `sources` after parsing in both auth modes.\n - Output may include `answer`, `sources`, `citations`, `usage`, `model`, `requestId`, `authMode`.\n - **Brave** — `packages/coding-agent/src/web/search/providers/brave.ts`\n - Availability: `BRAVE_API_KEY` only.\n - Querying: GET `https://api.search.brave.com/res/v1/web/search` with `count`, `extra_snippets=true`, and `freshness=pd|pw|pm|py` for `recency`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Jina** — `packages/coding-agent/src/web/search/providers/jina.ts`\n - Availability: `JINA_API_KEY` only.\n - Querying: GET-like fetch to `https://s.jina.ai/<encoded query>` with bearer auth.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` / `num_search_results`: adapter slices sources to `params.numSearchResults ?? params.limit` when provided; otherwise returns all payload items.\n - Output: `sources` only.\n - **Kimi** — `packages/coding-agent/src/web/search/providers/kimi.ts`\n - Availability: `MOONSHOT_SEARCH_API_KEY`, `KIMI_SEARCH_API_KEY`, `MOONSHOT_API_KEY`, or `agent.db` credentials for `moonshot` / `kimi-code`.\n - Querying: POST to `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` / default `https://api.kimi.com/coding/v1/search` with `text_query`, `limit`, `enable_page_crawling`, `timeout_seconds: 30`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Anthropic** — `packages/coding-agent/src/web/search/providers/anthropic.ts`\n - Availability: `ANTHROPIC_SEARCH_API_KEY` env var, otherwise `authStorage.hasAuth(\"anthropic\")`; search credentials come from `authStorage.getApiKey(\"anthropic\")` when no search-specific key is set.\n - Env overrides specific to search (do not affect chat completions):\n - `ANTHROPIC_SEARCH_API_KEY` — highest-priority search auth; overrides `ANTHROPIC_API_KEY` / OAuth / `ANTHROPIC_FOUNDRY_API_KEY` for the search call only.\n - `ANTHROPIC_SEARCH_BASE_URL` — search-only base URL for either `ANTHROPIC_SEARCH_API_KEY` or fallback Anthropic credentials; overrides `ANTHROPIC_BASE_URL` (and `FOUNDRY_BASE_URL` in Foundry mode); defaults to `https://api.anthropic.com`.\n - `ANTHROPIC_SEARCH_MODEL` — search model; defaults to `claude-haiku-4-5`.\n - Querying: Claude Messages API with web-search tool enabled.\n - `max_tokens` and `temperature` pass through.\n - `limit` and `num_search_results` are collapsed together before dispatch: `num_results = params.numSearchResults ?? params.limit`.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage.searchRequests`, `model`, `requestId`.\n - **Gemini** — `packages/coding-agent/src/web/search/providers/gemini.ts`\n - Availability: OAuth credentials in `agent.db` for `google-gemini-cli` or `google-antigravity`.\n - Querying: SSE `streamGenerateContent` call with Google Search grounding enabled. Antigravity auth tries two fallback endpoints and retries `401/403/400 invalid auth` once after token refresh; `429/5xx` retry with exponential backoff and server-provided retry delay, capped by a `5 * 60 * 1000` ms rate-limit budget.\n - `max_tokens` and `temperature` pass through as `generationConfig.maxOutputTokens` / `generationConfig.temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage`, `model`.\n - **Codex** — `packages/coding-agent/src/web/search/providers/codex.ts`\n - Availability: non-expired OAuth credential for `openai-codex` in `agent.db`.\n - Querying: SSE POST to `https://chatgpt.com/backend-api/codex/responses` with `tool_choice: { type: \"web_search\" }` and `search_context_size: \"high\"` by default.\n - Ignores `recency`, `max_tokens`, and `temperature` in this tool path.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `usage`, `model`, `requestId`. If the streamed response has no `url_citation` annotations, the adapter falls back to scraping markdown links and bare URLs from the answer text.\n - **Z.AI** — `packages/coding-agent/src/web/search/providers/zai.ts`\n - Availability: env or `agent.db` credential for `zai`.\n - Querying: JSON-RPC `tools/call` against `https://api.z.ai/api/mcp/web_search_prime/mcp` for remote MCP tool `web_search_prime`.\n - Fallback chain inside the provider: tries `{query,count}`, then `{search_query,count}`, then `{search_query, search_engine:\"search-prime\", count}` when earlier attempts fail with argument-shape errors.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include parsed free-text `answer`, `sources`, `requestId`.\n - **Exa** — `packages/coding-agent/src/web/search/providers/exa.ts`\n - Availability: env or `agent.db` credential for `exa` admits Exa to the auto chain; settings must not explicitly disable `exa.enabled` or `exa.enableSearch`. Explicit selection (`providers.webSearch: exa`) reaches Exa even without a credential and falls back to public MCP.\n - Querying: POST `https://api.exa.ai/search` with the resolved Exa API key, otherwise JSON-RPC `tools/call` against `https://mcp.exa.ai/mcp` for remote MCP tool `web_search_exa`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: synthesized `answer` from up to 3 result summaries, `sources`, `requestId`.\n - **Parallel** — `packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`\n - Availability: env or `agent.db` credential for `parallel`.\n - Querying: POST `https://api.parallel.ai/v1beta/search` with `objective=query`, `search_queries=[query]`, `mode:\"fast\"`, `max_chars_per_result: 10000`, beta header `search-extract-2025-10-10`.\n - There is no provider fan-out here despite the name; the current adapter always sends a one-element `search_queries` array.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `requestId`.\n - **Kagi** — `packages/coding-agent/src/web/search/providers/kagi.ts`, `packages/coding-agent/src/web/kagi.ts`\n - Availability: env or `agent.db` credential for `kagi`.\n - Querying: POST `https://kagi.com/api/v1/search` with `Authorization: Bearer <key>` and JSON body `{ query, workflow: \"search\", limit, filters?: { after } }`. `recency` maps to `filters.after` as a UTC `YYYY-MM-DD` string (`day`/`week`/`month`/`year`).\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources` (concatenated `data.search` + `data.video` + `data.news` + `data.infobox`, with video/news/infobox results tagged in the title), `relatedQuestions` (`data.adjacent_question` + `data.related_search` `props.question`), `answer` (`data.direct_answer[0].snippet ?? title`), `requestId` (`meta.trace`).\n - **Synthetic** — `packages/coding-agent/src/web/search/providers/synthetic.ts`\n - Availability: env or `agent.db` credential for `synthetic`.\n - Querying: POST `https://api.synthetic.new/v2/search` with `{ query }`.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: `sources` only.\n - **SearXNG** — `packages/coding-agent/src/web/search/providers/searxng.ts`\n - Availability: endpoint from `searxng.endpoint` setting or `SEARXNG_ENDPOINT` env.\n - Querying: GET `<endpoint>/search?format=json&q=...`; optional settings add `categories` and `language`.\n - Auth precedence: Basic auth (`searxng.basicUsername` / `searxng.basicPassword` or env equivalents) over bearer token (`searxng.token` / `SEARXNG_TOKEN`). Basic credentials are validated for RFC 7617 restrictions.\n - `recency` maps to `time_range`; `week` is downgraded to `month` because SearXNG does not support week.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..20`, default `10`.\n - Output: `sources`, `relatedQuestions` from `suggestions`.\n\n## Side Effects\n- Network\n - Calls one or more external search providers over HTTPS until one succeeds or all fail.\n - Provider-specific transports include JSON POST, JSON GET, SSE streaming (Perplexity OAuth/API, Gemini, Codex), and JSON-RPC over HTTP (Z.AI).\n- Subprocesses / native bindings\n - None.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses a module-global provider-instance cache in `packages/coding-agent/src/web/search/provider.ts`.\n - Uses a module-global preferred-provider setting in the same file.\n - `packages/coding-agent/src/tools/index.ts` gates tool availability behind `session.settings.get(\"web_search.enabled\")`.\n- Background work / cancellation\n - Many provider adapters accept `AbortSignal`; `WebSearchTool.execute()` passes the tool call signal into `executeSearch()`, which forwards it as `params.signal` to providers and rethrows cancellation during fallback.\n\n## Limits & Caps\n- Provider auto-order length: 14 providers (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/types.ts`).\n- `formatForLLM()` truncates source snippets and citation text to 240 chars (`packages/coding-agent/src/web/search/index.ts`).\n- `formatForLLM()` emits at most 3 search queries, each truncated to 120 chars (`packages/coding-agent/src/web/search/index.ts`).\n- Brave result count: default `10`, max `20` (`DEFAULT_NUM_RESULTS`, `MAX_NUM_RESULTS` in `packages/coding-agent/src/web/search/providers/brave.ts`).\n- Tavily result count: default `5`, max `20` (`packages/coding-agent/src/web/search/providers/tavily.ts`).\n- Kimi result count: default `10`, max `20`; request timeout field fixed to `30` seconds (`packages/coding-agent/src/web/search/providers/kimi.ts`).\n- Parallel result count: default `10`, max `40`; per-result excerpt cap `10_000` chars (`packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`).\n- Kagi result count: default `10`, max `40` (`packages/coding-agent/src/web/search/providers/kagi.ts`).\n- SearXNG result count: default `10`, max `20` (`packages/coding-agent/src/web/search/providers/searxng.ts`).\n- Perplexity API-key mode defaults: `max_tokens = 8192`, `temperature = 0.2`, `num_search_results = 10` (`packages/coding-agent/src/web/search/providers/perplexity.ts`).\n- Anthropic defaults: model `claude-haiku-4-5`, `DEFAULT_MAX_TOKENS = 4096` when the provider omits `max_tokens` (`packages/coding-agent/src/web/search/providers/anthropic.ts`).\n- Gemini retries: up to `3` retries per endpoint, base delay `1000` ms, rate-limit delay budget `5 * 60 * 1000` ms (`packages/coding-agent/src/web/search/providers/gemini.ts`).\n\n## Errors\n- Tool-level no-provider case returns a normal tool result with `Error: No web search provider configured.`; it does not throw.\n- Tool-level all-failed case also returns a normal tool result with `Error: ...`; the message is either the single normalized provider error or a semicolon-separated summary of all failed providers.\n- Provider adapters usually throw `SearchProviderError(provider, message, status)` for HTTP or protocol failures.\n- Availability probes intentionally swallow lookup errors and report `false` in many providers via `isApiKeyAvailable()`.\n- Per-provider notable failures:\n - Anthropic: missing credentials throw a plain `Error`; a `404` is remapped to a special final message by `formatProviderError()`.\n - Perplexity: missing auth throws a plain `Error`; OAuth stream `error_code` events become `SearchProviderError(\"perplexity\", ...)`.\n - Gemini: auth refresh, endpoint fallback, and retry logic are internal; final exhausted failures surface as `SearchProviderError(\"gemini\", ...)`.\n - Codex and Gemini both fail if the HTTP response has no body after a `200`.\n - Z.AI treats malformed SSE/JSON-RPC payloads as provider errors and retries only argument-shape failures across request variants.\n - SearXNG `findAuth()` can throw configuration errors before any HTTP call if Basic auth fields are incomplete or invalid.\n\n## Notes\n- The model-facing schema does not expose `provider`, but internal callers can force one through `SearchQueryParams`.\n- `resolveProviderChain()` lazily imports provider modules and caches singleton instances. Just asking for labels via `getSearchProviderLabel()` does not trigger those imports.\n- Most providers treat `limit` and `num_search_results` as the same number because adapters pass `params.numSearchResults ?? params.limit`. Perplexity is the only implementation that preserves both concepts.\n- The prompt says `recency` is for Brave and Perplexity, but code also implements it for Tavily and SearXNG.\n- The year rewrite in `executeSearch()` is blunt: any `2020`-`2029` substring is replaced with the current year.\n- `packages/coding-agent/src/config/settings-schema.ts` uses the shared `SEARCH_PROVIDER_PREFERENCES` / `SEARCH_PROVIDER_OPTIONS` metadata, so the settings selector and setup wizard expose `auto` plus every provider in the auto chain.\n- Exa uses `authStorage.getApiKey(\"exa\")`, then `EXA_API_KEY`, then unauthenticated `https://mcp.exa.ai/mcp` fallback.\n",
|
|
100
|
+
"tools/write.md": "# write\n\n> Create or overwrite a file, writable internal resource, archive entry, SQLite row, or merge-conflict resolution.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/archive-reader.ts` — parse `archive.ext:entry` selectors.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite paths and perform row insert/update/delete.\n - `packages/coding-agent/src/lsp/index.ts` — format-on-write and diagnostics writethrough.\n - `packages/coding-agent/src/tools/auto-generated-guard.ts` — block overwriting generated files.\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidate shared FS scan caches after writes.\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolve paths and enforce plan-mode write policy.\n\n## Inputs\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Target path. Plain file path writes a filesystem file. Writable internal URLs are delegated to their handler. `archive.ext:inner/path` writes an archive entry for `.tar`, `.tar.gz`, `.tgz`, or `.zip`. `db.sqlite:table` inserts a row. `db.sqlite:table:key` updates or deletes a row. `conflict://<id>` resolves a recorded merge conflict. |\n| `content` | `string` | Yes | Full replacement file content, archive entry content, internal-resource content, conflict replacement, or SQLite row payload. SQLite non-delete writes must parse as a JSON5 object. Empty or whitespace-only content deletes a SQLite row when `path` includes a row key. |\n\nWorked examples:\n\n```text\npath: \"src/generated/config.json\"\ncontent: \"{\\n \\\"enabled\\\": true\\n}\\n\"\n```\n\n```text\npath: \"fixtures/archive.zip:templates/email.txt\"\ncontent: \"hello\\n\"\n```\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n## Outputs\nSingle-shot result.\n\n- Success always returns a text block.\n - Plain file write: `Successfully wrote <chars> bytes to <relative-path>` (the count is `cleanContent.length`, not encoded byte length).\n - Internal URL write: `Successfully wrote <chars> bytes to <url>`.\n - Archive write: `Successfully wrote <chars> bytes to <relative-archive-path>:<entry-path>`.\n - SQLite write: one of `Inserted row into <table>`, `Updated row '<key>' in <table>`, `No row updated ...`, `Deleted row ...`, `No row deleted ...`.\n - Conflict resolution: conflict-specific success text, with fresh hashline snapshot headers when applicable.\n- If hashline prefixes were copied from `read` output and stripped first, the first text block gets an extra note.\n- In hashline display mode, plain file writes (including ACP bridge writes) and conflict resolutions prepend a fresh `¶<relative-path>#TAG` header so the next `edit` has a current snapshot tag without an extra `read`. Bulk conflict resolutions append a `Snapshots:` block listing one header per successfully written file.\n- Plain file writes may also return `details.diagnostics` plus `details.meta.diagnostics` when LSP diagnostics-on-write is enabled, and `details.madeExecutable` when a newly written shebang file is chmodded executable.\n- SQLite writes use `toolResult(...).sourcePath(...)`, so `details.meta.sourcePath` points at the database file.\n- Archive and internal URL writes return empty `details`.\n\n## Flow\n1. `WriteTool.execute()` in `packages/coding-agent/src/tools/write.ts` strips pasted `¶PATH#HASH` headers and `LINE:` hashline prefixes from `content` when the session is in hashline display mode.\n2. If `path` is an internal URL whose handler exposes `write`, the tool delegates directly to `handler.write(...)` and returns.\n3. `conflict://...` paths are handled next by the merge-conflict resolver. Scope reads such as `conflict://<id>/ours` are rejected as read-only; writable conflict URIs must omit the scope.\n4. It calls `#resolveArchiveWritePath()` next. That uses `parseArchivePathCandidates()` from `packages/coding-agent/src/tools/archive-reader.ts`, checks candidate archive files on disk, and falls back to the longest matching archive suffix even when the archive file does not exist yet.\n5. Archive writes call `enforcePlanModeWrite(..., { op: exists ? \"update\" : \"create\" })`, then `#writeArchiveEntry()`.\n - The parent directory of the archive file is created with `fs.mkdir(..., { recursive: true })`.\n - `.zip` archives are read with `fflate.unzipSync()`, the target entry is replaced in an in-memory map, and the archive is rewritten with `fflate.zipSync()` + `Bun.write()`.\n - `.tar`, `.tar.gz`, and `.tgz` archives are read with `Bun.Archive`, existing entries are copied into an object map, the target entry is replaced, and `Bun.Archive.write()` rewrites the archive.\n - `invalidateFsScanAfterWrite()` runs on the archive file path.\n6. If the path is not treated as an archive, `execute()` calls `#resolveSqliteWritePath()`. That uses `parseSqlitePathCandidates()` and `isSqliteFile()` from `packages/coding-agent/src/tools/sqlite-reader.ts`. Existing non-SQLite files suppress the SQLite path interpretation.\n7. SQLite writes call `enforcePlanModeWrite(..., { op: \"update\" })`, then `#writeSqliteRow()`.\n - The database must already exist; missing DBs throw `SQLite database '<path>' not found`.\n - The tool opens `new Database(..., { create: false, strict: true })` and sets `PRAGMA busy_timeout = 3000`.\n - Whitespace-only `content` with a row key deletes a row.\n - Non-empty `content` is parsed with `Bun.JSON5.parse()`, must be a JSON object, and is routed to insert/update helpers from `packages/coding-agent/src/tools/sqlite-reader.ts`.\n - `invalidateFsScanAfterWrite()` runs on the DB path and the connection is closed in `finally`.\n8. Otherwise the tool treats `path` as a plain filesystem file.\n - `enforcePlanModeWrite(..., { op: \"create\" })` runs before path resolution.\n - Existing files are checked by `assertEditableFile()` to block overwriting detected generated files.\n - ACP bridge writeTextFile is tried first when available; otherwise the session’s writethrough callback writes content. With LSP enabled and `lsp.formatOnWrite` / `lsp.diagnosticsOnWrite` settings on, `createLspWritethrough()` may format content, sync it through LSP servers, save it, and collect diagnostics. Otherwise `writethroughNoop()` writes directly with `Bun.write()` or `file.write()`.\n - `maybeMarkExecutableForShebang()` may chmod the file executable when content starts with `#!`.\n - `invalidateFsScanAfterWrite()` runs on the file path.\n9. The tool returns a text result and optional diagnostics / executable metadata.\n\n## Modes / Variants\n### Plain file path\n- Target is any path that does not resolve as an archive selector and does not resolve as an existing-or-new SQLite selector.\n- Existing files are overwritten.\n- `write.ts` does not call `fs.mkdir()` on this path; parent-directory creation is only implemented in the archive branch.\n\nExample:\n\n```text\npath: \"tmp/output.txt\"\ncontent: \"hello\\n\"\n```\n\n### Archive entry write\n- Selector syntax: `archive.ext:inner/path`.\n- Supported archive suffixes come from `parseArchivePathCandidates()`: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- The inner path is normalized to `/`, strips empty and `.` segments, rejects `..`, and rejects directory targets ending in `/`.\n- Rewrites the whole archive file after replacing one entry.\n- Creates the parent directory for the archive file if needed.\n\nExample:\n\n```text\npath: \"build/assets.tar.gz:css/app.css\"\ncontent: \"body { color: black; }\\n\"\n```\n\n### SQLite table insert\n- Selector syntax: `db.sqlite:table`.\n- `content` must parse as a JSON5 object.\n- Empty object is allowed and becomes `INSERT INTO <table> DEFAULT VALUES`.\n- Query parameters are rejected for SQLite writes.\n\nExample:\n\n```text\npath: \"data/app.db:users\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n### SQLite row update / delete\n- Selector syntax: `db.sqlite:table:key`.\n- Non-empty `content` updates the row.\n- Empty or whitespace-only `content` deletes the row.\n- Row lookup uses the single-column primary key if present; otherwise it falls back to `rowid`. Composite primary keys and `WITHOUT ROWID` tables are rejected for key-based writes.\n\nExample update:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{email: 'ada@example.com'}\"\n```\n\nExample delete:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"\"\n```\n\n## Side Effects\n- Filesystem\n - Creates or overwrites plain files.\n - Rewrites entire archive files when writing an archive entry.\n - Creates parent directories for archive files only.\n - Mutates existing SQLite databases; never creates a new SQLite DB.\n - Resolves conflict markers in files for `conflict://...` writes.\n - May chmod a shebang file executable after a successful plain-file write.\n- Subprocesses / native bindings\n - Uses Bun SQLite bindings via `bun:sqlite`.\n - Uses Bun archive APIs and lazily imports `fflate` for ZIP reads/writes.\n - May talk to configured LSP servers through `packages/coding-agent/src/lsp/index.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Invalidates shared filesystem scan cache entries through `invalidateFsScanAfterWrite()`.\n - Enforces plan-mode write restrictions before mutating the target.\n- Background work / cancellation\n - Marks the tool `concurrency = \"exclusive\"` in `WriteTool`.\n - LSP writethrough can schedule deferred diagnostics fetches after a timeout, but plain `write.ts` only consumes the immediate return value.\n\n## Limits & Caps\n- `WriteTool` itself exposes no byte cap beyond storing `content` in memory and, for archives, rebuilding the archive in memory.\n- Generated-file detection reads at most `CHECK_BYTE_COUNT = 1024` bytes and `HEADER_LINE_LIMIT = 40` header lines from an existing file in `packages/coding-agent/src/tools/auto-generated-guard.ts`.\n- SQLite writes set `PRAGMA busy_timeout = 3000`.\n- LSP writethrough uses a `5_000` ms operation timeout in `runLspWritethrough()` and may schedule a deferred diagnostics fetch with `AbortSignal.timeout(25_000)` in `scheduleDeferredDiagnosticsFetch()`.\n- Shebang executable handling depends on host filesystem chmod support.\n\n## Errors\n- Invalid archive subpaths throw `ToolError` with messages such as:\n - `Archive write path must target a file inside the archive`\n - `Archive write path must target a file, not a directory`\n - `Archive path cannot contain '..'`\n- SQLite path parsing throws on unsupported forms:\n - `SQLite write paths do not support query parameters`\n - `SQLite write path must target a table`\n - `SQLite row writes require a non-empty row key`\n- Missing SQLite DBs surface as `SQLite database '<path>' not found`.\n- SQLite content errors are model-visible `ToolError`s, including invalid JSON5, non-object payloads, unknown columns, non-scalar values, empty update objects, composite primary keys, and `WITHOUT ROWID` tables.\n- Existing plain files may be rejected by `assertEditableFile()` when they look generated.\n- Conflict scope writes such as `conflict://<id>/ours` are rejected as read-only; invalid conflict IDs or missing conflict history surface as `ToolError`s from the conflict resolver.\n- Archive read/write failures and unexpected SQLite exceptions are wrapped in `ToolError(error.message)`.\n- If no LSP server matches or LSP formatting/diagnostics times out, file writes still fall back to writing content; diagnostics may be omitted.\n\n## Notes\n- Archive path detection runs before SQLite detection. A path that matches an archive selector is never treated as SQLite.\n- SQLite detection declines when an existing file with a `.sqlite` / `.db` suffix is present but does not have SQLite magic bytes; then the path falls back to a plain file write.\n- ZIP entry content is encoded with `new TextEncoder().encode(content)` in `#writeArchiveEntry()`. Non-ZIP archive writes pass the string directly to `Bun.Archive.write()`.\n- The prompt forbids two common anti-patterns: using `write` for routine edits that should use `edit`, and creating `*.md` / `README` files unless explicitly requested. It also forbids emojis unless requested.\n- Plain file and internal URL writes report `cleanContent.length` as “bytes”, which is UTF-16 code units in JS, not an on-disk byte measurement.\n- `stripWriteContent()` only removes hashline prefixes when the session’s file display mode has `hashLines` enabled; otherwise content is written unchanged.\n",
|
|
101
|
+
"tree.md": "# `/tree` Command Reference\n\n`/tree` opens the interactive **Session Tree** navigator. It lets you jump to any entry in the current session file and continue from that point.\n\nThis is an in-file leaf move, not a new session export.\n\n## What `/tree` does\n\n- Builds a tree from current session entries (`SessionManager.getTree()`)\n- Opens `TreeSelectorComponent` with keyboard navigation, filters, and search\n- On selection, calls `AgentSession.navigateTree(targetId, { summarize, customInstructions })`\n- Rebuilds visible chat from the new leaf path\n- Optionally prefills editor text when selecting a user/custom message\n\nPrimary implementation:\n\n- `src/modes/controllers/input-controller.ts` (`/tree`, keybinding wiring, double-escape behavior)\n- `src/modes/controllers/selector-controller.ts` (tree UI launch + summary prompt flow)\n- `src/modes/components/tree-selector.ts` (navigation, filters, search, labels, rendering)\n- `src/session/agent-session.ts` (`navigateTree` leaf switching + optional summary)\n- `src/session/session-manager.ts` (`getTree`, `branch`, `branchWithSummary`, `resetLeaf`, label persistence)\n\n## How to open it\n\nAny of the following opens the same selector:\n\n- `/tree`\n- configured keybinding action `tree`\n- double-escape on empty editor when `doubleEscapeAction = \"tree\"` (default)\n- `/branch` when `doubleEscapeAction = \"tree\"` (routes to tree selector instead of user-only branch picker)\n\n## Tree UI model\n\nThe tree is rendered from session entry parent pointers (`id` / `parentId`).\n\n- Children are sorted by timestamp ascending (older first, newer lower)\n- Active branch (path from root to current leaf) is marked with a bullet\n- Labels (if present) render as `[label]` before node text\n- If multiple roots exist (orphaned/broken parent chains), they are shown under a virtual branching root\n\n```text\nExample tree view (active path marked with •):\n\n├─ user: \"Start task\"\n│ └─ assistant: \"Plan\"\n│ ├─ • user: \"Try approach A\"\n│ │ └─ • assistant: \"A result\"\n│ │ └─ • [milestone] user: \"Continue A\"\n│ └─ user: \"Try approach B\"\n│ └─ assistant: \"B result\"\n```\n\nThe selector recenters around current selection and shows up to:\n\n- `max(5, floor(terminalHeight / 2))` rows\n\n## Keybindings inside tree selector\n\n- `Up` / `Down`: move selection (wraps)\n- `Left` / `Right`: page up / page down\n- `Enter`: select node\n- `Esc`: clear search if active; otherwise close selector\n- `Ctrl+C`: close selector\n- `Type`: append to search query\n- `Backspace`: delete search character\n- `Shift+L`: edit/clear label on selected entry\n- `Ctrl+O`: cycle filter forward\n- `Shift+Ctrl+O`: cycle filter backward\n- `Alt+D/T/U/L/A`: jump directly to specific filter mode\n\n## Filters and search semantics\n\nFilter modes (`TreeList`):\n\n1. `default`\n2. `no-tools`\n3. `user-only`\n4. `labeled-only`\n5. `all`\n\n### `default`\n\nShows conversational nodes plus any entry types not explicitly suppressed. It hides these setting/bookkeeping entry types:\n\n- `label`\n- `custom`\n- `model_change`\n- `thinking_level_change`\n\nOther internal entry types that are not rendered specially may appear as blank rows in current code.\n\n### `no-tools`\n\nSame as `default`, plus hides `toolResult` messages.\n\n### `user-only`\n\nOnly `message` entries where role is `user`.\n\n### `labeled-only`\n\nOnly entries that currently resolve to a label.\n\n### `all`\n\nEverything in the session tree, including bookkeeping/custom entries.\n\n### Tool-only assistant node behavior\n\nAssistant messages that contain **only tool calls** (no text) are hidden by default in all filtered views unless:\n\n- message is error/aborted (`stopReason` not `stop`/`toolUse`), or\n- it is the current leaf (always kept visible)\n\n### Search behavior\n\n- Query is tokenized by spaces\n- Matching is case-insensitive\n- All tokens must match (AND semantics)\n- Searchable text includes label, role, and type-specific content (message text, branch summary text, custom type, tool command snippets, etc.)\n\n## Selection outcomes (important)\n\n`navigateTree` computes new leaf behavior from selected entry type:\n\n### Selecting `user` message\n\n- New leaf becomes selected entry’s `parentId`\n- If parent is `null` (root user message), leaf resets to root (`resetLeaf()`)\n- Selected message text is copied to editor for editing/resubmit\n\n### Selecting `custom_message`\n\n- Same leaf rule as user messages (`parentId`)\n- Text content is extracted and copied to editor\n\n### Selecting non-user node (assistant/tool/summary/compaction/custom bookkeeping/etc.)\n\n- New leaf becomes selected node id\n- Editor is not prefilled\n\n### Selecting current leaf\n\n- No-op; selector closes with “Already at this point”\n\n```text\nSelection decision (simplified):\n\nselected node\n │\n ├─ is current leaf? ── yes ──> close selector (no-op)\n │\n ├─ is user/custom_message? ── yes ──> leaf := parentId (or resetLeaf for root)\n │ + prefill editor text\n │\n └─ otherwise ──> leaf := selected node id\n + no editor prefill\n```\n\n## Summary-on-switch flow\n\nSummary prompt is controlled by `branchSummary.enabled` (default: `false`).\n\nWhen enabled, after picking a node the UI asks:\n\n- `No summary`\n- `Summarize`\n- `Summarize with custom prompt`\n\nFlow details:\n\n- Escape in summary prompt reopens tree selector\n- Custom prompt cancellation returns to summary choice loop\n- During summarization, UI shows loader and binds `Esc` to `abortBranchSummary()`\n- If summarization aborts, tree selector reopens and no move is applied\n\n`navigateTree` internals:\n\n- Collects abandoned-branch entries from old leaf to common ancestor\n- Emits `session_before_tree` (extensions can cancel or inject summary)\n- Uses default summarizer only if requested and needed\n- Applies move with:\n - `branchWithSummary(...)` when summary exists\n - `branch(newLeafId)` for non-root move without summary\n - `resetLeaf()` for root move without summary\n- Replaces agent conversation with rebuilt session context\n- Emits `session_tree`\n\nNote: if user requests summary but there is nothing to summarize, navigation proceeds without creating a summary entry.\n\n## Labels\n\nLabel edits in tree UI call `appendLabelChange(targetId, label)`.\n\n- non-empty label sets/updates resolved label\n- empty label clears it\n- labels are stored as append-only `label` entries\n- tree nodes display resolved label state, not raw label-entry history\n\n## `/tree` vs adjacent operations\n\n| Operation | Scope | Result |\n| --------- | ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `/tree` | Current session file | Moves leaf to selected point (same file) |\n| `/branch` | Usually current session file -> new session file | By default branches from selected **user** message into a new session file; if `doubleEscapeAction = \"tree\"`, `/branch` opens tree navigation UI instead |\n| `/fork` | Whole current session | Duplicates session into a new persisted session file |\n| `/resume` | Session list | Switches to another session file |\n\nKey distinction: `/tree` is a navigation/repositioning tool inside one session file. `/branch`, `/fork`, and `/resume` all change session-file context.\n\n## Operator workflows\n\n### Re-run from an earlier user prompt without losing current branch\n\n1. `/tree`\n2. search/select earlier user message\n3. choose `No summary` (or summarize if needed)\n4. edit prefilled text in editor\n5. submit\n\nEffect: new branch grows from selected point within same session file.\n\n### Leave current branch with context breadcrumb\n\n1. enable `branchSummary.enabled`\n2. `/tree` and select target node\n3. choose `Summarize` (or custom prompt)\n\nEffect: a `branch_summary` entry is appended at the target position before continuing.\n\n### Investigate hidden bookkeeping entries\n\n1. `/tree`\n2. press `Alt+A` (all)\n3. search for `model`, `thinking`, `custom`, or labels\n\nEffect: inspect full internal timeline, not just conversational nodes.\n\n### Bookmark pivot points for later jumps\n\n1. `/tree`\n2. move to entry\n3. `Shift+L` and set label\n4. later use `Alt+L` (`labeled-only`) to jump quickly\n\nEffect: fast navigation among durable branch landmarks.\n",
|
|
102
|
+
"ttsr-injection-lifecycle.md": "# TTSR Injection Lifecycle\n\nThis document covers the current Time Traveling Stream Rules (TTSR) runtime path from rule discovery to stream interruption, retry injection, extension notifications, and session-state handling.\n\n## Implementation files\n\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/export/ttsr.ts`](../packages/coding-agent/src/export/ttsr.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/prompts/system/ttsr-interrupt.md`](../packages/coding-agent/src/prompts/system/ttsr-interrupt.md)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/extensibility/extensions/types.ts`](../packages/coding-agent/src/extensibility/extensions/types.ts)\n- [`../src/extensibility/hooks/types.ts`](../packages/coding-agent/src/extensibility/hooks/types.ts)\n- [`../src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n\n## 1. Discovery feed and rule registration\n\nAt session creation, `createAgentSession()` loads discovered rules, constructs a `TtsrManager`, and buckets rules through `bucketRules(...)`:\n\n```ts\nconst ttsrSettings = settings.getGroup(\"ttsr\");\nconst ttsrManager = new TtsrManager(ttsrSettings);\nconst rulesResult = await loadCapability<Rule>(ruleCapability.id, { cwd });\nconst { rulebookRules, alwaysApplyRules } = bucketRules(\n rulesResult.items,\n ttsrManager,\n {\n builtinRules: ttsrSettings.builtinRules,\n disabledRules: ttsrSettings.disabledRules,\n },\n);\n```\n\n`bucketRules(...)` drops names listed in `ttsr.disabledRules`, drops embedded `builtin-defaults` rules when `ttsr.builtinRules === false`, registers accepted TTSR rules, and then routes the remaining rules to always-apply/rulebook buckets.\n\n### Pre-registration dedupe behavior\n\n`loadCapability(\"rules\")` deduplicates by `rule.name` with first-wins semantics (higher provider priority first). Shadowed duplicates are removed before TTSR registration.\n\n### `TtsrManager.addRule()` behavior\n\nRegistration is skipped when:\n\n- both `rule.condition` (regex) and `rule.astCondition` (ast-grep patterns) are absent, or every regex condition fails to compile and there are no AST conditions\n- a rule with the same `rule.name` was already registered in this manager\n- the rule scope excludes all monitored streams\n\nInvalid regex conditions and unreachable scopes are logged as warnings and ignored; session startup continues. If a TTSR rule defines `globs`, those globs are compiled as a global file-path gate for matching.\n\n### AST conditions (`astCondition`)\n\nA rule may carry `astCondition`: a list of [ast-grep](https://ast-grep.github.io/) patterns (OR'd, same as regex `condition`), matched structurally instead of textually. A repeated metavariable inside one pattern requires both occurrences to be equal (`if ($X) clearTimeout($X)` matches but `if ($X) clearTimeout($Y)` does not).\n\nAST conditions only evaluate on **edit/write tool-argument streams** — they need a language, which is inferred from the file extension on the tool's path argument, and they match against the tool's reconstructed source snapshot (`matcherDigest`), not the raw wire delta. Matching is performed in memory by the native `astMatch` engine (no temp files) with Smart strictness. Streams without a usable file path (prose, thinking, path-less tool calls) skip AST conditions entirely. A rule may mix `condition` and `astCondition`; the regex paths keep working on every scope while AST paths apply only to those tool streams.\n\n### Setting caveat\n\n`TtsrSettings.enabled` is loaded into the manager but is not currently checked in runtime gating. If TTSR rules exist, matching still runs.\n\n## 2. Streaming monitor lifecycle\n\nTTSR detection runs inside `AgentSession.#handleAgentEvent`.\n\n### Turn start\n\nOn `turn_start`, the stream buffer is reset:\n\n- `ttsrManager.resetBuffer()`\n\n### During stream (`message_update`)\n\nWhen assistant updates arrive and rules exist:\n\n- monitor `text_delta`, `thinking_delta`, and `toolcall_delta`\n- append delta into a source/tool scoped manager buffer\n- call `checkDelta(delta, matchContext)` (synchronous regex matching)\n- for edit/write tool streams, when `hasAstRules()` is true, `await checkAstSnapshot(snapshot, matchContext)` (asynchronous AST matching)\n\n`checkDelta()` iterates registered rules and returns all matching rules that pass scope, global path-glob, regex condition, and repeat policy checks. `checkAstSnapshot()` applies the same scope/path/repeat gates, then runs each candidate rule's `astCondition` patterns against the snapshot via the native `astMatch` engine. It is throttled per stream key: an identical consecutive snapshot (common when only non-source arguments change between deltas) is skipped without re-running the matcher. Both paths feed their matches through the same trigger-decision handler.\n\n## 3. Trigger decision and immediate abort path\n\nWhen one or more rules match and at least one matched rule allows interruption:\n\n1. Matched rules are deduplicated into `#pendingTtsrInjections`.\n2. `#ttsrAbortPending = true` and a TTSR resume gate is created.\n3. `agent.abort()` is called immediately.\n4. `ttsr_triggered` event is emitted asynchronously (fire-and-forget).\n5. retry work is scheduled via the post-prompt task scheduler with a 50ms delay.\n\nAbort is not blocked on extension callbacks.\n\n## 4. Retry scheduling, context mode, and reminder injection\n\nAfter the 50ms timeout:\n\n1. `#ttsrAbortPending = false`\n2. read `ttsrManager.getSettings().contextMode`\n3. if `contextMode === \"discard\"`, drop the targeted partial assistant output with `agent.replaceMessages(...slice(0, targetAssistantIndex))`\n4. build injection content from pending rules using `ttsr-interrupt.md` template\n5. append and persist a hidden `custom_message`/runtime custom message with `customType: \"ttsr-injection\"` and `details.rules`\n6. mark those rule names injected, persist a `ttsr_injection` entry, and call `agent.continue()` to retry generation\n\nTemplate payload is:\n\n```xml\n<system-interrupt reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n...\n{{content}}\n</system-interrupt>\n```\n\nPending injections are cleared after content generation.\n\n### `contextMode` behavior on partial output\n\n- `discard`: partial/aborted assistant message is removed before retry.\n- `keep`: partial assistant output remains in conversation state; reminder is appended after it.\n\n### Non-interrupting matches\n\nNon-interrupting matches split by `matchContext.source`:\n\n- **`source === \"tool\"` (tool-source match).** The rule is bucketed into `#perToolTtsrInjections`, keyed by the matched tool call's `id`. There is **no** deferred follow-up turn and the stream is not aborted. When the tool actually produces a result, the `afterToolCall` hook prepends a rendered `ttsr-tool-reminder.md` block to `ctx.result.content` (a single `text` block inserted ahead of the tool's own content), and persists a `ttsr_injection` entry with the consumed rule names. The template payload is:\n\n ```xml\n <system-reminder reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n ...\n {{content}}\n </system-reminder>\n ```\n\n- **`source === \"text\"` / `\"thinking\"` (prose-source match).** Behavior is unchanged: the rule is queued in `#pendingTtsrInjections` and, after a successful non-error, non-aborted assistant message, `AgentSession` injects the hidden `ttsr-injection` custom message as a follow-up and schedules continuation.\n\nWithin a single matching batch, each rule is attached to exactly one sibling tool call — if multiple sibling tool calls would satisfy the same rule, deduplication picks one and the others are left untouched. Multiple distinct rules can still fold onto the same tool call.\n\n#### Implications for tool authors and transcript readers\n\n- The tool's own `toolResult` content is preserved verbatim; the reminder is **prepended** as an additional leading text block. Renderers that assume `content[0]` is the tool's primary output must scan past any block whose text begins with `<system-reminder reason=\"rule_violation\"` (or filter on the wrapper tag) to find the real payload.\n- The reminder is in-band on the tool result, not a separate `custom_message`/`ttsr-injection` entry. Transcript readers looking for non-interrupting TTSR activity on tool-source rules MUST inspect tool results (and the persisted `ttsr_injection` entry list), not just synthetic injection entries.\n- A single tool result may carry reminders for several rules concatenated with a blank line between rendered templates.\n- If the assistant message ends with `stopReason === \"aborted\"` or `\"error\"` before the matched tools run, the pending per-tool buckets are cleared — those rules are **not** persisted as injected and remain eligible to re-trigger on a future turn (subject to repeat policy).\n\n## 5. Repeat policy and gap logic\n\n`TtsrManager` tracks `#messageCount` and per-rule `lastInjectedAt`.\n\n### `repeatMode: \"once\"`\n\nA rule can trigger only once after it has an injection record.\n\n### `repeatMode: \"after-gap\"`\n\nA rule can re-trigger only when:\n\n- `messageCount - lastInjectedAt >= repeatGap`\n\n`messageCount` increments on `turn_end`, so gap is measured in completed turns, not stream chunks.\n\n## 6. Event emission and extension/hook surfaces\n\n### Session event\n\n`AgentSessionEvent` includes:\n\n```ts\n{ type: \"ttsr_triggered\"; rules: Rule[] }\n```\n\n### Extension runner\n\n`#emitSessionEvent()` routes the event to:\n\n- extension listeners (`ExtensionRunner.emit({ type: \"ttsr_triggered\", rules })`)\n- local session subscribers\n\n### Hook and custom-tool typing\n\n- extension API exposes `on(\"ttsr_triggered\", ...)`\n- hook API exposes `on(\"ttsr_triggered\", ...)`\n- custom tools receive `onSession({ reason: \"ttsr_triggered\", rules })`\n\n### Interactive-mode rendering difference\n\nInteractive mode uses `session.isTtsrAbortPending` to suppress showing the aborted assistant stop reason as a visible failure during TTSR interruption, and renders a `TtsrNotificationComponent` when the event arrives.\n\n## 7. Persistence and resume state (current implementation)\n\n`SessionManager` persists injected-rule state:\n\n- entry type: `ttsr_injection`\n- append API: `appendTtsrInjection(ruleNames)`\n- query API: `getInjectedTtsrRules()`\n- context reconstruction includes `SessionContext.injectedTtsrRules`\n\n`TtsrManager` supports restoration via `restoreInjected(ruleNames)`.\n\n### Current wiring status\n\nIn the current runtime path:\n\n- interrupted injections append a hidden `custom_message` with `customType: \"ttsr-injection\"` and append a `ttsr_injection` entry via `appendTtsrInjection(...)`\n- deferred non-interrupting prose-source injections are marked/persisted when their queued custom message reaches `message_end`\n- non-interrupting tool-source injections are marked at match time and persisted via `appendTtsrInjection(...)` from the `afterToolCall` hook when the matched tool's result is produced\n- `createAgentSession()` restores `existingSession.injectedTtsrRules` into `ttsrManager`\n\nNet effect: injected-rule suppression is persisted/restored across session reload/resume for the current branch path.\n\n## 8. Race boundaries and ordering guarantees\n\n### Abort vs retry callback\n\n- abort is synchronous from TTSR handler perspective (`agent.abort()` called immediately)\n- retry is deferred by timer (`50ms`)\n- extension notification is asynchronous and intentionally not awaited before abort/retry scheduling\n\n### Multiple matches in same stream window\n\n`checkDelta()` returns all currently matching eligible rules for that scoped buffer. Pending injections are deduplicated by rule name before injection.\n\n### Between abort and continue\n\nDuring the timer window, state can change (user interruption, mode actions, additional events). The retry call is best-effort: `agent.continue().catch(() => {})` swallows follow-up errors.\n\n## 9. Edge cases summary\n\n- Invalid `condition` regex: skipped with warning; other conditions/rules continue.\n- Duplicate rule names at capability layer: lower-priority duplicates are shadowed before registration.\n- Duplicate names at manager layer: second registration is ignored.\n- `ttsr.disabledRules`: listed names are dropped before TTSR registration and are not surfaced through always-apply/rulebook buckets.\n- `ttsr.builtinRules: false`: embedded `builtin-defaults` rules are dropped before TTSR registration; user/project rules still load.\n- `globs` on a TTSR rule require the stream match context to include at least one matching file path.\n- `contextMode: \"keep\"`: partial violating output can remain in context before reminder retry.\n- `interruptMode: \"never\"`: prose-source matches queue a deferred hidden injection after a successful assistant message; tool-source matches fold an in-band `<system-reminder>` into the matched tool call's `toolResult` content via the `afterToolCall` hook (no mid-stream abort, no separate follow-up turn).\n- Tool-source non-interrupting buckets are cleared when the parent assistant message ends with `stopReason === \"aborted\"` or `\"error\"`, so rules whose target tool never produced a result remain eligible to re-trigger.\n- Repeat-after-gap depends on turn count increments at `turn_end`; mid-turn chunks do not advance gap counters.\n",
|
|
103
|
+
"tui-core-renderer.md": "# TUI core renderer — the append-only contract\n\nWhat you are dealing with before you touch the rendering engine. This is the\ncompanion to [`tui-runtime-internals.md`](./tui-runtime-internals.md): that doc\nmaps the *flow* (input → component tree → render); this doc explains the\n**render contract, why it is shaped this way, and the invariants you must not\nviolate**. Scope is the core engine only:\n\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts) — frame pipeline, commit ledger, window math, emitters, cursor placement.\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts) — `ProcessTerminal`, capability probes, private-CSI reassembly.\n- [`packages/tui/src/terminal-capabilities.ts`](../packages/tui/src/terminal-capabilities.ts) — `TERMINAL` profile, sync-output / DECCARA / image detection.\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts) — escape-sequence reassembly.\n- [`packages/tui/src/utils.ts`](../packages/tui/src/utils.ts) — width/slice/wrap (the width model).\n- [`packages/tui/src/kitty-graphics.ts`](../packages/tui/src/kitty-graphics.ts) + [`components/image.ts`](../packages/tui/src/components/image.ts) — inline images.\n- [`packages/tui/src/deccara.ts`](../packages/tui/src/deccara.ts) — rectangular-fill optimizer.\n\nApplication-layer renderers (transcript, tool calls, session tree, editor,\nwidgets) are **out of scope** — they live in `packages/coding-agent`. The one\napp-layer file that is load-bearing for this contract is\n[`transcript-container.ts`](../packages/coding-agent/src/modes/components/transcript-container.ts),\nwhich implements the commit-boundary seam described below.\n\n---\n\n## 1. The one thing to understand first\n\n> **The renderer cannot observe the terminal's scroll position** (ConPTY's\n> probe lies; POSIX has no API at all). The previous engine tried to *guess*\n> when it was safe to rewrite native scrollback, and every policy choice over\n> that unobservable variable traded one failure family for another (yank ↔\n> flash ↔ corruption ↔ invisible-until-resize — see the git history of this\n> file for the full war journal). The current engine removes the guess\n> entirely: **native scrollback is append-only.**\n\nWe keep the transcript on the **normal screen** (native scrollback, native\nselection, transcript persists after exit). The engine maintains one ledger:\n\n- **`committedRows` (C)** — frame rows `[0, C)` have been physically scrolled\n into terminal history. They are **immutable**: the engine never rewrites\n them, and components must never change them.\n- **`windowTopRow` (W)** — the frame row mapped to grid row 0. The visible\n window is frame rows `[W, W + height)`, repainted in place with relative\n cursor moves.\n- **commit boundary (B)** — reported by the component tree per frame\n (`NativeScrollbackLiveRegion`): `B = commitSafeEnd ?? liveRegionStart ??\n frame.length`. Rows below B may still re-layout and must not enter history.\n\nPer ordinary frame: `W = max(C, L − height)`, `C' = max(C, min(B, W))`, and the\nonly bytes that ever touch history are the **chunk** `frame[C, C')` written at\nthe scrollback seam. Scrollback therefore equals `frame[0..C)` — every row\nexactly once, in order, with its content at commit time. There is nothing to\nguess, nothing to defer, and nothing to reconcile: the scroll position is\nirrelevant because ordinary updates never rewrite anything a scrolled reader\ncould be looking at.\n\n### What this costs (the accepted tradeoffs)\n\n- A block that has scrolled past the window top cannot reflow in place. Blocks\n stay in the live region (below B) until they are final; a late mutation of\n committed content is ignored (the stale committed copy stays in history).\n- A component tree that reports **no seam** gets shell semantics: whatever\n scrolls off is final. Shrinking such a frame into its committed prefix\n re-anchors the window and leaves the stale copy in history (§3).\n- Inside multiplexers, a resize leaves the pane history wrapped at the old\n width (same as any shell output).\n\n---\n\n## 2. The frame pipeline (what you are editing)\n\n`#doRender` per frame:\n\n1. Compose the frame (`render(width)`), collecting `liveRegionStart` /\n `commitSafeEnd` from the root children (absolute row indices).\n2. **Audit the committed prefix** (`findCommittedPrefixResync`, skipped on\n geometry frames). Components must never re-layout rows below C, but real\n flows violate it (a TTSR rewind truncating a streamed block, an image-cap\n demotion shrinking a committed image) and the violation must not become\n content loss. The detector samples the prefix *tail* (up to 8 non-blank\n rows in the last 24, SGR-stripped): an in-place edit or restyle disturbs\n only the touched rows (≤1 mismatch ⇒ aligned ⇒ ignored — stale styling in\n history is the accepted artifact), while any insertion/deletion shifts\n every row below it including the tail (⇒ re-anchor C at the first changed\n row and recommit from there: history keeps the stale copy and gains a\n fresh one — **duplication, never loss**).\n3. Classify: **fullPaint** (first paint, `clearScrollback` session replace, or\n geometry change outside a multiplexer — all user gestures) or **update**.\n4. Window math as in §1. Two special rules:\n - **Overlays freeze commits** (`C' = C`): composited rows must never enter\n history; the hidden gap backfills via the chunk after the overlay closes.\n - **Shrink into the committed prefix** (`L ≤ C`): re-anchor\n `W = max(0, L − height)`, reset `C = min(B, W)`, keep the stale history\n above (no gesture, no erase).\n5. Extract the cursor marker (strip-first: markers never reach the terminal,\n the prefix ledger, or the audit), prepare lines (width fitting), slice the\n window, composite overlays **into the window slice only** (screen\n coordinates — an overlay never touches the frame or the ledger).\n6. Emit:\n\n| Emitter | Bytes | When |\n|---|---|---|\n| `#emitFullPaint` | clears + `frame[0, C')` + window rows | gestures only. `clearScrollback` ⇒ `\\x1b[2J\\x1b[H\\x1b[3J`; otherwise ED22 (when supported) + `\\x1b[2J\\x1b[H` |\n| `#emitUpdate` scroll-append | `\\r\\n` + new bottom rows + changed-row range | the rows leaving the screen are exactly the chunk, content untouched since painted |\n| `#emitUpdate` in-window diff | relative move + changed-row range rewrite | nothing scrolls, nothing commits (cursor-only when nothing changed) |\n| `#emitUpdate` seam rewrite | chunk rows + full window rewrite | commit advance, window re-anchor, hidden-gap backfill, mux resize |\n\n**ED3 (`CSI 3 J`) is emitted in exactly one place** — `#emitFullPaint` with\n`clearScrollback: true` — and is reached only by user gestures: session\nreplace/branch/resume (`requestRender(true, { clearScrollback: true })`),\nresize outside a multiplexer, `resetDisplay()` (Ctrl+L). A gesture pins the\nuser to the tail, so the snap is acceptable; multiplexers never get ED3 (it is\na no-op there and a replay would duplicate pane history).\n\nThe ordinary update path never emits ED2/ED3 or an absolute cursor home —\nseveral terminal families snap a scrolled reader to the bottom on those.\n\n### The commit-boundary seam (the load-bearing app contract)\n\n`NativeScrollbackLiveRegion` (tui.ts) is how a component keeps mutable rows out\nof history:\n\n- `getNativeScrollbackLiveRegionStart()` — first row that may still mutate\n (everything below it, including root chrome rendered after it, stays in the\n window).\n- `getNativeScrollbackCommitSafeEnd()` — optional deeper boundary: the\n append-only prefix of the live region (a streaming assistant message's\n settled rows). Without it, a single live block taller than the window would\n hold its head out of history until it finalizes.\n\n`TranscriptContainer` implements this for the coding agent: finalized blocks\nfreeze (their render is snapshotted, so their content can never drift after\nthe engine may have committed it), still-mutating blocks\n(`isTranscriptBlockFinalized?.() === false`) anchor the live region, and\n`deriveLiveCommitState` derives the commit-safe end of the first live block\nfrom two independent signals:\n\n- **append-only detection** — a block observed growing without visibly\n rewriting an interior row commits its full body; a rewrite suspends this\n for `VOLATILE_REARM_FRAMES` clean frames.\n- **stable-prefix ratchet** — rows that stayed visibly identical for a full\n `STABLE_PREFIX_COMMIT_FRAMES` window commit even while the block's tail\n keeps rewriting (a task tool's static prompt above a ticking progress\n tree). Without it, one perpetually animating row holds the whole block out\n of history, so a block taller than the window reads as cut off (head\n neither committed nor on screen) for the entire run. The ratchet tracks the\n window-minimum common prefix; a rewrite above the promoted run retreats it\n to the divergence, and rows that already committed are the engine audit's\n problem (recommit → duplication, never loss). That retreat also arms a\n permanent **rewrite floor** at the divergence: a row that mutates *after*\n surviving a full promotion window is a slow ticker (an agent row's tool/cost\n counter updating every few seconds), not settling content — without the\n floor, every quiet stretch re-promoted it and every later tick forced an\n audit recommit, spraying stale snapshots of the block into scrollback for\n the whole run. Rows at/after the floor never re-promote while the block\n lives (the floor index travels with append-shaped insertions above it);\n one-off re-layouts before any promotion never arm it, and the append-only\n path commits the full block regardless.\n\nFreezing is unconditional — it is the engine's required guarantee, not a\nper-terminal optimization.\n\n---\n\n## 3. Invariants — MUST / NEVER\n\n1. **NEVER add a new `CSI 3 J` (ED3) callsite.** ED3 flows only through\n `#emitFullPaint({ clearScrollback: true })`, only for gestures, never inside\n multiplexers.\n2. **NEVER rewrite a committed row.** No emitter may touch frame rows `< C`,\n and `W ≥ C` always (re-showing a committed row on the grid duplicates it\n for a scrolling reader — the historical corruption family). When a\n *component* violates immutability, the audit (§2) degrades to duplication —\n never silently skip rows, never erase history.\n3. **Commits are exactly the chunk.** Any byte shape that scrolls the screen\n must scroll *only* rows accounted for by `C' − C` — that is what makes\n scrollback provably `frame[0..C)`.\n4. **NEVER probe the viewport position or fork on platform in the update\n path.** win32 behaves like POSIX. The probe APIs are gone; do not\n reintroduce them.\n5. **Mutable content stays below the commit boundary.** App-layer renderers\n must finalize-before-commit; the engine trusts B and clamps, it does not\n verify content.\n6. **Park the hardware cursor at real content bottom**, not the padded window\n bottom, or height shrinks scroll live rows into history and duplicate them\n per resize step.\n7. **Cursor writes live inside the synchronized-output frame**, before ESU —\n never as a second frame after it.\n8. **NEVER throw in the render hot path.** Clamp over-wide lines\n (`truncateToWidth`); a width mismatch is cosmetic, not fatal.\n9. **Multiplexers get no destructive clear and no history rewrap on resize** —\n repaint the window in place; pane history keeps its old wrap.\n10. **Any change to the ledger math, the emitters, or the seam must be\n validated by the stress harness (§6)** across its full scenario matrix,\n not by a single-terminal smoke test.\n\n---\n\n## 4. Terminal capability detection\n\n`TERMINAL` (`terminal-capabilities.ts`) is resolved once at import from\n`TERMINAL_ID` plus environment sniffing; detection helpers are pure over\n`(env, platform)` and unit-testable.\n\n- `shouldEnableSynchronizedOutputByDefault(env, id)` → DEC 2026 default.\n Precedence: user opt-out (`PI_NO_SYNC_OUTPUT`/`PI_TUI_SYNC_OUTPUT=0`) → user\n force-on (`PI_FORCE_SYNC_OUTPUT=1`/`PI_TUI_SYNC_OUTPUT=1`) → `TERM_FEATURES`\n advertises `Sy` → `WT_SESSION` → known direct terminals → off for risky\n multiplexers and unknowns. Reconciled at runtime by the DECRQM mode-2026\n report; a user override still wins.\n- `detectRectangularSgrSupport(id, env)` → DECCARA fills: kitty only, off in\n multiplexers and under `PI_NO_DECCARA`.\n- `supportsScreenToScrollback` → kitty's ED22 (used once, on the initial\n paint, to preserve the pre-existing shell screen).\n\nThe old ED3-risk classifier (`eagerEraseScrollbackRisk`, `PI_TUI_ED3_SAFE`,\n`submitPinsViewportToTail`) is gone: behavior no longer depends on which\nterminal is rendering, so there is no risk class to detect. Env sniffing now\nonly selects *optimizations* (sync output, DECCARA, images), where a miss is\ncosmetic, not corrupting.\n\n---\n\n## 5. Width model\n\n`visibleWidth` / `truncateToWidth` / `sliceByColumn` / `wrapTextWithAnsi`\n(`utils.ts`) all route through **one native UAX#11 engine**\n(`@oh-my-pi/pi-natives`, Rust `unicode-width`). `Bun.stringWidth` was dropped\ndeliberately — mixing two width models in measure-vs-slice produced crashes.\n\n- Fast path: printable ASCII is one cell per code unit.\n- ZWJ pictographic emoji take the `visibleWidthByGrapheme` override.\n- OSC 66 sized text takes the native path.\n\n**Rule:** any new measuring code routes through these helpers, and the hot\npath clamps instead of throwing. Known residual: combining-heavy scripts\n(Arabic harakat) survive painting verbatim, but ghostty-web's cell readback can\nmigrate non-spacing marks across cells — the stress harness compares those rows\nwith marks stripped (`sameLinesAllowingMarkDrift`).\n\n---\n\n## 6. The fidelity gate (use it)\n\n`packages/tui/test/render-stress-harness.ts` drives the renderer's **real\nemitted ANSI** into a ghostty-web `VirtualTerminal` across randomized op\nsequences and parameterized terminal shapes, and validates the contract with a\n**shadow commit ledger**: an independent reimplementation of §1's math, fed\nonly by observed frames (a `render` wrap) and observed bytes (a `write` wrap).\nPer op it asserts:\n\n- the whole tape (scrollback + grid) equals `shadowTape + window slice`, row\n for row, including across resizes;\n- scrolled readers stay pinned and visible history rows are never rewritten;\n- multiplexer pane history grows by exactly the committed chunk;\n- sync-output/autowrap bracket discipline, cursor parking, background columns,\n duplicate accounting.\n\nRun it — plus `render-regressions.test.ts`,\n`streaming-scrollback-defer.test.ts`, and the `issue-*-repro.test.ts` files —\nbefore changing ledger math, emitters, or the seam. A change that passes one\nterminal and one seed is not verified.\n\n---\n\n## 7. Capability probes & stdin reassembly\n\n`ProcessTerminal` fuses capability queries with a bare DA1 (`CSI c`) sentinel so\na non-answering terminal is detected when DA1 returns first. Replies can arrive\n**split across a stdin flush**, so:\n\n- `#privateCsiResponseBuffer` accumulates `\\x1b[?…` partials while a sentinel is\n outstanding, rejoins on the terminator byte, then runs the handlers on the\n **complete** reply. A new `\\x1b` mid-reassembly or >256 bytes abandons the\n partial so real keys still reach input.\n- `#da1SentinelOwners` is a **typed FIFO** discriminated by `kind` so a\n keyboard DA1 cannot be mistaken for an OSC 11 / DECRQM / graphics-probe\n sentinel.\n- DECRQM probes (2026/2048/2031) drive runtime feature gating.\n\n**Rule:** any new probe must own a typed sentinel and survive a split reply\n(feed the reply byte-by-byte in a test and assert nothing leaks to input).\n\n---\n\n## 8. Inline images & memory\n\nKitty images are **transmit-once, place-many** (`kitty-graphics.ts`).\n`ImageBudget` keeps only the most-recent N images live; when the cap is\nexceeded the demoted image's pixels are deleted by id (`a=d,d=I`) and its\nvisible rows re-render as the text fallback through the ordinary window diff —\n**no destructive replay**. A demoted placement already committed to history\nsimply loses its pixels (committed rows are immutable), and the text fallback\nis **height-preserving** once a graphic has rendered (reserved rows + fallback\nline), so demotion never shrinks the block and never shifts committed content\nbelow it.\n\n**Rule:** never re-emit full base64 per frame. Kitty Unicode placeholders are\ndefault-on only for kitty/ghostty (`PI_NO_KITTY_PLACEHOLDERS` /\n`PI_KITTY_PLACEHOLDERS`).\n\n---\n\n## 9. Escape hatches (env vars)\n\n| Var | Effect |\n|---|---|\n| `PI_NO_SYNC_OUTPUT=1` | Disable DEC 2026 BSU/ESU wrappers (autowrap discipline stays on). |\n| `PI_TUI_SYNC_OUTPUT=0\\|1` / `PI_FORCE_SYNC_OUTPUT=1` | Force sync output off / on. |\n| `PI_NO_DECCARA` | Disable Kitty DECCARA rectangular-fill optimization. |\n| `PI_FORCE_IMAGE_PROTOCOL=kitty\\|iterm2\\|sixel\\|off` | Override image protocol detection. |\n| `PI_NO_KITTY_PLACEHOLDERS=1` / `PI_KITTY_PLACEHOLDERS=1` | Force Kitty Unicode placeholders off / on. |\n| `PI_HARDWARE_CURSOR=1` | Show the real hardware cursor instead of a rendered one. |\n| `PI_NOTIFICATIONS=off\\|0\\|false` | Suppress terminal notifications. |\n| `PI_DEBUG_REDRAW=1` | Log the chosen render intent + ledger state per frame to the debug log. |\n\nRemoved with the old engine: `PI_TUI_ED3_SAFE` (no ED3-risk lever exists),\n`PI_CLEAR_ON_SHRINK` (shrinks always clear exactly), `PI_TUI_DEBUG` (per-render\ndump superseded by `PI_DEBUG_REDRAW` ledger logging and the stress harness\nreplay/reduce tooling).\n\n---\n\n## 10. Before you touch the render core — checklist\n\n- [ ] Are you about to emit `CSI 3 J` anywhere other than the gesture-driven\n `clearScrollback` full paint? **Stop.**\n- [ ] Could any code path rewrite, or re-show on the grid, a frame row below\n `committedRows`? **Stop.**\n- [ ] Does your byte shape scroll rows that are not the commit chunk? That\n breaks `scrollback == frame[0..C)`.\n- [ ] Are you adding a viewport probe, a platform fork, or a terminal-brand\n branch to the update path? The contract exists so none are needed.\n- [ ] New mutable UI above the editor? It must report (or live inside) the\n live-region seam, or it will freeze at first commit.\n- [ ] Did you run the stress harness and the repro suite across the full\n scenario matrix — not just one terminal and one seed?\n- [ ] New probe? Typed sentinel owner + split-reply test.\n- [ ] New width path? Routed through the shared native engine, clamped (never\n thrown) in the hot path.\n",
|
|
104
|
+
"tui-runtime-internals.md": "# TUI runtime internals\n\nThis document maps the non-theme runtime path from terminal input to rendered output in interactive mode. It focuses on behavior in `packages/tui` and its integration from `packages/coding-agent` controllers.\n\n> **Editing the rendering engine itself?** Read\n> [`tui-core-renderer.md`](./tui-core-renderer.md) first — it documents the\n> failure modes (yank / corruption / flash / width crashes) and the invariants\n> the render planner, native-scrollback bookkeeping, and capability detection\n> must not violate.\n\n## Runtime layers and ownership\n\n- **`packages/tui` engine**: terminal lifecycle, stdin normalization, focus routing, render scheduling, differential painting, overlay composition, hardware cursor placement.\n- **`packages/coding-agent` interactive mode**: builds component tree, binds editor callbacks and keymaps, reacts to agent/session events, and translates domain state (streaming, tool execution, retries, plan mode) into UI components.\n\nBoundary rule: the TUI engine is message-agnostic. It only knows `Component.render(width)`, `handleInput(data)`, focus, and overlays. Agent semantics stay in interactive controllers.\n\n## Implementation files\n\n- [`packages/coding-agent/src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`packages/coding-agent/src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`packages/coding-agent/src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`packages/coding-agent/src/modes/components/custom-editor.ts`](../packages/coding-agent/src/modes/components/custom-editor.ts)\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts)\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts)\n- [`packages/tui/src/editor-component.ts`](../packages/tui/src/editor-component.ts)\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts)\n- [`packages/tui/src/components/loader.ts`](../packages/tui/src/components/loader.ts)\n\n## Boot and component tree assembly\n\n`InteractiveMode` constructs `TUI(new ProcessTerminal(), settings.get(\"showHardwareCursor\"))`, applies `tui.maxInlineImages` and Kitty text-sizing settings, then creates persistent containers:\n\n- `chatContainer`\n- `pendingMessagesContainer`\n- `statusContainer`\n- `todoContainer`\n- `btwContainer`\n- `omfgContainer`\n- `errorBannerContainer`\n- `statusLine`\n- `hookWidgetContainerAbove`\n- `editorContainer` (holds `CustomEditor`)\n- `hookWidgetContainerBelow`\n\n`init()` wires the tree in that order after any startup warnings/welcome/changelog, focuses the editor, registers input handlers via `InputController`, starts TUI, pushes terminal title state, updates the editor border, and requests a forced render.\nA forced render (`requestRender(true)`) queues a viewport repaint or explicit session replacement; it does **not** throw away previous-line history by default.\n\n## Terminal lifecycle and stdin normalization\n\n`ProcessTerminal.start()`:\n\n1. Enables raw mode and bracketed paste.\n2. Attaches resize handler and refreshes dimensions.\n3. Enables Windows VT input mode when running on win32.\n4. Creates a `StdinBuffer` to split partial escape chunks into complete sequences.\n5. Queries Kitty keyboard protocol support (`CSI ? u`), then enables protocol flags if supported; otherwise enables modifyOtherKeys fallback after a short timeout.\n6. Queries OSC 11 background color and Mode 2031 appearance notifications for dark/light theme detection.\n7. Queries OSC 99 notification capabilities.\n8. Starts periodic OSC 11 polling only where safe, then probes DEC private modes 2026/2048/2031 via DECRQM.\n\n`StdinBuffer` behavior:\n\n- Buffers fragmented escape sequences (CSI/OSC/DCS/APC/SS3).\n- Emits `data` only when a sequence is complete or timeout-flushed.\n- Detects bracketed paste and emits a `paste` event with raw pasted text.\n\nThis prevents partial escape chunks from being misinterpreted as normal keypresses.\n\n## Input routing and focus model\n\nInput path:\n\n`stdin -> ProcessTerminal -> StdinBuffer -> TUI.#handleInput -> focusedComponent.handleInput`\n\nRouting details:\n\n1. TUI runs registered input listeners first (`addInputListener`), allowing consume/transform behavior.\n2. TUI handles global debug shortcut (`shift+ctrl+d`) before component dispatch.\n3. If focused component belongs to an overlay that is now hidden/invisible, TUI reassigns focus to next visible overlay or saved pre-overlay focus.\n4. Key release events are filtered unless focused component sets `wantsKeyRelease = true`.\n5. After dispatch, TUI schedules render.\n\n`setFocus()` also toggles `Focusable.focused`, which controls whether components emit `CURSOR_MARKER` for hardware cursor placement.\n\n## Key handling split: editor vs controller\n\n`CustomEditor` intercepts high-priority combos first (escape, ctrl-c/d/z, ctrl-v, ctrl-p variants, ctrl-t, alt-up, extension custom keys) and delegates the rest to base `Editor` behavior (text editing, history, autocomplete, cursor movement).\n\n`InputController.setupKeyHandlers()` then binds editor callbacks to mode actions:\n\n- cancellation / mode exits on `Escape`\n- shutdown on double `Ctrl+C` or empty-editor `Ctrl+D`\n- suspend/resume on `Ctrl+Z`\n- slash-command and selector hotkeys\n- follow-up/dequeue toggles and expansion toggles\n\nThis keeps key parsing/editor mechanics in `packages/tui` and mode semantics in coding-agent controllers.\n\n## Render loop and the append-only contract\n\n`TUI.requestRender()` coalesces render requests and rate-limits ordinary frames:\n\n- forced renders (`requestRender(true, ...)`) schedule an immediate frame and force a full window rewrite; with `clearScrollback`, they trigger a destructive full paint (ED3 outside multiplexers)\n- ordinary renders schedule through `#scheduleRender()` and respect `TUI.#MIN_RENDER_INTERVAL_MS`\n- repeated requests while a render is pending collapse into the same scheduled frame\n\n`#doRender()` pipeline:\n\n1. Render root component tree, collecting the commit-boundary seam (`NativeScrollbackLiveRegion`) from the children.\n2. Advance the append-only ledger: `windowTop = max(committedRows, frame.length - height)`, commit chunk = settled rows crossing the window top (never past the seam).\n3. Extract and strip `CURSOR_MARKER`, normalize lines, slice the visible window, composite overlays into the window slice (screen coordinates; overlays freeze commits).\n4. Emit one of: gesture-driven full paint (initial / session replace / resize), scroll-append (chunk rows only), in-window row diff, or seam rewrite (chunk + full window).\n\nNative scrollback always equals the committed frame prefix — rows enter history exactly once, in order, when the seam says they are final. There are no viewport probes and no deferred reconciliation; see [`tui-core-renderer.md`](./tui-core-renderer.md).\n\nRender writes use synchronized output mode (`CSI ? 2026 h/l`) when enabled; capability detection, DECRQM, or `PI_NO_SYNC_OUTPUT` can disable the wrappers while leaving autowrap discipline on.\n\n## Render safety constraints\n\nCritical safety checks in `TUI`:\n\n- Non-image rendered lines are expected to fit terminal width; the differential path truncates overwide lines as a last-resort guard and can write debug diagnostics when redraw debugging is enabled.\n- Overlay compositing includes defensive truncation and post-composite width guarding.\n- Width changes force repaint/rebuild planning because wrapping semantics change.\n- Cursor position is clamped before movement.\n\nThese constraints are runtime guards plus component conventions; renderers should still return width-safe lines rather than rely on truncation.\n\nThe deeper reasons these guards exist — why the renderer cannot observe scroll\nposition, why ED3 (`CSI 3 J`) is confined to one path, and why the hot path\nclamps instead of throwing — are documented in\n[`tui-core-renderer.md`](./tui-core-renderer.md).\n\n## Resize handling\n\nResize events are event-driven from `ProcessTerminal` to `TUI.requestRender()`.\n\nEffects:\n\n- A resize is an explicit user gesture: outside multiplexers the engine erases and replays (`ED3` + full paint) so history rewraps at the new geometry; the commit ledger restarts from the replayed frame.\n- Inside terminal multiplexers, resize repaints the visible window in place after a settle debounce (issue #2088); pane history keeps its old wrap, like any shell output, because pane scrollback cannot be erased safely.\n- Overlay visibility can depend on terminal dimensions (`OverlayOptions.visible`); focus is corrected when overlays become non-visible after resize.\n\n## Streaming and incremental UI updates\n\n`EventController` subscribes to `AgentSessionEvent` and updates UI incrementally:\n\n- `agent_start`: starts loader in `statusContainer`.\n- `message_start` assistant: creates `streamingComponent` and mounts it.\n- `message_update`: updates streaming assistant content; creates/updates tool execution components as tool calls appear.\n- `tool_execution_update/end`: updates tool result components and completion state.\n- `message_end`: finalizes assistant stream, handles aborted/error annotations, marks pending tool args complete on normal stop.\n- `agent_end`: stops loaders, clears transient stream state, flushes deferred model switch, issues completion notification if backgrounded.\n\nRead-tool grouping is intentionally stateful (`#lastReadGroup`) to coalesce consecutive read tool calls into one visual block until a non-read break occurs.\n\n## Status and loader orchestration\n\nStatus lane ownership:\n\n- `statusContainer` holds transient loaders (`loadingAnimation`, `autoCompactionLoader`, `retryLoader`).\n- `statusLine` renders persistent status/hooks/plan indicators and drives editor top border updates.\n\nLoader behavior:\n\n- `Loader` updates every 80ms via interval and requests render each frame.\n- Escape handlers are temporarily overridden during auto-compaction and auto-retry to cancel those operations.\n- On end/cancel paths, controllers restore prior escape handlers and stop/clear loader components.\n\n## Mode transitions and backgrounding\n\n### Bash/Python input modes\n\nInput text prefixes toggle editor border mode flags:\n\n- `!` -> bash mode\n- `$` (non-template literal prefix) -> python mode\n\nEscape exits inactive mode by clearing editor text and restoring border color; when execution is active, escape aborts the running task instead.\n\n### Plan mode\n\n`InteractiveMode` tracks plan mode flags, status-line state, active tools, and model switching. Enter/exit updates session mode entries and status/UI state, including deferred model switch if streaming is active.\n\n### Suspend/resume (`Ctrl+Z`)\n\n`InputController.handleCtrlZ()`:\n\n1. Registers one-shot `SIGCONT` handler to restart TUI and force render.\n2. Stops TUI before suspend.\n3. Sends `SIGTSTP` to process group.\n\n## Cancellation paths\n\nPrimary cancellation inputs:\n\n- `Escape` during active stream loader: restores queued messages to editor and aborts agent.\n- `Escape` during bash/python execution: aborts running command.\n- `Escape` during auto-compaction/retry: invokes dedicated abort methods through temporary escape handlers.\n- `Ctrl+C` single press: clear editor; double press within 500ms: shutdown.\n\nCancellation is state-conditional; same key can mean abort, mode-exit, selector trigger, or no-op depending on runtime state.\n\n## Event-driven vs throttled behavior\n\nEvent-driven updates:\n\n- Agent session events (`EventController`)\n- Key input callbacks (`InputController`)\n- terminal resize callback\n- terminal appearance callbacks, SIGWINCH theme reevaluation, and git branch watchers in `InteractiveMode`\n\nThrottled/debounced paths:\n\n- TUI rendering is tick-debounced (`requestRender` coalescing).\n- Loader animation is fixed-interval (80ms), each frame requesting render.\n- Editor autocomplete updates (inside `Editor`) use debounce timers, reducing recompute churn during typing.\n\nThe runtime therefore mixes event-driven state transitions with bounded render cadence to keep interactivity responsive without repaint storms.\n",
|
|
105
|
+
"tui.md": "# TUI integration for extensions and custom tools\n\nThis document covers the **current** TUI contract used by `packages/coding-agent` and `packages/tui` for extension UI, custom tool UI, and custom renderers.\n\n## What this subsystem is\n\nThe runtime has two layers:\n\n- **Rendering engine (`packages/tui`)**: differential terminal renderer, input dispatch, focus, overlays, cursor placement.\n- **Integration layer (`packages/coding-agent`)**: mounts extension/custom-tool components, wires keybindings/theme, and restores editor state.\n\n## Runtime behavior by mode\n\n| Mode | `ctx.ui.custom(...)` availability | Notes |\n| ------------------- | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ |\n| Interactive TUI | Supported | Component is mounted in the editor area or overlay, focused, and must call `done(result)` to resolve. |\n| Background/headless | Not interactive | UI context is no-op (`hasUI === false`). |\n| RPC mode | Not mounted | `custom()` is implemented as unsupported UI and returns `undefined as never`; do not depend on interactive UI in RPC handlers. |\n\nIf your extension/tool can run in non-interactive mode, guard with `ctx.hasUI` / `pi.hasUI`.\n\n## Core component contract (`@oh-my-pi/pi-tui`)\n\n`packages/tui/src/tui.ts` defines:\n\n```ts\nexport interface Component {\n render(width: number): readonly string[];\n handleInput?(data: string): void;\n wantsKeyRelease?: boolean;\n invalidate?(): void;\n}\n```\n\nRender results are component-owned and immutable to callers; a component that did not change should return the **same array reference** it returned last time (reference equality is what enables the renderer's memoization and row virtualization), and must return a new array whenever its content changed.\n\n`Focusable` is separate:\n\n```ts\nexport interface Focusable {\n focused: boolean;\n}\n```\n\nCursor behavior uses `CURSOR_MARKER` (not `getCursorPosition`). Focused components emit the marker in rendered text; `TUI` extracts it and positions the hardware cursor.\n\n## Rendering constraints (terminal safety)\n\nYour `render(width)` output must be terminal-safe:\n\n1. **Do not intentionally exceed `width` on any line**. The renderer truncates overwide non-image lines as a last-resort guard, but components should still return width-safe output.\n2. **Measure visual width**, not string length: use `visibleWidth()`.\n3. **Truncate/wrap ANSI-aware text** with `truncateToWidth()` / `wrapTextWithAnsi()`.\n4. **Sanitize tabs/content** from external sources using `replaceTabs()` (and higher-level sanitizers in coding-agent render paths).\n\nMinimal pattern:\n\n```ts\nimport { replaceTabs, truncateToWidth } from \"@oh-my-pi/pi-tui\";\n\nrender(width: number): readonly string[] {\n return this.lines.map(line => truncateToWidth(replaceTabs(line), width));\n}\n```\n\n## Input handling and keybindings\n\n### Raw key matching\n\nUse `matchesKey(data, \"...\")` for navigation keys and combos.\n\n### Respect user-configured app keybindings\n\nExtension UI factories receive a `KeybindingsManager` (interactive mode) so you can honor mapped actions instead of hardcoding keys:\n\n```ts\nif (keybindings.matches(data, \"interrupt\")) {\n done(undefined);\n return;\n}\n```\n\n### Key release/repeat events\n\nKey release events are filtered unless your component sets:\n\n```ts\nwantsKeyRelease = true;\n```\n\nThen use `isKeyRelease()` / `isKeyRepeat()` if needed.\n\n## Focus, overlays, and cursor\n\n- `TUI.setFocus(component)` routes input to that component.\n- Overlay APIs exist in `TUI` (`showOverlay`, `OverlayHandle`). In interactive extension/custom UI, `custom(..., { overlay: true })` mounts your component through `TUI.showOverlay(...)`; without `overlay`, it replaces the editor component area directly.\n- Overlay custom UI is anchored at `bottom-center` with full terminal width/max height and is removed through the returned overlay handle when `done(...)` closes the flow.\n\n## Mount points and return contracts\n\n## 1) Extension UI (`ExtensionUIContext`)\n\nCurrent signature (`extensibility/extensions/types.ts`):\n\n```ts\ncustom<T>(\n factory: (\n tui: TUI,\n theme: Theme,\n keybindings: KeybindingsManager,\n done: (result: T) => void,\n ) => (Component & { dispose?(): void }) | Promise<Component & { dispose?(): void }>,\n options?: { overlay?: boolean },\n): Promise<T>\n```\n\nBehavior in interactive mode (`extension-ui-controller.ts`):\n\n- Saves editor text.\n- Without `options.overlay`, replaces the editor component with your component.\n- With `options.overlay`, mounts your component as a bottom-centered overlay instead of replacing the editor.\n- Focuses your component.\n- On `done(result)`: calls `component.dispose?.()`, hides the overlay if present, restores editor + text for non-overlay flows, focuses editor, resolves promise.\n So `done(...)` is mandatory for completion.\n\n## 2) Hook/custom-tool UI context (legacy typing)\n\n`HookUIContext.custom` is typed as `(tui, theme, done)` in hook/custom-tool types.\nUnderlying interactive implementation calls factories with `(tui, theme, keybindings, done)`. JS consumers can use the extra arg; type-level compatibility still reflects the 3-arg legacy signature.\n\nCustom tools typically use the same UI entrypoint via the factory-scoped `pi.ui` object, then return the selected value in normal tool content:\n\n```ts\nasync execute(toolCallId, params, onUpdate, ctx, signal) {\n if (!pi.hasUI) {\n return { content: [{ type: \"text\", text: \"UI unavailable\" }] };\n }\n\n const picked = await pi.ui.custom<string | undefined>((tui, theme, done) => {\n const component = new MyPickerComponent(done, signal);\n return component;\n });\n\n return { content: [{ type: \"text\", text: picked ? `Picked: ${picked}` : \"Cancelled\" }] };\n}\n```\n\n## 3) Custom tool call/result renderers\n\nCustom tools and extension tools can return components from:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\n`options` currently includes:\n\n- `expanded: boolean`\n- `isPartial: boolean`\n- `spinnerFrame?: number`\n\nThese renderers are mounted by `ToolExecutionComponent`.\n\n## Lifecycle and cancellation\n\n- `dispose()` is optional at type level but should be implemented when you own timers, subprocesses, watchers, sockets, or overlays.\n- `done(...)` should be called exactly once from your component flow.\n- For cancellable long-running UI, pair `CancellableLoader` with `AbortSignal` and call `done(...)` from `onAbort`.\n\nExample cancellation pattern:\n\n```ts\nconst loader = new CancellableLoader(\n tui,\n theme.fg(\"accent\"),\n theme.fg(\"muted\"),\n \"Working...\",\n);\nloader.onAbort = () => done(undefined);\nvoid doWork(loader.signal).then((result) => done(result));\nreturn loader;\n```\n\n## Realistic custom component example (extension command)\n\n```ts\nimport type { Component } from \"@oh-my-pi/pi-tui\";\nimport {\n SelectList,\n matchesKey,\n replaceTabs,\n truncateToWidth,\n} from \"@oh-my-pi/pi-tui\";\nimport {\n getSelectListTheme,\n type ExtensionAPI,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nclass Picker implements Component {\n list: SelectList;\n keybindings: any;\n done: (value: string | undefined) => void;\n\n constructor(\n items: Array<{ value: string; label: string }>,\n keybindings: any,\n done: (value: string | undefined) => void,\n ) {\n this.list = new SelectList(items, 8, getSelectListTheme());\n this.keybindings = keybindings;\n this.done = done;\n this.list.onSelect = (item) => this.done(item.value);\n this.list.onCancel = () => this.done(undefined);\n }\n\n handleInput(data: string): void {\n if (this.keybindings.matches(data, \"interrupt\")) {\n this.done(undefined);\n return;\n }\n this.list.handleInput(data);\n }\n\n render(width: number): readonly string[] {\n return this.list\n .render(width)\n .map((line) => truncateToWidth(replaceTabs(line), width));\n }\n\n invalidate(): void {\n this.list.invalidate();\n }\n}\n\nexport default function extension(pi: ExtensionAPI): void {\n pi.registerCommand(\"pick-model\", {\n description: \"Pick a model profile\",\n handler: async (_args, ctx) => {\n if (!ctx.hasUI) return;\n\n const selected = await ctx.ui.custom<string | undefined>(\n (tui, theme, keybindings, done) => {\n const items = [\n { value: \"fast\", label: theme.fg(\"accent\", \"Fast\") },\n { value: \"balanced\", label: \"Balanced\" },\n { value: \"quality\", label: \"Quality\" },\n ];\n return new Picker(items, keybindings, done);\n },\n );\n\n if (selected) ctx.ui.notify(`Selected profile: ${selected}`, \"info\");\n },\n });\n}\n```\n\n## Key implementation files\n\n- `packages/tui/src/tui.ts` — `Component`, `Focusable`, cursor marker, focus, overlay, input dispatch.\n- `packages/tui/src/utils.ts` — width/truncation/sanitization primitives.\n- `packages/tui/src/keys.ts` / `keybindings.ts` — key parsing and configurable action mapping.\n- `packages/coding-agent/src/modes/controllers/extension-ui-controller.ts` — interactive mounting/unmounting for extension/hook/custom-tool UI.\n- `packages/coding-agent/src/extensibility/extensions/types.ts` — extension UI and renderer contracts.\n- `packages/coding-agent/src/extensibility/hooks/types.ts` — hook UI contract (legacy custom signature).\n- `packages/coding-agent/src/extensibility/custom-tools/types.ts` — custom tool execute/render contracts.\n- `packages/coding-agent/src/modes/components/tool-execution.ts` — mounting `renderCall`/`renderResult` components and partial-state options.\n- `packages/coding-agent/src/tools/context.ts` — tool UI context propagation (`hasUI`, `ui`).\n",
|
|
106
|
+
};
|