@gajae-code/coding-agent 0.6.4 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/types/cli/migrate-cli.d.ts +20 -0
- package/dist/types/commands/migrate.d.ts +33 -0
- package/dist/types/config/keybindings.d.ts +4 -0
- package/dist/types/gjc-runtime/deep-interview-recorder.d.ts +2 -0
- package/dist/types/gjc-runtime/deep-interview-runtime.d.ts +2 -2
- package/dist/types/gjc-runtime/goal-mode-request.d.ts +1 -1
- package/dist/types/gjc-runtime/session-layout.d.ts +59 -0
- package/dist/types/gjc-runtime/session-resolution.d.ts +47 -0
- package/dist/types/gjc-runtime/state-graph.d.ts +1 -1
- package/dist/types/gjc-runtime/state-runtime.d.ts +5 -4
- package/dist/types/gjc-runtime/state-schema.d.ts +2 -0
- package/dist/types/gjc-runtime/state-writer.d.ts +36 -7
- package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +7 -4
- package/dist/types/gjc-runtime/workflow-command-ref.d.ts +1 -1
- package/dist/types/gjc-runtime/workflow-manifest.d.ts +1 -1
- package/dist/types/harness-control-plane/storage.d.ts +2 -1
- package/dist/types/hooks/skill-state.d.ts +12 -4
- package/dist/types/migrate/action-planner.d.ts +11 -0
- package/dist/types/migrate/adapters/claude-code.d.ts +2 -0
- package/dist/types/migrate/adapters/codex.d.ts +5 -0
- package/dist/types/migrate/adapters/index.d.ts +45 -0
- package/dist/types/migrate/adapters/opencode.d.ts +2 -0
- package/dist/types/migrate/executor.d.ts +2 -0
- package/dist/types/migrate/mcp-mapper.d.ts +20 -0
- package/dist/types/migrate/report.d.ts +18 -0
- package/dist/types/migrate/skill-normalizer.d.ts +27 -0
- package/dist/types/migrate/types.d.ts +126 -0
- package/dist/types/modes/components/custom-editor.d.ts +1 -1
- package/dist/types/modes/shared/agent-wire/unattended-audit.d.ts +1 -1
- package/dist/types/research-plan/index.d.ts +1 -0
- package/dist/types/research-plan/ledger.d.ts +33 -0
- package/dist/types/rlm/artifacts.d.ts +1 -1
- package/dist/types/runtime-mcp/config-writer.d.ts +26 -0
- package/dist/types/skill-state/active-state.d.ts +6 -11
- package/dist/types/skill-state/canonical-skills.d.ts +3 -0
- package/dist/types/skill-state/workflow-hud.d.ts +2 -0
- package/dist/types/task/spawn-gate.d.ts +1 -10
- package/package.json +7 -7
- package/src/cli/migrate-cli.ts +106 -0
- package/src/cli.ts +1 -0
- package/src/commands/deep-interview.ts +2 -2
- package/src/commands/migrate.ts +46 -0
- package/src/commands/state.ts +2 -1
- package/src/commands/team.ts +7 -3
- package/src/coordinator-mcp/policy.ts +10 -2
- package/src/defaults/gjc/extensions/grok-cli-vendor/biome.json +0 -1
- package/src/defaults/gjc/skills/deep-interview/SKILL.md +28 -24
- package/src/defaults/gjc/skills/ralplan/SKILL.md +8 -4
- package/src/defaults/gjc/skills/team/SKILL.md +51 -47
- package/src/defaults/gjc/skills/ultragoal/SKILL.md +17 -13
- package/src/extensibility/custom-commands/loader.ts +0 -7
- package/src/extensibility/gjc-plugins/injection.ts +23 -4
- package/src/extensibility/gjc-plugins/state.ts +16 -1
- package/src/gjc-runtime/deep-interview-recorder.ts +43 -18
- package/src/gjc-runtime/deep-interview-runtime.ts +49 -23
- package/src/gjc-runtime/goal-mode-request.ts +26 -11
- package/src/gjc-runtime/launch-tmux.ts +6 -1
- package/src/gjc-runtime/ralplan-runtime.ts +79 -50
- package/src/gjc-runtime/session-layout.ts +180 -0
- package/src/gjc-runtime/session-resolution.ts +217 -0
- package/src/gjc-runtime/state-graph.ts +1 -2
- package/src/gjc-runtime/state-migrations.ts +1 -0
- package/src/gjc-runtime/state-runtime.ts +230 -121
- package/src/gjc-runtime/state-schema.ts +2 -0
- package/src/gjc-runtime/state-writer.ts +289 -41
- package/src/gjc-runtime/team-runtime.ts +43 -19
- package/src/gjc-runtime/tmux-sessions.ts +7 -1
- package/src/gjc-runtime/ultragoal-guard.ts +45 -2
- package/src/gjc-runtime/ultragoal-runtime.ts +121 -41
- package/src/gjc-runtime/workflow-command-ref.ts +1 -2
- package/src/gjc-runtime/workflow-manifest.ts +1 -2
- package/src/harness-control-plane/storage.ts +14 -4
- package/src/hooks/native-skill-hook.ts +38 -12
- package/src/hooks/skill-state.ts +178 -83
- package/src/internal-urls/docs-index.generated.ts +6 -4
- package/src/migrate/action-planner.ts +318 -0
- package/src/migrate/adapters/claude-code.ts +39 -0
- package/src/migrate/adapters/codex.ts +70 -0
- package/src/migrate/adapters/index.ts +277 -0
- package/src/migrate/adapters/opencode.ts +52 -0
- package/src/migrate/executor.ts +81 -0
- package/src/migrate/mcp-mapper.ts +152 -0
- package/src/migrate/report.ts +104 -0
- package/src/migrate/skill-normalizer.ts +80 -0
- package/src/migrate/types.ts +163 -0
- package/src/modes/bridge/bridge-mode.ts +2 -2
- package/src/modes/components/custom-editor.ts +30 -20
- package/src/modes/rpc/rpc-mode.ts +2 -2
- package/src/modes/shared/agent-wire/unattended-audit.ts +3 -2
- package/src/prompts/agents/init.md +1 -1
- package/src/prompts/system/plan-mode-active.md +1 -1
- package/src/prompts/tools/ast-grep.md +1 -1
- package/src/prompts/tools/search.md +1 -1
- package/src/prompts/tools/task.md +1 -2
- package/src/research-plan/index.ts +1 -0
- package/src/research-plan/ledger.ts +177 -0
- package/src/rlm/artifacts.ts +12 -3
- package/src/rlm/index.ts +7 -0
- package/src/runtime-mcp/config-writer.ts +46 -0
- package/src/session/agent-session.ts +15 -21
- package/src/setup/hermes-setup.ts +1 -1
- package/src/skill-state/active-state.ts +72 -108
- package/src/skill-state/canonical-skills.ts +4 -0
- package/src/skill-state/deep-interview-mutation-guard.ts +28 -109
- package/src/skill-state/workflow-hud.ts +4 -2
- package/src/skill-state/workflow-state-contract.ts +3 -3
- package/src/task/agents.ts +1 -22
- package/src/task/index.ts +1 -41
- package/src/task/spawn-gate.ts +1 -38
- package/src/task/types.ts +1 -1
- package/src/tools/ask.ts +34 -12
- package/src/tools/computer.ts +58 -4
- package/dist/types/extensibility/custom-commands/bundled/review/index.d.ts +0 -10
- package/src/extensibility/custom-commands/bundled/review/index.ts +0 -456
- package/src/prompts/agents/explore.md +0 -58
- package/src/prompts/agents/plan.md +0 -49
- package/src/prompts/agents/reviewer.md +0 -141
- package/src/prompts/agents/task.md +0 -16
- package/src/prompts/review-request.md +0 -70
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
|
|
2
2
|
|
|
3
|
-
export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","REBRANDING_PLAN_260525.md","ai-schema-normalize.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","bot-integration.md","brand-assets.md","bridge.md","codebase-overview.md","compaction.md","composer-codex-parity.md","computer-use/README.md","environment-variables.md","external-control-readiness.md","fs-scan-cache-architecture.md","geobench.md","gjc-dogfood-skill-template.md","gjc-session-clawhip-routing.md","grok-build-provider-design.md","handoff-generation-pipeline.md","hermes-mcp-bridge.md","hotspot-map-successor.md","keybindings.md","lsp-config.md","memory.md","models.md","multi-vendor-profiles.md","native-ffi-optimization-policy.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","onboarding-packet.md","onboarding-receipt.md","ooo-bridge-extension-contract.md","openclaw-hermes-rpc-integration.md","perf-profiling-corpus.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/calc.md","tools/checkpoint.md","tools/computer.md","tools/cron.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/monitor.md","tools/read.md","tools/recipe.md","tools/render_mermaid.md","tools/resolve.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md"];
|
|
3
|
+
export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","REBRANDING_PLAN_260525.md","ai-schema-normalize.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","bot-integration.md","brand-assets.md","bridge.md","codebase-overview.md","compaction.md","composer-codex-parity.md","computer-use/README.md","environment-variables.md","external-control-readiness.md","fs-scan-cache-architecture.md","geobench.md","gjc-dogfood-skill-template.md","gjc-session-clawhip-routing.md","grok-build-provider-design.md","handoff-generation-pipeline.md","hermes-mcp-bridge.md","hotspot-map-successor.md","keybindings.md","lsp-config.md","memory.md","models.md","multi-vendor-profiles.md","native-ffi-optimization-policy.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","onboarding-packet.md","onboarding-receipt.md","ooo-bridge-extension-contract.md","openclaw-hermes-rpc-integration.md","perf-profiling-corpus.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","research-plan-ledger.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","standalone-mcp.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/calc.md","tools/checkpoint.md","tools/computer.md","tools/cron.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/monitor.md","tools/read.md","tools/recipe.md","tools/render_mermaid.md","tools/resolve.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md"];
|
|
4
4
|
|
|
5
5
|
export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
6
6
|
"ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.gjc/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n|------------------|-------------------:|--------:|------------:|\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-openai-code | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-openai-code | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n|---------------------|-----:|\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now *looks\nlike* a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n|-------------------|:-:|---------:|---:|\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n|--------------------------|------:|----------|\n| Patch-DSL (`§PATH`/anchor/`«»≔` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped *inside* JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n *different* tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\n MLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n ` code `). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is *worse* in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.",
|
|
@@ -17,7 +17,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
17
17
|
"composer-codex-parity.md": "# Composer 2.5 Fast parity repro\n\nThis document records the one-command repros for the Composer 2.5 Fast stability work. Scope is GJC-local only: no OpenClaw reference, no Cursor live e2e, no upstream xAI/server change, and no Codex refactor. Codex is the baseline/report model only.\n\n## Focused discipline regression\n\n```sh\nbun test packages/ai/test/composer-discipline.test.ts\n```\n\nExpected contract:\n\n- `grok-build/grok-composer-2.5-fast` and other composer ids receive `COMPOSER_EDIT_DISCIPLINE_PROMPT` ahead of host/default system prompts on the `openai-completions`, `openai-responses`, and Cursor RPC prompt paths.\n- Non-composer models keep their system prompt payload unchanged.\n- The prompt explicitly covers adversarial shell file discovery, shell file reads, out-of-band shell writes, fabricated/stale anchors, malformed tool arguments, and contaminated bash command strings.\n\n## V3 mock P1 gate\n\n```sh\nbun packages/agent/bench/composer-stability-v3.ts --mock --seed 42 -n 5 --model grok-build/grok-composer-2.5-fast --baseline-model openai-codex/gpt-5.5:low\n```\n\nEquivalent package script:\n\n```sh\nbun run bench:composer-stability-v3\n```\n\nP1 passes when `candidateFailureCount <= baselineFailureCount` over the same deterministic scenario matrix. Mock mode is a smoke gate, not live parity proof.\n\n## V3 trace-backed gate\n\n```sh\nbun packages/agent/bench/composer-stability-v3.ts --trace --trace-file packages/agent/test/fixtures/composer-stability-v3/traces/parity.json\n```\n\nEquivalent package script:\n\n```sh\nbun run bench:composer-stability-v3:trace\n```\n\nTrace files can be JSON, JSON arrays, JSON `{ \"records\": [...] }`, or JSONL. Each record declares `scenarioId`, `modelRole` (`candidate` or `baseline`), `model`, `trial`, optional `expected`, and `events`. The classifier maps recorded tool behavior to failure classes:\n\n- `shell-read`\n- `shell-file-discovery`\n- `shell-write`\n- `contaminated-command`\n- `bad-anchor-unrecovered`\n- `malformed-tool-args-unrecovered`\n- `sanitize-replay-regression`\n- `wrong-file-edit`\n- `missing-tool-turn`\n- `timeout`\n\nTrace P1 is applicable only when both candidate and baseline records exist, and it can pass only with at least three comparable candidate/baseline scenario ids so a one-scenario smoke cannot fake parity. It reports `candidateFailureCount`, `baselineFailureCount`, `parityDelta`, per-scenario counts, and the trace artifact paths that were scored.\n\n## Optional live smoke\n\n```sh\nbun packages/agent/bench/composer-stability-v3.ts --live -n 3 --model grok-build/grok-composer-2.5-fast --baseline-model openai-codex/gpt-5.5:low\n```\n\nLive smoke is informational. Without `GROK_CLI_OAUTH_TOKEN` and Codex/OpenAI credentials, or without trace artifacts from a real capture, `--live` exits successfully with an explicit skip record and `p1.applicable=false`; it does not fake a P1 pass. Pass `--live --trace-dir <captured-traces>` to score real captured runs through the same trace classifier. Cursor live e2e is intentionally out of scope.\n\n## Broader local verification\n\n```sh\nbun test packages/agent/test/composer-stability-v3.test.ts packages/coding-agent/test/grok-cli-sanitize.test.ts packages/coding-agent/test/grok-build-stream.test.ts\nbun test packages/agent packages/ai\nbun scripts/verify-g002-gates.ts\n```\n\nUse `mise x bun@1.3.14 -- <command>` when `bun` is not on `PATH`.\n",
|
|
18
18
|
"computer-use/README.md": "# Native computer-use tool\n\nStatus: **in progress (draft)** — coordinate contract + native `screenshot`\ncapture landed and verified; input primitives, kill-switch, and napi/TS surface\nto follow.\n\nA new, model-agnostic `computer` tool that lets any model drive the user's real\nmacOS desktop via the OpenAI computer-use action set. Built fresh (the\nopen-source `openai/codex` repo has no GUI computer-use source to copy; only the\npublic action *schema* is mirrored).\n\nThis feature was scoped through GJC's deep-interview (requirements) and ralplan\n(Planner/Architect/Critic consensus) workflows. The full deep-interview spec and\nthe consensus plan + ADR are the authoritative source of truth; this document is\nthe committed summary and roadmap.\n\n## Locked decisions (ADR summary)\n\n- **Target:** the user's real macOS desktop, OS-native control. v1 is macOS-only\n (Linux/Windows deferred behind the same tool schema).\n- **Driver:** any model via a generic structured tool-call interface — no\n provider-specific computer-use API.\n- **Action set:** the exact OpenAI computer-use primitives — `screenshot`,\n `click`, `double_click`, `move`, `drag`, `scroll`, `type`, `keypress`, `wait`.\n- **Implementation:** built fresh in the Rust `pi-natives` crate (napi),\n exposed through `packages/natives` to a new\n `packages/coding-agent/src/tools/computer.ts`, kept deliberately lower-level\n than the existing `browser` tool (coordinate/input primitives only, no web\n semantics).\n- **Coordinate contract:** a single normalized virtual display. The returned\n screenshot's pixel dimensions *are* the action coordinate space; Rust owns the\n transform to macOS logical points (Retina/HiDPI-safe) and display selection.\n- **Permissions:** macOS TCC (Accessibility + Screen Recording) auto-preflighted;\n on a missing grant, open the relevant Settings pane and return a clear\n \"grant then retry/relaunch\" error.\n- **Gating:** off by default; opt-in config flag (per session) plus a persistent\n always-on option.\n- **Safety:** no per-action approval (autonomous), **but** a daemon-enforced\n global kill-switch outside model control (global hotkey OR TUI stop key) that\n aborts queued actions, releases held keys/buttons, suspends further input, and\n snapshots the last screen. Reset is user-only, never via the model-facing tool.\n- **Architecture:** every primitive delegates to one central Rust\n `execute_action` state machine (preflight, validation, cancellation, audit,\n screenshot policy, release-all) so per-primitive methods cannot drift past the\n safety contract. The in-process supervisor sits behind a `SupervisorClient`\n boundary so an out-of-process daemon can replace it later without changing the\n napi surface.\n\n## Capture + coordinate contract (shipped)\n\n`crates/pi-natives/src/computer/coords.rs` implements the pure, framework-free\ncore: `NormalizedDisplay` maps a screenshot-space pixel `(x, y)` to a macOS\nlogical point via per-axis scale and the display's logical origin, rejecting\nout-of-bounds and non-finite inputs. It is unit-tested (scale 1.0/2.0,\nfractional and anisotropic scale, non-zero origins, edges, out-of-bounds,\ninvalid scale) and requires no display or granted permissions.\n\n`crates/pi-natives/src/computer/capture.rs` (macOS) implements the read-only\n`screenshot` primitive: it captures the primary display via CoreGraphics into a\nPNG and derives the `NormalizedDisplay` scale from captured physical pixels vs\nlogical bounds, surfacing a missing Screen Recording grant as\n`CaptureError::CaptureFailed` (never a silent black frame). Verified live: a\nreal, non-uniform primary-display capture decodes as a PNG with matching\ndimensions (`cargo test -p pi-natives --ignored captures_non_uniform_primary_display`).\n\n## Delivery roadmap\n\nDelivery ships a `screenshot`+`click`+`type` vertical slice first; the remaining\nsix primitives fast-follow; v1 acceptance = all nine primitives drive a real\nmacOS app end-to-end plus a kill-switch drill (per-primitive napi unit tests +\nmanual macOS E2E).\n\n| Slice | Scope | Status |\n|-------|-------|--------|\n| Coordinate contract + planning docs | `coords` module + unit tests + this doc | **done (this PR)** |\n| Native screen capture (`screenshot`) | `capture` module, primary display, PNG + scale | **done (this PR, verified live)** |\n| TCC preflight (`permissions`) | Accessibility + Screen Recording checks, Settings openers, fail-closed guards | **done (this PR, verified live)** |\n| napi screenshot binding (`computerScreenshot`) | napi → `packages/natives` → TS, verified live | **done (this PR)** |\n| Native input orchestration (`input`) | `InputController` click/double_click/move/drag/scroll/type/keypress + release_all over an `EventSink` | **done (this PR)** — logic unit-tested; **live cursor-move injection verified** (Accessibility granted) |\n| Central `execute_action` state machine | preflight + supervisor + cancellation + audit + release-all | planned |\n| Kill-switch supervisor + global-hotkey event-tap | `supervisor` (fail-closed `input_allowed`, user-only reset) + `hotkey` CGEventTap on a CFRunLoop thread | **done (this PR)** — supervisor unit-tested; **synthetic-hotkey latch verified live** |\n| Supervisor-gated `execute_action` + napi/TS `computer` tool | wire input through `input_allowed` + cancellation; `ComputerController` napi; `computer.ts` schema/gating/prompt/renderer | next |\n| Manual macOS E2E acceptance | TextEdit all-nine + kill-switch drill | planned (requires macOS hardware + granted TCC + human operator) |\n\nThe remaining input backend, kill-switch, napi/TS surface, and manual\nend-to-end acceptance still require injecting events into a live desktop and a\nhuman-operated drill, so they are tracked as follow-up work rather than landed\nin this draft.\n",
|
|
19
19
|
"environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@gajae-code/utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.gjc/agent/.env`, respecting `GJC_CONFIG_DIR` / `GJC_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.gjc/.env`, respecting `GJC_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `GJC_*` keys are mirrored to `GJC_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `AZURE_OPENAI_API_KEY` | Azure OpenAI auth | Using `azure-openai` / `azure-openai-responses` models | Pair with `AZURE_OPENAI_BASE_URL` or `AZURE_OPENAI_RESOURCE_NAME` |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `GJC_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `gjc auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `GJC_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `GJC_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.gjc/` (respecting `GJC_CONFIG_DIR`). |\n\nThe gateway has no dedicated env vars — it inherits `GJC_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `gjc auth-gateway token`.\n\n### Multi-account credential ranking\n\nWhen more than one OAuth credential is stored for the same provider (e.g. several Anthropic accounts), `AuthStorage` ranks them at session start to pick which one serves the session. This env var selects the ranking strategy; it is fully opt-in and does not change the default.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------------- | ------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `GJC_CREDENTIAL_RANKING_MODE` | Multi-account OAuth credential selection strategy | Never (opt-in) | `balanced` (default) prefers the least-drained account (spreads load, keeps burst headroom). `earliest-reset` prefers the soonest-to-reset non-blocked account (earliest-expiry-first) so perishable tumbling-window quota (e.g. Claude 5h/7d) is drained before reset. Unset/unknown → `balanced`. Only affects session-start ranking; blocked/exhausted accounts still sort last. |\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `ANTHROPIC_MODEL_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `ANTHROPIC_MODEL_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\nCredential fallback order is static env (`AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` plus optional `AWS_SESSION_TOKEN`), named profile / SSO / `credential_process`, then EC2 IMDSv2. `models.yml` Bedrock entries use `api: bedrock-converse-stream` and do not require `apiKey` or `apiKeyEnv` because the provider signs requests from this AWS chain.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Model provider base URL overrides\n\nBuilt-in model provider base URLs resolve with this precedence:\n\n1. `models.yml` / model config provider `baseUrl`\n2. provider-specific base URL environment variable\n3. bundled provider default\n\nSupported aliases:\n\n| Provider | Variables |\n| --- | --- |\n| OpenAI | `OPENAI_BASE_URL` |\n| Anthropic | `ANTHROPIC_BASE_URL` |\n| Google Gemini | `GOOGLE_BASE_URL`, `GEMINI_BASE_URL` |\n| Google Antigravity | `GOOGLE_ANTIGRAVITY_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Gemini CLI | `GOOGLE_GEMINI_CLI_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Vertex | `GOOGLE_VERTEX_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Any provider id | derived `<PROVIDER_ID>_BASE_URL`, uppercased with non-alphanumerics converted to `_` (for example `my-proxy` → `MY_PROXY_BASE_URL`) |\n\nOpenAI-compatible proxy note: the built-in `openai` provider keeps its bundled API transport (`openai-responses`). Setting `OPENAI_BASE_URL` changes the host but still calls `<baseUrl>/responses`. If your proxy only supports Chat Completions, configure a custom `models.yml` provider with `api: openai-completions` instead of using the built-in OpenAI provider override:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKey: OPENAI_API_KEY\n api: openai-completions\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n api: openai-completions\n```\n\nFor OpenRouter traffic, GJC explicitly sends `User-Agent: Gajae-Code/<package version>` plus OpenRouter attribution headers. For the built-in OpenAI Responses transport and generic OpenAI-compatible Chat Completions transport, GJC passes model/provider headers through the OpenAI JavaScript SDK and does not set a GJC user-agent unless the provider-specific code adds one.\n\n### OpenAI-compatible proxy provider config\n\nFor OpenAI-compatible proxies that only implement Chat Completions, prefer a custom `models.yml` provider over `OPENAI_BASE_URL`:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n reasoning: false\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n```\n\n`models.yml` is strict: unsupported provider/model keys fail validation before the provider request is dispatched.\n\n### GJC workflow bridge commands\n\n`gjc ralplan`, `gjc deep-interview`, and `gjc state` are private runtime bridge commands. They require `GJC_RUNTIME_BINARY` (or legacy `GJC_LEGACY_RUNTIME_BINARY`) to point at the private runtime executable; public bundled workflow use remains through `/skill:ralplan` and `/skill:deep-interview` inside a GJC session.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_RUNTIME_BINARY` | Private runtime bridge binary for `gjc ralplan`, `gjc deep-interview`, and `gjc state` |\n| `GJC_LEGACY_RUNTIME_BINARY` | Legacy fallback bridge binary name |\n\n### Interactive `--tmux` startup and scroll/mouse profile\n\n`gjc --tmux` launches the interactive TUI inside a fresh GJC-managed tmux session. When GJC creates that session it applies a profile that is **scoped to the GJC session only** (it never runs `set -g` / global tmux options), including:\n\n- `mouse on` — enables mouse-wheel scrolling into tmux copy-mode (history/scrollback).\n- `set-clipboard on` and a readable copy-mode `mode-style`.\n- GJC ownership/identity tags (`@gjc-profile`, branch/project markers).\n\nThis profile is applied on macOS, Linux, and WSL (Linux) alike; only native Windows (`win32`) skips the tmux launch. It is applied **only to sessions GJC itself creates**. If you start tmux yourself and then run `gjc` inside it, GJC leaves your tmux configuration untouched — add `set -g mouse on` to your own `~/.tmux.conf`, or relaunch with `gjc --tmux` to get the managed profile.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_LAUNCH_POLICY` | Launch policy for `--tmux` startup: `tmux` (default) or `direct` (skip the tmux session) |\n| `GJC_TMUX_SESSION` | Explicit tmux session name override for `--tmux` startup |\n| `GJC_TMUX_COMMAND` | tmux binary/command override for every GJC tmux flow (`GJC_TEAM_TMUX_COMMAND` is honored as a team-path alias) |\n| `GJC_TMUX_PROFILE` | Set `0`/`false`/`off` to apply only the required ownership tags and skip the scroll/mouse/clipboard profile |\n| `GJC_MOUSE` | Set `0`/`false`/`off` to skip `mouse on`, leaving wheel scrolling to the host terminal instead of tmux copy-mode |\n\n#### WSL / Windows Terminal scrolling\n\nOn WSL with Windows Terminal, scrolling behaves differently depending on whether tmux owns the mouse:\n\n- **With the GJC profile (default):** the mouse wheel enters tmux copy-mode and scrolls the pane's scrollback. Keyboard fallback: `Ctrl-b [` to enter copy-mode, then `PgUp`/arrows; `q` to exit.\n- **Without tmux mouse capture (`GJC_MOUSE=off`, or running outside `gjc --tmux`):** Windows Terminal handles the wheel and scrolls its own native scrollback.\n\nIf the wheel does not scroll inside `gjc --tmux` on WSL, confirm the session is GJC-managed (`gjc session list`) so the `mouse on` profile is actually applied; sessions you launched yourself do not receive it. Set `GJC_MOUSE=off` if you prefer Windows Terminal's native scrollback over tmux copy-mode.\n\n### Team tmux backend, dry-run, and state paths\n\n`gjc team ...` starts tmux worker panes from the current tmux-backed leader session. Start that leader with `gjc --tmux` first; `gjc team` intentionally does not create or attach the leader session itself.\n\n`gjc team ... --dry-run --json` creates the same machine-readable state tree as a team launch without starting tmux panes. By default that state is written under `<cwd>/.gjc/state/team/<team>/`; treat it as ephemeral smoke-test/review state. Do not commit generated `.gjc/state/team` contents. Remove the generated team directory after a dry-run when the harness no longer needs it.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_TEAM_STATE_ROOT` | Overrides the team state root (default `<cwd>/.gjc/state/team`) |\n| `GJC_TEAM_TMUX_COMMAND` | tmux binary/command override for team launch |\n| `GJC_TEAM_WORKER_COMMAND` | Worker GJC command override |\n| `GJC_TEAM_WORKER_CLI` | Team worker CLI selector; accepted values are `auto` or `gjc` |\n| `GJC_TEAM_WORKER_CLI_MAP` | Comma-separated worker CLI selector map; entries must be `auto` or `gjc` |\n\n### Hermes MCP bridge\n\n`gjc mcp-serve coordinator` exposes a GJC-native outward MCP bridge for Hermes-style coordinators. `gjc mcp-serve hermes` is a compatibility alias for the same bridge. The bridge is read-only by default and fails closed until roots and mutation classes are explicitly configured.\n\nCoordinator MCP currently exposes durable polling/await tools, not push subscriptions. Consume `gjc_coordinator_read_coordination_status`, `gjc_coordinator_read_turn`, or bounded `gjc_coordinator_await_turn` for state changes.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_COORDINATOR_MCP_WORKDIR_ROOTS` | Required allowlist for workdir and artifact paths. `gjc setup hermes` renders absolute normalized paths joined with the platform path delimiter (`:` on POSIX, `;` on Windows). The bridge parser also accepts commas, semicolons, and newlines for legacy manual configs. |\n| `GJC_COORDINATOR_MCP_MUTATIONS` | Enables mutating tool classes as a comma-separated list (`sessions`, `questions`, `reports`) or `all`. `sessions` covers session startup, prompt delivery, durable turn journal updates, queue, and force operations. Per-call `allow_mutation: true` is still required. |\n| `GJC_COORDINATOR_MCP_ARTIFACT_BYTE_CAP` | Max bytes returned by artifact reads (default `65536`, capped at `1048576`). |\n| `GJC_COORDINATOR_MCP_STATE_ROOT` | Bridge coordination state root (default `<cwd>/.gjc/state/coordinator-mcp`). |\n| `GJC_COORDINATOR_MCP_PROFILE` | Optional profile namespace for session/question/report state. Missing scope never widens to global session enumeration. |\n| `GJC_COORDINATOR_MCP_REPO` | Optional repo namespace for session/question/report state. Missing scope never widens to global session enumeration. |\n| `GJC_COORDINATOR_MCP_SESSION_COMMAND` | GJC-compatible command used by mutating session startup to launch a detached tmux session. `gjc setup hermes` renders this to `gjc --worktree` by default so Hermes-installed configs start real GJC work in a GJC-managed worktree while preserving GJC project/session resume identity. Explicit values are preserved as user intent. When manually omitted, mutating session startup fails closed unless a service adapter is injected. |\n| `GJC_COORDINATOR_MCP_SETUP_MANAGED_BY` | Marker written by `gjc setup hermes` for safe managed config updates. |\n| `GJC_COORDINATOR_MCP_SETUP_SCHEMA_VERSION` | Managed setup schema version written by `gjc setup hermes`. |\n| `GJC_COORDINATOR_MCP_SETUP_SIGNATURE` | Deterministic managed setup signature used to detect safe updates versus unmanaged conflicts. |\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `GJC_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI code provider responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `GJC_OPENAI_CODE_DEBUG` | `1`/`true` enables OpenAI code provider debug logging |\n| `GJC_OPENAI_CODE_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `GJC_OPENAI_CODE_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path |\n| `GJC_OPENAI_CODE_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `GJC_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `GJC_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution); any other value forces `short`. The Anthropic provider already defaults to `long` (1h) when unset, so this is mainly an opt-out (`short`) or a way to extend long retention to other providers. |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / OpenAI code OAuth in DB | OpenAI code search provider availability/auth |\n| `GJC_OPENAI_CODE_WEB_SEARCH_MODEL` | OpenAI code search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.gjc/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `anthropic-model-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-4 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `GJC_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `GJC_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `GJC_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `GJC_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `GJC_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `GJC_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `GJC_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `GJC_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `GJC_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `GJC_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `GJC_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `GJC_SUBPROCESS_CMD` | Overrides subagent spawn command (`gjc` / `gjc.cmd` resolution bypass) |\n| `GJC_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `GJC_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `GJC_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `GJC_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `GJC_TIMING=full` lists every module-load entry instead of just the top N. |\n| `GJC_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `GJC_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `GJC_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `GJC_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `GJC_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `GJC_FORCE_IMAGE_PROTOCOL=sixel` |\n| `GJC_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n\n`GJC_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@gajae-code/utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `GJC_CONFIG_DIR` | Config root dirname under home (default `.gjc`) |\n| `GJC_CODING_AGENT_DIR` | Full override for agent directory (default `~/<GJC_CONFIG_DIR or .gjc>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `GJC_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `ANTHROPIC_MODEL_BASH_NO_CI` | Legacy alias fallback for `GJC_BASH_NO_CI` |\n| `GJC_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `ANTHROPIC_MODEL_BASH_NO_LOGIN` | Legacy alias fallback for `GJC_BASH_NO_LOGIN` |\n| `GJC_SHELL_PREFIX` | Optional command prefix wrapper |\n| `ANTHROPIC_MODEL_CODE_SHELL_PREFIX` | Legacy alias fallback for `GJC_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `GJC_BASH_NO_LOGIN`/`ANTHROPIC_MODEL_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `GJC_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `GJC_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `GJC_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `GJC_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `GJC_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `GJC_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n| `GJC_TUI_KEYBOARD_PROTOCOL` | Enhanced keyboard input (Kitty keyboard protocol + xterm modifyOtherKeys). Enabled by default; set `0` / `false` to leave the keyboard in its default mode. Use this when a terminal (e.g. Android Termius) breaks IME/Hangul composition while these enhanced modes are active. |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `GJC_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `GJC_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `GJC_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## 11) Bridge mode (`--mode bridge`)\n\nConsumed by `packages/coding-agent/src/modes/bridge/*`. The bridge is a\nnetwork-reachable control surface and is **secure-by-default**: it refuses to\nstart without TLS and a bearer token, and the 0.3.1 default endpoint matrix\nfail-closes session events, commands, controller ownership, UI responses, host\ntool results, and host URI results. See `docs/bridge.md` for protocol details.\n\n| Variable | Required | Default | Behavior |\n| --- | --- | --- | --- |\n| `GJC_BRIDGE_TOKEN` | Yes | — | Bearer token required on authenticated endpoints. **Secret — never commit.** |\n| `GJC_BRIDGE_TLS_CERT` | Yes | — | Path to the TLS certificate (PEM). Startup fails closed if cert/key are missing (TLS is mandatory, including loopback). |\n| `GJC_BRIDGE_TLS_KEY` | Yes | — | Path to the TLS private key (PEM). **Secret — never commit; `chmod 600`.** |\n| `GJC_BRIDGE_HOST` | No | `127.0.0.1` | Bind hostname. |\n| `GJC_BRIDGE_PORT` | No | `4077` | Bind port (1–65535). |\n| `GJC_BRIDGE_SCOPES` | No | `prompt` | Parsed for dormant command-surface compatibility. Valid scopes: `prompt`, `control`, `bash`, `export`, `session`, `model`, `message:read`, `host_tools`, `host_uri`, `admin`. The default endpoint matrix still advertises no accepted scopes and rejects commands before scope checks. |\n\nLocal development with a self-signed certificate must add the local CA to the\nclient trust store; there is no plaintext or certificate-verification-bypass mode.\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n- Bridge auth/TLS material (`GJC_BRIDGE_TOKEN` and the `GJC_BRIDGE_TLS_KEY` private key; never commit cert/key/token material)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
|
|
20
|
-
"external-control-readiness.md": "# External control surface readiness\n\nThis document classifies every public GJC surface that an external controller, bot, editor, or harness can use to drive `gjc`. It is intentionally narrower than the generic bot guide: it states what is ready today, what is only editor/client-oriented, and what remains experimental.\n\n## Readiness matrix\n\n| Surface | Current readiness | Primary command | Use when | Do not use when | Provider-independent smoke path |\n| --- | --- | --- | --- | --- | --- |\n| Coordinator MCP | Preferred multi-session bot/control-plane surface. | `gjc mcp-serve coordinator` | A controller needs to start/register GJC sessions, send bounded turns, answer questions, read artifacts, and write durable status reports across one or more repo/worktree lanes. | The controller only needs one embedded subprocess and can own stdio directly. | `gjc mcp-serve coordinator --check --json`; `packages/coding-agent/test/coordinator-mcp.test.ts`; `packages/coding-agent/test/setup-cli.test.ts`. |\n| RPC stdio | Stable subprocess worker surface. | `gjc --mode rpc` | A host embeds one GJC worker process, sends JSONL commands over stdin, consumes stdout frames, and optionally uses `python/gjc-rpc`. | The host needs remote HTTPS, multi-session orchestration, or MCP tool discovery. | `packages/coding-agent/test/rpc-unattended-stdio.test.ts`; `packages/coding-agent/test/rpc-client.start.test.ts`; `packages/coding-agent/test/rpc-host-tools.test.ts`; `packages/coding-agent/test/rpc-host-uris.test.ts`. |\n| ACP mode | Editor/ACP client surface with tested protocol initialization, session lifecycle, client-owned MCP, file/terminal client bridges, permission routing, and stdout hygiene. | `gjc --mode acp` or `gjc acp` | An editor or ACP-compatible client wants to drive GJC through the Agent Client Protocol over stdio. | A bot needs a generic multi-session control plane; use Coordinator MCP instead. | `packages/coding-agent/test/acp-initialize-conformance.test.ts`; `packages/coding-agent/test/acp-stdout-hygiene.test.ts`; `packages/coding-agent/test/acp-lazy-startup.test.ts`; `packages/coding-agent/test/acp-mcp-isolation.test.ts`; `packages/coding-agent/test/read-acp-fs.test.ts`; `packages/coding-agent/test/write-acp-fs.test.ts`; `packages/coding-agent/test/bash-acp-terminal.test.ts`. |\n| Bridge HTTPS | Experimental, fail-closed remote session-control surface. | `gjc --mode bridge` | A future remote client needs HTTPS protocol scaffolding, authenticated health/help/handshake behavior, or SDK compatibility tests. | Production bot lifecycle, default external-controller integration, or claims that remote session events/commands are enabled by default. | `packages/coding-agent/test/bridge/bridge-auth.test.ts`; `packages/coding-agent/test/bridge/bridge-mode-handler.test.ts`; `packages/coding-agent/test/bridge/bridge-conformance.test.ts`; `packages/bridge-client/test/bridge-client.test.ts`. |\n\n## Surface details\n\n### Coordinator MCP\n\nCoordinator MCP is the default answer for external bot and orchestration integrations. It exposes a transport-level MCP tool contract for session discovery, managed session start, visible tmux registration, prompt delivery, bounded turn waiting, structured question answering, artifact reads, and explicit completion/failure/cancellation reports.\n\nReadiness claim:\n\n- Ready as the preferred generic external-controller control plane.\n- Provider-independent contract checks exist for server metadata, tool discovery, read-only defaults, mutation gates, setup rendering, and dry-run lifecycle behavior.\n- It is not a provider/model contract. Live model execution remains the operator's environment-specific smoke.\n\nPrimary references:\n\n- `docs/bot-integration.md`\n- `docs/hermes-mcp-bridge.md`\n- `packages/coding-agent/src/coordinator/contract.ts`\n- `packages/coding-agent/src/coordinator-mcp/server.ts`\n\n### RPC stdio\n\nRPC mode is the stable embedded-worker surface. It is newline-delimited JSON over stdio and emits a `{ \"type\": \"ready\" }` frame before accepting commands. Hosts can drive prompts, state queries, host tools, host URI schemes, workflow gates, extension UI responses, cancellation, and unattended negotiation through the RPC command catalog.\n\nReadiness claim:\n\n- Ready for single-process host integration and subprocess workers.\n- The public Python client in `python/gjc-rpc` is the recommended typed client for Python hosts.\n- Multi-session orchestration and MCP tool discovery are out of scope for RPC; use Coordinator MCP for those.\n\nPrimary references:\n\n- `docs/rpc.md`\n- `python/gjc-rpc/README.md`\n- `packages/coding-agent/src/modes/rpc/rpc-mode.ts`\n- `packages/coding-agent/src/modes/rpc/rpc-types.ts`\n\n### ACP mode\n\nACP mode runs GJC as an Agent Client Protocol server over stdio. It is useful for editor-style clients that own the ACP transport and want session creation, session load/fork/resume/close metadata, prompt handling, client-provided MCP servers, permission prompts, editor file reads/writes, terminal-backed bash, and elicitation support.\n\nReadiness claim:\n\n- ACP is implemented and covered for current editor/client contracts: initialize conformance, agent capability advertisement, lazy startup, stdout JSON-RPC hygiene, client-owned MCP isolation, event mapping, file bridge routing, terminal routing, and permission routing.\n- ACP is not the preferred bot control-plane surface. It is not positioned as a multi-session external bot coordinator, and it does not replace Coordinator MCP reports/artifacts/turn state.\n- A real prompt still depends on the selected provider/model credentials, so required PR smokes should stay on provider-independent initialize, lifecycle, bridge, and mapper tests.\n\nCurrent entrypoints:\n\n```sh\ngjc --mode acp\n# equivalent ACP subcommand for ACP clients that prefer command-style launch\ngjc acp\n```\n\nPrimary references:\n\n- `packages/coding-agent/src/commands/acp.ts`\n- `packages/coding-agent/src/modes/acp/acp-mode.ts`\n- `packages/coding-agent/src/modes/acp/acp-agent.ts`\n- `packages/coding-agent/src/modes/acp/acp-client-bridge.ts`\n- `packages/coding-agent/src/modes/acp/acp-event-mapper.ts`\n\n### Bridge HTTPS\n\nBridge mode is an experimental network protocol surface over HTTPS. Its current public posture is deliberately fail-closed: unauthenticated health/help are available, authenticated handshake is available, and default session-control endpoints advertise no accepted capabilities/scopes and reject with `endpoint_disabled`.\n\nReadiness claim:\n\n- Ready as experimental protocol scaffolding with fail-closed behavior and SDK/client conformance tests.\n- Not ready as the default external-bot product surface.\n- Do not document events, commands, controller ownership, UI responses, host tool results, or host URI results as enabled by default. Those names remain in the protocol catalog for internal compatibility and future re-enable work.\n\nPrimary references:\n\n- `docs/bridge.md`\n- `packages/coding-agent/src/modes/bridge/bridge-mode.ts`\n- `packages/coding-agent/src/modes/bridge/auth.ts`\n- `packages/bridge-client/src/index.ts`\n\n## PR smoke checklist\n\nFor external-control PRs, use this provider-independent checklist before any optional live provider smoke:\n\n1. **Docs-to-code alignment:** the readiness matrix still matches CLI mode parsing, MCP command registration, ACP command registration, bridge endpoint defaults, and RPC/ACP/Bridge tests.\n2. **Coordinator MCP:** `gjc mcp-serve coordinator --check --json` still reports the coordinator server and tool list, and focused MCP tests pass without provider credentials.\n3. **RPC stdio:** at least one stdio or client contract test proves JSONL startup/command routing without a real provider key.\n4. **ACP mode:** initialize/stdout or conformance tests prove the ACP JSON-RPC entrypoint and capability advertisement without a real provider key.\n5. **Bridge HTTPS:** bridge auth/handler tests prove TLS requirement, authenticated handshake, help/health behavior, and default `endpoint_disabled` session-control posture.\n6. **Local leak audit:** deliverable docs/tests must not contain private profile names, user-home paths, callback artifact paths, local proxy names, terminal app names, or private launch wrappers.\n\nOptional live smokes are useful diagnostics for one operator's model/profile/network setup, but they must not be required for PR readiness unless the PR explicitly changes live provider behavior.\n",
|
|
20
|
+
"external-control-readiness.md": "# External control surface readiness\n\nThis document classifies every public GJC surface that an external controller, bot, editor, or harness can use to drive `gjc`. It is intentionally narrower than the generic bot guide: it states what is ready today, what is only editor/client-oriented, and what remains experimental.\n\n## Readiness matrix\n\n| Surface | Current readiness | Primary command | Use when | Do not use when | Provider-independent smoke path |\n| --- | --- | --- | --- | --- | --- |\n| Coordinator MCP | Preferred multi-session bot/control-plane surface. | `gjc mcp-serve coordinator` | A controller needs to start/register GJC sessions, send bounded turns, answer questions, read artifacts, and write durable status reports across one or more repo/worktree lanes. | The controller only needs one embedded subprocess and can own stdio directly. | `gjc mcp-serve coordinator --check --json`; `packages/coding-agent/test/coordinator-mcp.test.ts`; `packages/coding-agent/test/setup-cli.test.ts`. |\n| RPC stdio | Stable subprocess worker surface. | `gjc --mode rpc` | A host embeds one GJC worker process, sends JSONL commands over stdin, consumes stdout frames, and optionally uses `python/gjc-rpc`. | The host needs remote HTTPS, multi-session orchestration, or MCP tool discovery. | `packages/coding-agent/test/rpc-unattended-stdio.test.ts`; `packages/coding-agent/test/rpc-client.start.test.ts`; `packages/coding-agent/test/rpc-host-tools.test.ts`; `packages/coding-agent/test/rpc-host-uris.test.ts`. |\n| ACP mode | Editor/ACP client surface with tested protocol initialization, session lifecycle, client-owned MCP, file/terminal client bridges, permission routing, and stdout hygiene. | `gjc --mode acp` or `gjc acp` | An editor or ACP-compatible client wants to drive GJC through the Agent Client Protocol over stdio. | A bot needs a generic multi-session control plane; use Coordinator MCP instead. | `packages/coding-agent/test/acp-initialize-conformance.test.ts`; `packages/coding-agent/test/acp-stdout-hygiene.test.ts`; `packages/coding-agent/test/acp-lazy-startup.test.ts`; `packages/coding-agent/test/acp-mcp-isolation.test.ts`; `packages/coding-agent/test/read-acp-fs.test.ts`; `packages/coding-agent/test/write-acp-fs.test.ts`; `packages/coding-agent/test/bash-acp-terminal.test.ts`. |\n| Bridge HTTPS | Experimental, fail-closed remote session-control surface. | `gjc --mode bridge` | A future remote client needs HTTPS protocol scaffolding, authenticated health/help/handshake behavior, or SDK compatibility tests. | Production bot lifecycle, default external-controller integration, or claims that remote session events/commands are enabled by default. | `packages/coding-agent/test/bridge/bridge-auth.test.ts`; `packages/coding-agent/test/bridge/bridge-mode-handler.test.ts`; `packages/coding-agent/test/bridge/bridge-conformance.test.ts`; `packages/bridge-client/test/bridge-client.test.ts`. |\n\n## Surface details\n\n### Standalone TUI and MCP inheritance\n\nNormal standalone GJC (`gjc`, `gjc --tmux`, and print-mode prompts) does not inherit Claude Code, Codex, Cursor, Gemini, Windsurf, or other tools' MCP servers as a public startup contract. It also does not expose a supported standalone-TUI setting that automatically imports arbitrary MCP servers for the model. See [Standalone GJC MCP support](./standalone-mcp.md) for the user-facing boundary and workarounds.\n\n### Coordinator MCP\n\nCoordinator MCP is the default answer for external bot and orchestration integrations. It exposes a transport-level MCP tool contract for session discovery, managed session start, visible tmux registration, prompt delivery, bounded turn waiting, structured question answering, artifact reads, and explicit completion/failure/cancellation reports.\n\nReadiness claim:\n\n- Ready as the preferred generic external-controller control plane.\n- Provider-independent contract checks exist for server metadata, tool discovery, read-only defaults, mutation gates, setup rendering, and dry-run lifecycle behavior.\n- It is not a provider/model contract. Live model execution remains the operator's environment-specific smoke.\n\nPrimary references:\n\n- `docs/bot-integration.md`\n- `docs/hermes-mcp-bridge.md`\n- `packages/coding-agent/src/coordinator/contract.ts`\n- `packages/coding-agent/src/coordinator-mcp/server.ts`\n\n### RPC stdio\n\nRPC mode is the stable embedded-worker surface. It is newline-delimited JSON over stdio and emits a `{ \"type\": \"ready\" }` frame before accepting commands. Hosts can drive prompts, state queries, host tools, host URI schemes, workflow gates, extension UI responses, cancellation, and unattended negotiation through the RPC command catalog.\n\nReadiness claim:\n\n- Ready for single-process host integration and subprocess workers.\n- The public Python client in `python/gjc-rpc` is the recommended typed client for Python hosts.\n- Multi-session orchestration and MCP tool discovery are out of scope for RPC; use Coordinator MCP for those.\n\nPrimary references:\n\n- `docs/rpc.md`\n- `python/gjc-rpc/README.md`\n- `packages/coding-agent/src/modes/rpc/rpc-mode.ts`\n- `packages/coding-agent/src/modes/rpc/rpc-types.ts`\n\n### ACP mode\n\nACP mode runs GJC as an Agent Client Protocol server over stdio. It is useful for editor-style clients that own the ACP transport and want session creation, session load/fork/resume/close metadata, prompt handling, client-provided MCP servers, permission prompts, editor file reads/writes, terminal-backed bash, and elicitation support.\n\nReadiness claim:\n\n- ACP is implemented and covered for current editor/client contracts: initialize conformance, agent capability advertisement, lazy startup, stdout JSON-RPC hygiene, client-owned MCP isolation, event mapping, file bridge routing, terminal routing, and permission routing.\n- ACP is not the preferred bot control-plane surface. It is not positioned as a multi-session external bot coordinator, and it does not replace Coordinator MCP reports/artifacts/turn state.\n- A real prompt still depends on the selected provider/model credentials, so required PR smokes should stay on provider-independent initialize, lifecycle, bridge, and mapper tests.\n\nCurrent entrypoints:\n\n```sh\ngjc --mode acp\n# equivalent ACP subcommand for ACP clients that prefer command-style launch\ngjc acp\n```\n\nPrimary references:\n\n- `packages/coding-agent/src/commands/acp.ts`\n- `packages/coding-agent/src/modes/acp/acp-mode.ts`\n- `packages/coding-agent/src/modes/acp/acp-agent.ts`\n- `packages/coding-agent/src/modes/acp/acp-client-bridge.ts`\n- `packages/coding-agent/src/modes/acp/acp-event-mapper.ts`\n\n### Bridge HTTPS\n\nBridge mode is an experimental network protocol surface over HTTPS. Its current public posture is deliberately fail-closed: unauthenticated health/help are available, authenticated handshake is available, and default session-control endpoints advertise no accepted capabilities/scopes and reject with `endpoint_disabled`.\n\nReadiness claim:\n\n- Ready as experimental protocol scaffolding with fail-closed behavior and SDK/client conformance tests.\n- Not ready as the default external-bot product surface.\n- Do not document events, commands, controller ownership, UI responses, host tool results, or host URI results as enabled by default. Those names remain in the protocol catalog for internal compatibility and future re-enable work.\n\nPrimary references:\n\n- `docs/bridge.md`\n- `packages/coding-agent/src/modes/bridge/bridge-mode.ts`\n- `packages/coding-agent/src/modes/bridge/auth.ts`\n- `packages/bridge-client/src/index.ts`\n\n## PR smoke checklist\n\nFor external-control PRs, use this provider-independent checklist before any optional live provider smoke:\n\n1. **Docs-to-code alignment:** the readiness matrix still matches CLI mode parsing, MCP command registration, ACP command registration, bridge endpoint defaults, and RPC/ACP/Bridge tests.\n2. **Coordinator MCP:** `gjc mcp-serve coordinator --check --json` still reports the coordinator server and tool list, and focused MCP tests pass without provider credentials.\n3. **RPC stdio:** at least one stdio or client contract test proves JSONL startup/command routing without a real provider key.\n4. **ACP mode:** initialize/stdout or conformance tests prove the ACP JSON-RPC entrypoint and capability advertisement without a real provider key.\n5. **Bridge HTTPS:** bridge auth/handler tests prove TLS requirement, authenticated handshake, help/health behavior, and default `endpoint_disabled` session-control posture.\n6. **Local leak audit:** deliverable docs/tests must not contain private profile names, user-home paths, callback artifact paths, local proxy names, terminal app names, or private launch wrappers.\n\nOptional live smokes are useful diagnostics for one operator's model/profile/network setup, but they must not be required for PR readiness unless the PR explicitly changes live provider behavior.\n",
|
|
21
21
|
"fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope and traversal policy, then lets higher-level operations (glob filtering, fuzzy scoring, grep file selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across `glob`, `fuzzyFind`, and `grep` when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs`\n- JS binding/export:\n - `packages/natives/src/glob/index.ts` (`invalidateFsScanCache`)\n - `packages/natives/src/glob/types.ts`\n - `packages/natives/src/grep/types.ts`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- Consumers must pass stable semantics for hidden/gitignore/node_modules behavior; changing any flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache population uses a deterministic walker (`ignore::WalkBuilder`) configured by `include_hidden`, `use_gitignore`, and `skip_node_modules`:\n\n- `follow_links(false)`\n- sorted by file path\n- `.git` is always skipped\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- entry file type + `mtime` are captured via `symlink_metadata`\n\nSearch roots are resolved by `resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- max entry enforcement is oldest-first eviction by `created_at`\n\n## Empty-result fast recheck (separate from normal hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(...)` and retries\n- intended to reduce stale-negative results when files were recently added but cache is still within TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age exceeds threshold\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when selected candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on all exposed APIs (`cache?: boolean`, default `false`).\n\nCurrent defaults in native APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` included only when the pattern mentions `node_modules`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` is skipped\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`, and `node_modules` included only when the glob mentions `node_modules`\n\nCoding-agent callers today:\n\n- High-volume mention candidate discovery enables cache:\n - `packages/coding-agent/src/utils/file-mentions.ts`\n - profile: `hidden=true`, `gitignore=true`, `includeNodeModules=true`, `cache=true`\n- Tool-level `grep` integration currently disables scan cache (`cache: false`):\n - `packages/coding-agent/src/tools/grep.ts`\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (e.g., delete), fallback canonicalizes parent and reattaches filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation tool callsites:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/patch/index.ts` (hashline/patch/replace flows)\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. **Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. **Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)`\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/`grep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
|
|
22
22
|
"geobench.md": "# GEO benchmark for Gajae-Code\n\nThis repository includes a [`geobench`](https://github.com/NomaDamas/geobench) product spec for measuring LLM answer visibility: hit rate, MRR, share of voice, citation rate/share, and confidence intervals.\n\n```bash\n/path/to/geobench/dist/geobench estimate --product geobench/gajae-code.yaml --providers openai --tier cheap\n/path/to/geobench/dist/geobench profile geobench/gajae-code.yaml\n/path/to/geobench/dist/geobench bench --product geobench/gajae-code.yaml --providers openai --tier cheap --mode benchmark\n```\n\nPublish aggregate metrics only; do not publish raw provider answers, secrets, or private run logs.\n",
|
|
23
23
|
"gjc-dogfood-skill-template.md": "# GJC dogfood local skill template\n\nIssue #93 requested a gaebal-gajae/operator dogfood skill. The live issue has no comment approving a fifth bundled default workflow skill, so this stays a local template instead of changing the default workflow surface. Operators can copy it into a user or project override when they want GJC-first session guidance:\n\n```sh\nmkdir -p ~/.gjc/skills/gjc-dogfood\ncp docs/gjc-dogfood-skill-template.md ~/.gjc/skills/gjc-dogfood/SKILL.md\n```\n\nFor a single project, copy it to `<project>/.gjc/skills/gjc-dogfood/SKILL.md` instead. Do not commit that project `.gjc` copy unless the project explicitly wants a local override.\n\n---\nname: gjc-dogfood\ndescription: Use when running or reviewing work through GJC sessions, dogfooding Gajae-Code, or migrating an operator workflow from OMX to GJC.\n---\n\n# GJC Dogfood Operator Workflow\n\nUse GJC first for coding, review, planning, and follow-up sessions. Treat OMX as a fallback only when GJC is unavailable, broken, or missing a required capability.\n\n## Locate and launch GJC\n\n- Installed CLI: run `command -v gjc` and then launch with `gjc --tmux`.\n- Repository checkout: from the gajae-code repo, prefer `bun packages/coding-agent/src/cli.ts --tmux` when testing source changes before install.\n- Worktree isolation: for branch-specific work, either let GJC create a managed sibling worktree with `gjc --tmux --worktree <branch-like-name>` or `cd <existing-worktree-path>` and run `gjc --tmux` there. Do not pass filesystem paths to `--worktree`.\n- Name sessions explicitly with the project and issue, for example `gajae-code-93-dogfood-skill`, so tmux panes, logs, and exports remain traceable.\n\n## Start the session\n\n- Put git operations inside the GJC session: fetch, branch/worktree setup, focused commits, pushes, and PR creation should be visible in-session.\n- Submit the initial prompt with the issue URL, target branch, acceptance criteria, verification limits, and any existing plan/spec link.\n- Verify the prompt was accepted: the TUI should show the user prompt, an active assistant turn, or a tool/action request. If the session silently idles, resend once with a shorter prompt and capture the failure.\n- Verify working state before leaving the session unattended: confirm the target cwd/worktree, branch, and issue scope are visible in the transcript or command output.\n\n## During work\n\n- Keep session names and branch names issue-scoped.\n- Prefer GJC workflow skills only when they fit: `deep-interview` for unclear requirements, `ralplan` for planning, `ultragoal` for durable ledgers, and `team` for coordinated tmux execution.\n- Keep evidence in the session: issue reads, focused tests/checks, screenshots only when visual behavior matters, and PR URLs.\n- When GJC is weaker than OMX, finish the urgent work with the smallest safe fallback and file a gajae-code follow-up issue with the missing capability, exact command/session context, expected behavior, and evidence.\n\n## Fallback policy\n\nUse OMX or another operator path only when:\n\n- `gjc` cannot be located or launched after checking installed and repo-local commands;\n- authentication, model routing, tmux, or prompt submission is broken;\n- GJC lacks a required capability that OMX already has;\n- an urgent production/review deadline would be missed by debugging GJC first.\n\nRecord the fallback reason and create or link the gajae-code issue that would make GJC sufficient next time.\n\n## Evidence checklist\n\nReport:\n\n- project, issue, branch/worktree, and session name;\n- whether GJC was installed or repo-local;\n- prompt acceptance and working-state evidence;\n- git operations performed in-session;\n- focused verification commands and results;\n- PR/issue URLs;\n- follow-up gajae-code issues for any GJC gap or fallback.\n",
|
|
@@ -26,7 +26,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
26
26
|
"handoff-generation-pipeline.md": "# `/handoff` generation pipeline\n\nThis document describes how the coding-agent implements `/handoff`: trigger path, oneshot generation, session switch, context reinjection, persistence, and UI behavior.\n\n## Scope\n\nCovers:\n\n- Interactive `/handoff` command dispatch\n- `AgentSession.handoff()` lifecycle and state transitions\n- `generateHandoff(...)` request shape\n- How old/new sessions persist handoff data differently\n- UI behavior for success, cancel, and failure\n\nDoes not cover:\n\n- Generic tree navigation/branch internals\n- Non-handoff session commands (`/new`, `/fork`, `/resume`)\n\n## Implementation files\n\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`packages/agent/src/compaction/compaction.ts`](../packages/agent/src/compaction/compaction.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n\n## Trigger path\n\n1. `/handoff` is declared in builtin slash command metadata (`slash-commands.ts`) with optional inline hint: `[focus instructions]`.\n2. In interactive input handling (`InputController`), submit text matching `/handoff` or `/handoff ...` is intercepted before normal prompt submission.\n3. The editor is cleared and `handleHandoffCommand(customInstructions?)` is called.\n4. `CommandController.handleHandoffCommand` performs a preflight guard using current entries:\n - Counts `type === \"message\"` entries.\n - If `< 2`, it warns: `Nothing to hand off (no messages yet)` and returns.\n\nThe same minimum-content guard exists again inside `AgentSession.handoff()` and throws if violated. This duplicates safety at both UI and session layers.\n\n## End-to-end lifecycle\n\n### 1) Start handoff generation\n\n`AgentSession.handoff(customInstructions?)`:\n\n- Reads current branch entries (`sessionManager.getBranch()`).\n- Validates minimum message count (`>= 2`).\n- Creates `#handoffAbortController` and links any caller-provided abort signal to it.\n- Resolves the current model API key through `ModelRegistry`.\n- Calls `generateHandoff(...)` with:\n - live agent messages (`agent.state.messages`),\n - the current model and API key,\n - the base system prompt (`#baseSystemPrompt`),\n - the live tool array (`agent.state.tools`),\n - optional focus instructions,\n - coding-agent message conversion (`convertToLlm`),\n - provider metadata and `initiatorOverride: \"agent\"`.\n\n`generateHandoff(...)` lives in `packages/agent/src/compaction/compaction.ts` next to summarization. It renders `packages/agent/src/compaction/prompts/handoff-document.md` via `renderHandoffPrompt(...)` with optional `additionalFocus`.\n\n### 2) Generate and capture output\n\n`generateHandoff(...)` converts the existing `AgentMessage[]` history to real LLM `Message[]` history, then appends one trailing agent-attributed `user` message containing the rendered handoff prompt.\n\nThe request uses `completeSimple(...)` directly:\n\n```ts\nawait completeSimple(\n model,\n {\n systemPrompt,\n messages: requestMessages,\n tools,\n },\n {\n apiKey,\n signal,\n reasoning: Effort.High,\n toolChoice: \"none\",\n initiatorOverride,\n metadata,\n },\n);\n```\n\nImportant generation properties:\n\n- The request preserves the live provider cache prefix by reusing the same system prompt, tool definitions, and real message history shape as the active agent.\n- The handoff instruction is a trailing `user` message, not a developer message, so the cached prefix remains aligned with the prior turn.\n- `toolChoice: \"none\"` prevents intentional tool dispatch.\n- The returned assistant content is filtered to text blocks and joined with `\\n`; stray tool-call blocks are ignored if a provider does not honor `toolChoice: \"none\"`.\n- `stopReason === \"error\"` throws a generation error.\n\nNo agent-loop events are used for capture. The handoff path no longer waits for `agent_end` and no longer scans the latest assistant message.\n\n### 3) Cancellation checks\n\nCancellation throws `Error(\"Handoff cancelled\")`; a completed generation with no text returns `undefined`.\n\n- caller signal aborts `#handoffAbortController`\n- `completeSimple(...)` receives the abort signal\n- aborted handoff signal or provider `AbortError` is normalized to `Error(\"Handoff cancelled\")`\n- empty generated text returns `undefined`\n\n`AgentSession.handoff()` always clears `#handoffAbortController` in `finally`.\n\n### 4) New session creation\n\nIf text was generated and not aborted:\n\n1. Flush current session writer (`sessionManager.flush()`).\n2. Cancel session-owned async jobs.\n3. Start a brand-new session with `parentSession` pointing at the previous session file when one exists.\n4. Reset in-memory agent state (`agent.reset()`).\n5. Rebind `agent.sessionId` to the new session id.\n6. Rekey/reset hindsight state for the new session.\n7. Clear queued context arrays (`#steeringMessages`, `#followUpMessages`, `#pendingNextTurnMessages`) and any scheduled hidden next-turn generation.\n8. Reset todo reminder counter.\n\n### 5) Handoff-context injection\n\nThe generated handoff document is wrapped by coding-agent session glue and appended to the new session as a `custom_message` entry:\n\n```text\n<handoff-context>\n...handoff text...\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.\n```\n\nInsertion call:\n\n```ts\nthis.sessionManager.appendCustomMessageEntry(\"handoff\", handoffContent, true, undefined, \"agent\");\n```\n\nSemantics:\n\n- `customType`: `\"handoff\"`\n- `display`: `true` (visible in TUI rebuild)\n- attribution: `\"agent\"`\n- Entry type: `custom_message` (participates in LLM context)\n\n### 6) Rebuild active agent context\n\nAfter injection:\n\n1. `buildDisplaySessionContext()` resolves message list for current leaf.\n2. `agent.replaceMessages(sessionContext.messages)` makes the injected handoff message active context.\n3. Todo phases are synchronized from the new branch.\n4. Method returns `{ document: handoffText, savedPath? }`.\n\nAt this point, the active LLM context in the new session contains the injected handoff message, not the old transcript.\n\n## Persistence model: old session vs new session\n\n### Old session\n\nHandoff generation is a oneshot request, not a visible agent turn. The generated handoff text is not appended to the old session as an assistant message.\n\nResult: the original session keeps its prior transcript unchanged except for data already persisted before handoff began.\n\n### New session\n\nAfter session reset, handoff is persisted as `custom_message` with `customType: \"handoff\"`.\n\n`buildSessionContext()` converts this entry into a runtime custom/user-context message via `createCustomMessage(...)`, so it is included in future prompts from the new session.\n\nAuto-triggered handoffs can additionally write a timestamped `handoff-*.md` artifact under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled. Manual `/handoff` does not write that artifact.\n\n## Controller/UI behavior\n\n`CommandController.handleHandoffCommand` behavior:\n\n- Shows a status loader: `Generating handoff… (esc to cancel)`.\n- Calls `await session.handoff(customInstructions)`.\n- If result is `undefined`: `showError(\"Handoff cancelled\")`.\n- On success:\n - `rebuildChatFromMessages()` (loads new session context, including injected handoff)\n - invalidates status line and editor top border\n - reloads todos\n - appends success chat line: `New session started with handoff context`\n- On exception:\n - if message is `\"Handoff cancelled\"` or error name is `AbortError`: `showError(\"Handoff cancelled\")`\n - otherwise: `showError(\"Handoff failed: <message>\")`\n- Stops the loader, restores the previous Escape handler, and requests render at end.\n\nManual `/handoff` no longer streams the generated document into chat. A cancellable loader remains visible while the oneshot request runs, and the chat is rebuilt after generation completes.\n\n## Cancellation semantics\n\n### Session-level cancellation primitive\n\n`AgentSession` exposes:\n\n- `abortHandoff()` → aborts `#handoffAbortController`\n- `isGeneratingHandoff` → true while controller exists\n\nWhen this abort path is used, the abort signal is passed to `completeSimple(...)`; `handoff()` normalizes the cancellation to `Error(\"Handoff cancelled\")`, and command controller maps it to cancellation UI.\n\n### Interactive `/handoff` path\n\nThe command controller installs a temporary Escape handler for `/handoff` while the loader is visible. Pressing Escape calls `session.abortHandoff()`, which aborts the `completeSimple(...)` request through `#handoffAbortController`.\n\n## Aborted vs failed handoff\n\nCurrent UI classification:\n\n- **Aborted/cancelled**\n - `abortHandoff()` path triggers `\"Handoff cancelled\"`, or\n - thrown `AbortError`\n - UI shows `Handoff cancelled`\n- **Failed**\n - any other thrown error from `handoff()` / `generateHandoff()` / provider request path\n - UI shows `Handoff failed: ...`\n\nAdditional nuance: if generation completes but no text is returned, `handoff()` returns `undefined` and controller currently reports **cancelled**, not **failed**.\n\n## Short-session and minimum-content guardrails\n\nTwo guards prevent low-signal handoffs:\n\n- UI layer (`handleHandoffCommand`): warns and returns early for `< 2` message entries\n- Session layer (`handoff()`): throws the same condition as an error\n\nThis avoids creating a new session with empty/near-empty handoff context.\n\n## State transition summary\n\nHigh-level state flow:\n\n1. Interactive slash command intercepted.\n2. Preflight message-count guard.\n3. `#handoffAbortController` created (`isGeneratingHandoff = true`).\n4. `generateHandoff(...)` issues one `completeSimple(...)` request with live system prompt, tools, message history, and trailing handoff prompt.\n5. Assistant response text blocks are joined; tool-call blocks are discarded.\n6. If missing text → return `undefined`; if aborted → cancellation error path.\n7. If present:\n - flush old session\n - cancel async jobs\n - create new empty session with previous session as parent\n - reset runtime queues/counters\n - append `custom_message(handoff)`\n - optionally save an auto-triggered handoff document under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled\n8. Controller rebuilds chat UI and announces success.\n9. `#handoffAbortController` cleared (`isGeneratingHandoff = false`).\n\n## Known assumptions and limitations\n\n- No structural validation checks that generated markdown follows the requested section format.\n- Missing generated text is reported as cancellation in controller UX.\n- Manual handoff has no streaming visibility; a cancellable loader is shown until the UI updates after generation completes.\n- Auto-triggered handoffs can write a timestamped `handoff-*.md` artifact when `compaction.handoffSaveToDisk` is enabled; write failure is logged and does not fail the handoff.\n",
|
|
27
27
|
"hermes-mcp-bridge.md": "# Coordinator MCP bridge\n\nGJC exposes a native outward MCP bridge for external coordinators:\n\n```bash\ngjc mcp-serve coordinator\n```\n\n`gjc mcp-serve hermes` is accepted as a compatibility alias for the same coordinator bridge.\n\nThe bridge is intentionally separate from GJC's client-side MCP runtime. It lets an external coordinator list sessions, start worktree/tmux-oriented sessions, queue bounded follow-up prompts, read status/tail/artifacts, handle structured questions, and write coordination reports without scraping terminal scrollback.\n\n## Core contract and adapters\n\nThe coordinator bridge is intentionally a core contract with multiple adapters, not an MCP-only or Hermes-only product direction. Hermes is one compatibility preset, not a privileged integration mode:\n\n- `packages/coding-agent/src/coordinator/contract.ts` owns transport-neutral server metadata and tool names.\n- `gjc mcp-serve coordinator` is the outward MCP adapter for external agents.\n- `gjc coordinator` is the read-only CLI/debug adapter for humans and scripts that need to inspect the same contract without starting MCP transport.\n- `gjc setup hermes` is the compatibility setup adapter that renders coordinator config and operator guidance.\n\nFuture session, turn, question, artifact, and report behavior should move toward shared coordinator core services that both MCP and CLI adapters call instead of duplicating transport-specific logic.\n\n## Coordinator setup adapter\n\nUse `gjc setup hermes` to render or install a portable MCP setup package for any controller that accepts Hermes-compatible MCP config:\n\n```bash\ngjc setup hermes --root /path/to/repo --profile my-bot --repo gajae-code\n```\n\nThe default mode is render-only and writes no files. To install into a Hermes profile:\n\n```bash\ngjc setup hermes \\\n --root /path/to/repo \\\n --profile my-bot \\\n --repo gajae-code \\\n --mutation sessions,questions,reports \\\n --profile-dir /path/to/hermes/profile \\\n --install\n```\n\nThe generated setup is model-agnostic and worktree-isolated. By default it renders `GJC_COORDINATOR_MCP_SESSION_COMMAND` as `gjc --worktree`, so spawned sessions launch inside a GJC-managed sibling worktree while GJC still records the original repo as the project identity for tmux/session resume. Users who need a stable named branch can set `--worktree-name`; users who need a specific local wrapper, dev checkout, or provider/model can opt in explicitly:\n\n```bash\ngjc setup hermes \\\n --root /path/to/repo \\\n --worktree-name hermes-gajae-code\n```\n\n```bash\ngjc setup hermes \\\n --root /path/to/repo \\\n --session-command \"gjc --worktree hermes-custom --model <provider/model>\"\n```\n\nProvider/model examples are examples only; GJC does not hard-code GPT, Anthropic, or any other provider as the Hermes bridge default.\n\nRun a non-mutating setup smoke check with:\n\n```bash\ngjc setup hermes --root /path/to/repo --smoke\n```\n\nSmoke verifies the MCP server/tool contract. It does not call a downstream LLM and does not validate provider credentials.\n\n\n## Safety model\n\nThe bridge is read-only and fail-closed by default.\n\nRequired root allowlist:\n\n```bash\nexport GJC_COORDINATOR_MCP_WORKDIR_ROOTS=\"/path/to/repo:/path/to/worktrees\"\n```\n\nMutating tools require both startup opt-in and per-call consent:\n\n```bash\nexport GJC_COORDINATOR_MCP_MUTATIONS=\"sessions,questions,reports\"\n```\n\nEvery mutating MCP call must also include `allow_mutation: true`. Missing startup opt-in or missing per-call consent returns an error instead of falling back to shell or terminal relay.\n\nReal tmux/GJC actuation uses the configured GJC-compatible session command. `gjc setup hermes` writes this as `gjc --worktree` by default so GJC owns worktree creation and resume identity:\n\n```bash\nexport GJC_COORDINATOR_MCP_SESSION_COMMAND=\"gjc --worktree\"\n```\n\nWith that command configured, `gjc_coordinator_start_session` launches a detached tmux session, `gjc_coordinator_send_prompt` creates a durable turn and sends input to that pane, `gjc_coordinator_read_coordination_status` returns a canonical polling snapshot for sessions, session states, turns, questions, reports, and bounded event summaries, and `gjc_coordinator_read_tail` reads bounded advisory pane output. Tmux tail parsing is not the completion source of truth; turn completion comes from explicit durable turn state such as runtime session state or `gjc_coordinator_report_status`.\n\nFor resume safety, prefer the generated GJC-native worktree command over creating a git worktree in Hermes itself. GJC's launch path records the original repo as the project identity while running in the worktree, so session listing/resume can still group the session under the source project. If Hermes creates and later deletes an unmanaged worktree, a saved session may still exist but its cwd can be gone.\n\nWhen an operator needs the session to stay visible in a routed tmux pane (for example a Clawhip/Hermes/OpenClaw channel that watches stale sessions and accepts follow-up prompts), use the documented visible-session fallback instead of inventing a private terminal protocol: [`docs/gjc-session-clawhip-routing.md`](./gjc-session-clawhip-routing.md). It keeps the same worktree isolation discipline while making the router, not GJC internals, own channel ids, mentions, and notification policy.\n\nArtifact reads are canonicalized, symlink escapes are rejected, and returned content is byte-capped by `GJC_COORDINATOR_MCP_ARTIFACT_BYTE_CAP`.\n\n`gjc setup hermes` renders `GJC_COORDINATOR_MCP_WORKDIR_ROOTS` with the host platform path delimiter (`:` on POSIX, `;` on Windows). Manual configs should prefer the same encoding.\n\n## Optional namespace\n\nUse namespace variables to prevent cross-profile or cross-repo enumeration:\n\n```bash\nexport GJC_COORDINATOR_MCP_PROFILE=\"team-a\"\nexport GJC_COORDINATOR_MCP_REPO=\"gajae-code\"\n```\n\nMissing namespace never widens into global session enumeration.\n\n## Tool surface\n\nRead tools:\n\n- `gjc_coordinator_list_sessions`\n- `gjc_coordinator_read_status`\n- `gjc_coordinator_read_tail`\n- `gjc_coordinator_list_questions`\n- `gjc_coordinator_list_artifacts`\n- `gjc_coordinator_read_artifact`\n- `gjc_coordinator_read_coordination_status`\n- `gjc_coordinator_read_turn`\n- `gjc_coordinator_await_turn`\n- `gjc_coordinator_watch_events`\n\n\nMutating tools:\n\n- `gjc_coordinator_start_session`\n- `gjc_coordinator_register_session`\n- `gjc_coordinator_send_prompt`\n- `gjc_coordinator_submit_question_answer`\n- `gjc_coordinator_report_status`\n\n\n`gjc_coordinator_register_session` registers an existing visible tmux-backed GJC pane as the coordinator-authoritative session. Use it when an operator has already launched a visible terminal/tmux lane and the external coordinator must send prompts to that same pane instead of creating a hidden `gjc-coordinator-*` session. The tool validates the workdir allowlist, safe session/target tokens, and tmux target liveness before writing session state.\n## Turn orchestration flow\n\nExternal coordinators should treat turns, not terminal scrollback, as the unit of work:\n\n1. Call `gjc_coordinator_start_session` with `allow_mutation: true`.\n2. Call `gjc_coordinator_send_prompt` with `allow_mutation: true`.\n3. Store the returned `turn_id`.\n4. Poll `gjc_coordinator_read_turn`, or call bounded `gjc_coordinator_await_turn`, until the turn is terminal.\n5. If `gjc_coordinator_list_questions` shows a question for that turn, answer with `gjc_coordinator_submit_question_answer`.\n6. Use `gjc_coordinator_report_status` with `session_id` and `turn_id` to write explicit completion/failure evidence.\n Use `status: \"cancelled\"` for coordinator-policy cancellation, and `status: \"failed\"` plus `blocker` for provider/tool/task failures.\n\n`gjc_coordinator_send_prompt` preserves the legacy `queued` and `delivered` fields and adds turn fields:\n\n```json\n{\n \"ok\": true,\n \"session_id\": \"gjc-coordinator-demo\",\n \"turn_id\": \"turn-00000000-0000-0000-0000-000000000000\",\n \"active_turn_id\": \"turn-00000000-0000-0000-0000-000000000000\",\n \"status\": \"active\",\n \"queued\": false,\n \"delivered\": true\n}\n```\n\nA session may have only one active turn by default. A second prompt is rejected with `active_turn_exists` unless the caller explicitly passes `queue: true` or `force: true`. Queued turns are durable and the next queued turn is promoted when the active turn reaches a terminal `gjc_coordinator_report_status`. Force supersedes the previous active turn and audits that state in the turn journal.\nCoordinator cancellation is recorded through `gjc_coordinator_report_status` with terminal `status: \"cancelled\"`; this updates durable turn state but does not kill the underlying tmux process. If the correct policy is replacement work rather than cancellation, send the replacement prompt with `force: true` so the previous active turn is superseded and audited.\n\n`gjc_coordinator_read_turn` returns the authoritative durable turn plus advisory pane status:\n\n```json\n{\n \"ok\": true,\n \"turn\": {\n \"schema_version\": 1,\n \"turn_id\": \"turn-00000000-0000-0000-0000-000000000000\",\n \"session_id\": \"gjc-coordinator-demo\",\n \"status\": \"completed\",\n \"final_response\": {\n \"text\": \"Done\",\n \"format\": \"markdown\",\n \"source\": \"report_status\",\n \"artifact_path\": null,\n \"truncated\": false\n },\n \"evidence\": [{ \"path\": \"artifact.txt\" }],\n \"error\": null\n },\n \"advisory_status\": {\n \"live\": true,\n \"state\": \"idle_or_unknown\"\n }\n}\n```\n\nThe coordinator MCP bridge is currently a durable polling/await surface. It does not expose a push subscription stream; external coordinators should poll `gjc_coordinator_read_coordination_status`, `gjc_coordinator_read_turn`, or bounded `gjc_coordinator_await_turn` instead of waiting for server-sent push events.\n\nExternal `session_id`, `turn_id`, and `question_id` values are validated before path use, and loaded records must match the requested session/turn owner.\n\n## Coordinator event journal\n\nThe bridge persists a restart-safe event journal under the configured coordinator state namespace, for example:\n\n```text\n$GJC_COORDINATOR_MCP_STATE_ROOT/<profile>/<repo>/events/event-journal.jsonl\n```\n\nEach event is a bounded JSONL record with `schema_version`, monotonic namespace-local `seq`, stable `id`, `timestamp`, canonical `kind`, optional `session_id`/`turn_id`/`question_id`/`report_id`, short `summary`, optional `payload_ref`, and bounded scalar `metadata`. Full prompts, reports, final responses, and artifacts stay in their existing turn/report/artifact read paths; event records only point at them.\n\n`gjc_coordinator_watch_events` is a bounded long-poll MCP tool, not an unbounded stream. Inputs are `after_seq` (default `0`), optional `session_id`, optional `event_types`, `timeout_ms` capped at 30000, and `limit` capped at 100. If matching events already exist after `after_seq`, it returns immediately. Otherwise it waits for the event journal to change or for timeout. The response includes `events`, `latest_seq`, `timed_out`, and `transport: { \"mcp\": \"long_poll\", \"push_subscriptions\": false }`, so coordinators can persist `latest_seq` and resume safely after restart.\n\n`gjc_coordinator_read_coordination_status` keeps its existing report fields and now also includes `latest_event_seq` plus recent event summaries for snapshot-style consumers.\n\n## Generic controller config snippet\n\n```json\n{\n \"mcp_servers\": {\n \"gjc_coordinator\": {\n \"command\": \"gjc\",\n \"args\": [\"mcp-serve\", \"coordinator\"],\n \"env\": {\n \"GJC_COORDINATOR_MCP_WORKDIR_ROOTS\": \"/path/to/repo\",\n \"GJC_COORDINATOR_MCP_PROFILE\": \"team-a\",\n \"GJC_COORDINATOR_MCP_REPO\": \"project\",\n \"GJC_COORDINATOR_MCP_SESSION_COMMAND\": \"gjc --worktree\"\n },\n \"enabled\": true\n }\n }\n}\n```\n\n## Smoke check\n\n```bash\ngjc mcp-serve coordinator --check --json\n```\n\nExpected result includes `ok: true`, server name `gjc-coordinator-mcp`, and the GJC-named tool list.\n",
|
|
28
28
|
"hotspot-map-successor.md": "# cpu-hotspot-map.json — successor pointer\n\n[`cpu-hotspot-map.json`](./cpu-hotspot-map.json) is **closed out**. All 11 CPU hotspots (H01–H11) and 5 memory hotspots (M01–M05) are resolved or rationally deferred across Optimization Suites v1 (#356), v2 (#530), and v3 (#548/#557/#558). Do **not** treat it as an open implementation backlog.\n\nThat map was a **static structural ranking** (algorithmic complexity × trigger frequency). Its `method` field records that real CPU self-time was \"to be measured by the agreed profiling corpus during optimization.\"\n\nFuture perf prioritization comes from the **profiling corpus**, not from this static map:\n\n- Evidence classes (`wallClockPhase`, `processCpuUsage`, `profilerSelfTime`, `rssMemory`, `byteParity`) and the corpus schema: see `docs/perf-profiling-corpus.md` (added with the corpus foundation).\n- Native algorithmic ports proposed for leftover hotspots are gated by [`native-ffi-optimization-policy.md`](./native-ffi-optimization-policy.md).\n\nA hotspot may be labeled `CPU-self-time confirmed` only when a `profilerSelfTime` artifact exists; v1–v3 shipped wins are otherwise classified as `covered-current`, `not-visible`, `needs-trace-coverage`, or `fallback-toggle-confirmed`.\n",
|
|
29
|
-
"keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `gjc` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.gjc/agent/keybindings.json`. The file is a JSON object whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.gjc/agent/config.yml`, and there is no nested `keybindings` object.\n\n```json\n{\n \"app.model.cycleForward\": \"Ctrl+P\",\n \"app.model.selectTemporary\": \"Alt+P\",\n \"app.plan.toggle\": \"Alt+Shift+P\"\n}\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```json\n{\n \"app.stt.toggle\": []\n}\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --- | --- | --- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models backward |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Ctrl+L` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Enter` | Send a follow-up message |\n| `app.message.queue` | `Alt+Enter` | Explicitly queue a message for the next turn |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.stt.toggle` | `Alt+H` | Toggle speech-to-text recording |\n\nOlder unqualified action names are migrated when `keybindings.json` is loaded, but new docs and new configs should use the namespaced action IDs above.\n",
|
|
29
|
+
"keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `gjc` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.gjc/agent/keybindings.json`. The file is a JSON object whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.gjc/agent/config.yml`, and there is no nested `keybindings` object.\n\n```json\n{\n \"app.model.cycleForward\": \"Ctrl+P\",\n \"app.model.selectTemporary\": \"Alt+P\",\n \"app.plan.toggle\": \"Alt+Shift+P\"\n}\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```json\n{\n \"app.stt.toggle\": []\n}\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --- | --- | --- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models backward |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Ctrl+L` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Enter` | Send a follow-up message |\n| `app.message.queue` | `Alt+Enter` | Explicitly queue a message for the next turn |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.stt.toggle` | `Alt+H` | Toggle speech-to-text recording |\n\nOlder unqualified action names are migrated when `keybindings.json` is loaded, but new docs and new configs should use the namespaced action IDs above.\n\n## Auditing default-key collisions\n\nSome default chords are intentionally reused across different UI contexts, where the focused component disambiguates them at dispatch time. For example `Enter` maps to both input submit and selection confirm, and `Ctrl+C` maps to both input copy and selection cancel. These are not conflicts — only one context is active at a time.\n\nTo audit the registry for keys whose default binding is claimed by more than one action, use `detectDefaultKeyCollisions(definitions)` from `@gajae-code/tui/keybindings`. It returns one entry per colliding key with the list of claiming action IDs, which is useful when adding new defaults or reviewing the surface. User-remap conflicts (multiple actions bound to the same chord in `keybindings.json`) continue to be reported separately by `KeybindingsManager.getConflicts()`.\n\nTwo audit clarifications for the current surface:\n\n- `app.clipboard.copyLine` is registry-backed and dispatched through the input controller's custom key handlers, not hardcoded.\n- `tui.input.copy` is declared in the registry but is not currently dispatched by `Editor.handleInput`.\n\nThe editor's configurable action defaults (including the platform-aware `app.clipboard.pasteImage` default) are derived directly from the central `KEYBINDINGS` registry, so there is a single source of truth for those defaults.\n\n## Current surface audit\n\nAuthoritative inventory of the keybinding registry, one row per action. Generated from `TUI_KEYBINDINGS` (`packages/tui/src/keybindings.ts`) and `KEYBINDINGS` (`packages/coding-agent/src/config/keybindings.ts`). Every action ID below is remappable via `~/.gjc/agent/keybindings.json` unless noted. A drift test (`packages/coding-agent/test/keybindings-audit.test.ts`) asserts every registry action ID appears in this table.\n\n### Editor context (`tui.editor.*`)\n\n| Action ID | Default | Notes |\n| --- | --- | --- |\n| `tui.editor.cursorUp` | `up` | |\n| `tui.editor.cursorDown` | `down` | |\n| `tui.editor.cursorLeft` | `left`, `ctrl+b` | `ctrl+b` also `app.tool.backgroundFold` (other context) |\n| `tui.editor.cursorRight` | `right`, `ctrl+f` | |\n| `tui.editor.cursorWordLeft` | `alt+left`, `ctrl+left`, `alt+b` | `ctrl+left` also `app.tree.foldOrUp` |\n| `tui.editor.cursorWordRight` | `alt+right`, `ctrl+right`, `alt+f` | `ctrl+right` also `app.tree.unfoldOrDown` |\n| `tui.editor.cursorLineStart` | `home`, `ctrl+a` | |\n| `tui.editor.cursorLineEnd` | `end`, `ctrl+e` | |\n| `tui.editor.jumpForward` | `ctrl+]` | |\n| `tui.editor.jumpBackward` | `ctrl+alt+]` | |\n| `tui.editor.pageUp` | `pageUp` | |\n| `tui.editor.pageDown` | `pageDown` | |\n| `tui.editor.deleteCharBackward` | `backspace` | |\n| `tui.editor.deleteCharForward` | `delete`, `ctrl+d` | `ctrl+d` also `app.exit` / `app.session.delete` |\n| `tui.editor.deleteWordBackward` | `ctrl+w`, `alt+backspace`, `ctrl+backspace` | |\n| `tui.editor.deleteWordForward` | `alt+delete`, `alt+d` | |\n| `tui.editor.deleteToLineStart` | `ctrl+u` | |\n| `tui.editor.deleteToLineEnd` | `ctrl+k` | |\n| `tui.editor.yank` | `ctrl+y` | |\n| `tui.editor.yankPop` | `alt+y` | |\n| `tui.editor.undo` | `ctrl+-`, `ctrl+_` | |\n\n### Input context (`tui.input.*`)\n\n| Action ID | Default | Notes |\n| --- | --- | --- |\n| `tui.input.newLine` | `shift+enter` | |\n| `tui.input.submit` | `enter` | also `tui.select.confirm` (other context) |\n| `tui.input.tab` | `tab` | |\n| `tui.input.copy` | `ctrl+c` | declared but not dispatched by `Editor.handleInput` |\n\n### Selection context (`tui.select.*`)\n\n| Action ID | Default | Notes |\n| --- | --- | --- |\n| `tui.select.up` | `up` | |\n| `tui.select.down` | `down` | |\n| `tui.select.pageUp` | `pageUp` | |\n| `tui.select.pageDown` | `pageDown` | |\n| `tui.select.confirm` | `enter` | |\n| `tui.select.cancel` | `escape`, `ctrl+c` | `escape` also `app.interrupt` |\n\n### Application context (`app.*`)\n\n| Action ID | Default | Notes |\n| --- | --- | --- |\n| `app.interrupt` | `escape` | |\n| `app.clear` | `ctrl+c` | |\n| `app.exit` | `ctrl+d` | |\n| `app.suspend` | `ctrl+z` | |\n| `app.thinking.cycle` | `shift+tab` | |\n| `app.thinking.toggle` | `ctrl+t` | |\n| `app.model.cycleForward` | `ctrl+p` | also `app.session.togglePath` (session list) |\n| `app.model.cycleBackward` | `shift+ctrl+p` | |\n| `app.model.select` | `ctrl+l` | |\n| `app.model.selectTemporary` | `alt+p` | |\n| `app.tools.expand` | `ctrl+o` | |\n| `app.tool.backgroundFold` | `ctrl+b` | |\n| `app.editor.external` | `ctrl+g` | |\n| `app.message.followUp` | `ctrl+enter` | |\n| `app.message.queue` | `alt+enter` | |\n| `app.message.dequeue` | `alt+up` | |\n| `app.clipboard.pasteImage` | `ctrl+v` (`alt+v` on win32) | platform-aware; single source of truth in `KEYBINDINGS` |\n| `app.clipboard.copyLine` | `alt+shift+l` | registry-backed via input-controller custom handler |\n| `app.clipboard.copyPrompt` | `alt+shift+c` | |\n| `app.session.new` | _(none)_ | command-driven; empty default |\n| `app.session.tree` | _(none)_ | command-driven; empty default |\n| `app.session.fork` | _(none)_ | command-driven; empty default |\n| `app.session.resume` | _(none)_ | command-driven; empty default |\n| `app.session.observe` | `ctrl+s` | also `app.session.toggleSort` (session list) |\n| `app.jobs.open` | `alt+j` | |\n| `app.session.togglePath` | `ctrl+p` | session-list context |\n| `app.session.toggleSort` | `ctrl+s` | session-list context |\n| `app.session.rename` | `ctrl+r` | also `app.history.search` |\n| `app.session.delete` | `ctrl+d` | session-list context |\n| `app.session.deleteNoninvasive` | `ctrl+backspace` | |\n| `app.tree.foldOrUp` | `ctrl+left`, `alt+left` | |\n| `app.tree.unfoldOrDown` | `ctrl+right`, `alt+right` | |\n| `app.plan.toggle` | `alt+shift+p` | |\n| `app.history.search` | `ctrl+r` | |\n| `app.stt.toggle` | `alt+h` | |\n\n### Global engine context (`tui.global.*`)\n\n| Action ID | Default | Notes |\n| --- | --- | --- |\n| `tui.global.debug` | `shift+ctrl+d` | Toggle debug overlay; resolved through the registry in `tui.ts` |\n\nCross-context default reuse (`ctrl+p`, `ctrl+s`, `ctrl+r`, `ctrl+d`, `ctrl+b`, `ctrl+left`/`ctrl+right`, `enter`, `escape`, `ctrl+c`) is intentional: each pair is active in a different focused context and is disambiguated at dispatch time. Use `detectDefaultKeyCollisions()` (above) to re-derive this list from the registry.\n\n### Not yet registry-managed\n\nA few contexts still match chords directly instead of resolving through the registry, and are tracked for a later phase:\n\n- Tree selector (`tree-selector.ts`): up/down/left/right/enter, `ctrl+c`, filter cycling (`ctrl+o` / `ctrl+shift+o`), filter modes (`alt+d/t/u/l/a`), label edit (`shift+l`).\n- Parts of the model selector.\n",
|
|
30
30
|
"lsp-config.md": "# LSP configuration in GJC\n\nThis guide explains how to configure language servers for the GJC coding agent.\n\nSource of truth in code:\n\n- Server config type: `packages/coding-agent/src/lsp/types.ts` (`ServerConfig`)\n- Config loader: `packages/coding-agent/src/lsp/config.ts`\n- Built-in server definitions: `packages/coding-agent/src/lsp/defaults.json`\n\n## Auto-detection\n\nWhen no LSP config file is present, GJC auto-detects servers by intersecting two conditions:\n\n1. The project directory contains at least one of the server's `rootMarkers`.\n2. The server binary is available — checked in project-local bin directories first (e.g., `node_modules/.bin/`, `.venv/bin/`), then `$PATH`.\n\nNo configuration is required for common setups. The built-in server list covers most popular languages; see [`defaults.json`](../packages/coding-agent/src/lsp/defaults.json) for the full set.\n\n## Config file locations\n\nGJC merges LSP config from multiple files, lowest to highest priority:\n\n| Priority | Location |\n|----------|----------|\n| 5 (lowest) | `~/lsp.json`, `~/.lsp.json`, `~/lsp.yaml`, `~/.lsp.yaml` |\n| 3 | `~/.gjc/agent/lsp.json`, `~/.gjc/agent/lsp.yaml`, `~/.gemini/lsp.*` |\n| 2 | `<project>/.gjc/lsp.json`, `<project>/.gjc/lsp.yaml`, `<project>/.gemini/lsp.*` |\n| 1 (highest) | `<project>/lsp.json`, `<project>/.lsp.json`, `<project>/lsp.yaml` |\n\nEach location accepts both `.json` and `.yaml` / `.yml` variants, as well as hidden-file versions (`.lsp.json`, `.lsp.yaml`). Files are merged in order: higher-priority files override lower-priority fields for the same server. Servers not mentioned in any override file remain at their built-in defaults.\n\n**Recommended locations:**\n\n- User-wide preferences → `~/.gjc/agent/lsp.json`\n- Project-specific overrides → `<project>/.gjc/lsp.json`\n\n> **Note:** The presence of any LSP config file disables auto-detection. When at least one file is found, GJC skips the binary-scan phase and loads all servers that have matching `rootMarkers`, an available binary, and are not explicitly `disabled`.\n\n## File shape\n\nBoth JSON and YAML are accepted. The top-level object can use either a `servers` wrapper key or a flat map directly:\n\n```json\n{\n \"servers\": {\n \"server-name\": { ... }\n },\n \"idleTimeoutMs\": 300000\n}\n```\n\nor (flat, without the `servers` wrapper):\n\n```json\n{\n \"server-name\": { ... },\n \"idleTimeoutMs\": 300000\n}\n```\n\nTop-level keys:\n\n- `servers` — map of server name to `ServerConfig` (optional wrapper; flat form is equivalent)\n- `idleTimeoutMs` — shut down idle language servers after this many milliseconds; disabled by default\n\n## ServerConfig fields\n\n| Field | Type | Required | Description |\n|-------|------|----------|-------------|\n| `command` | `string` | yes | Binary name (resolved via PATH/local bins) or absolute path |\n| `args` | `string[]` | no | Arguments passed to the binary |\n| `fileTypes` | `string[]` | yes | File extensions this server handles, e.g. `[\".ts\", \".tsx\"]` |\n| `rootMarkers` | `string[]` | yes | Files/dirs that indicate a project root; glob patterns (e.g. `*.cabal`) are supported |\n| `initOptions` | `object` | no | Sent as `initializationOptions` during LSP handshake |\n| `settings` | `object` | no | Workspace settings pushed via `workspace/didChangeConfiguration` |\n| `disabled` | `boolean` | no | Set to `true` to disable this server entirely |\n| `warmupTimeoutMs` | `number` | no | Startup timeout in ms for this server (overrides the global default) |\n| `isLinter` | `boolean` | no | Mark server as linter/formatter only; excluded from type-intelligence operations (hover, go-to-definition, etc.) |\n| `capabilities` | `object` | no | Opt-in server-specific features; see [Capabilities](#capabilities) |\n\n`resolvedCommand` is populated automatically at runtime — do not set it manually.\n\n### Capabilities\n\nThe `capabilities` object enables optional server-specific features that GJC supports on a per-server basis:\n\n```json\n{\n \"capabilities\": {\n \"flycheck\": true,\n \"ssr\": true,\n \"expandMacro\": true,\n \"runnables\": true,\n \"relatedTests\": true\n }\n}\n```\n\nAll fields are boolean and optional. They are currently used by `rust-analyzer`.\n\n## Common recipes\n\n### Override a built-in server's settings\n\nPartial overrides are merged onto the built-in defaults. You only need to specify the fields you want to change.\n\n```json\n{\n \"servers\": {\n \"typescript-language-server\": {\n \"args\": [\"--stdio\", \"--log-level\", \"4\"]\n }\n }\n}\n```\n\n```yaml\nservers:\n gopls:\n settings:\n gopls:\n gofumpt: false\n staticcheck: false\n```\n\n### Disable a built-in server\n\n```json\n{\n \"servers\": {\n \"eslint\": {\n \"disabled\": true\n }\n }\n}\n```\n\n### Register a custom server\n\nNew servers require `command`, `fileTypes`, and `rootMarkers`. All other fields are optional.\n\n```json\n{\n \"servers\": {\n \"my-lsp\": {\n \"command\": \"my-lsp-server\",\n \"args\": [\"--stdio\"],\n \"fileTypes\": [\".xyz\"],\n \"rootMarkers\": [\".xyz-project\", \".git\"]\n }\n }\n}\n```\n\n### Set a global idle timeout\n\nShut down language servers that have been inactive for more than five minutes:\n\n```json\n{\n \"idleTimeoutMs\": 300000\n}\n```\n\n### Disable a server for one project, keep it globally\n\nPlace the override in `<project>/.gjc/lsp.json`:\n\n```json\n{\n \"servers\": {\n \"pylsp\": {\n \"disabled\": true\n }\n }\n}\n```\n\nThe user-level config in `~/.gjc/agent/lsp.json` is unaffected; pylsp is only suppressed in this project.\n\n## Built-in server list\n\nThe following servers ship in `defaults.json` and are eligible for auto-detection:\n\n| Server key | Language(s) | Binary |\n|---|---|---|\n| `rust-analyzer` | Rust | `rust-analyzer` |\n| `clangd` | C, C++, ObjC | `clangd` |\n| `zls` | Zig | `zls` |\n| `gopls` | Go | `gopls` |\n| `typescript-language-server` | TypeScript, JavaScript | `typescript-language-server` |\n| `denols` | TypeScript, JavaScript (Deno) | `deno` |\n| `biome` | TS/JS/JSON (linter) | `biome` |\n| `eslint` | TS/JS/Vue/Svelte (linter) | `vscode-eslint-language-server` |\n| `vscode-html-language-server` | HTML | `vscode-html-language-server` |\n| `vscode-css-language-server` | CSS, SCSS, Less | `vscode-css-language-server` |\n| `vscode-json-language-server` | JSON | `vscode-json-language-server` |\n| `tailwindcss` | HTML, CSS, TS/JS | `tailwindcss-language-server` |\n| `svelte` | Svelte | `svelteserver` |\n| `vue-language-server` | Vue | `vue-language-server` |\n| `astro` | Astro | `astro-ls` |\n| `pyright` | Python | `pyright-langserver` |\n| `basedpyright` | Python | `basedpyright-langserver` |\n| `pylsp` | Python | `pylsp` |\n| `ruff` | Python (linter) | `ruff` |\n| `jdtls` | Java | `jdtls` |\n| `kotlin-lsp` | Kotlin | `kotlin-lsp` |\n| `metals` | Scala | `metals` |\n| `hls` | Haskell | `haskell-language-server-wrapper` |\n| `ocamllsp` | OCaml | `ocamllsp` |\n| `elixirls` | Elixir | `elixir-ls` |\n| `erlangls` | Erlang | `erlang_ls` |\n| `gleam` | Gleam | `gleam` |\n| `solargraph` | Ruby | `solargraph` |\n| `ruby-lsp` | Ruby | `ruby-lsp` |\n| `rubocop` | Ruby (linter) | `rubocop` |\n| `bashls` | Bash, Zsh | `bash-language-server` |\n| `lua-language-server` | Lua | `lua-language-server` |\n| `intelephense` | PHP | `intelephense` |\n| `phpactor` | PHP | `phpactor` |\n| `omnisharp` | C# | `omnisharp` |\n| `yamlls` | YAML | `yaml-language-server` |\n| `terraformls` | Terraform | `terraform-ls` |\n| `dockerls` | Dockerfile | `docker-langserver` |\n| `helm-ls` | Helm | `helm_ls` |\n| `nixd` | Nix | `nixd` |\n| `nil` | Nix | `nil` |\n| `ols` | Odin | `ols` |\n| `dartls` | Dart | `dart` |\n| `marksman` | Markdown | `marksman` |\n| `texlab` | LaTeX | `texlab` |\n| `graphql` | GraphQL | `graphql-lsp` |\n| `prismals` | Prisma | `prisma-language-server` |\n| `vimls` | Vim script | `vim-language-server` |\n| `emmet-language-server` | HTML, CSS, JSX | `emmet-language-server` |\n| `sourcekit-lsp` | Swift | `sourcekit-lsp` |\n| `swiftlint` | Swift (linter) | `swiftlint` |\n| `tlaplus` | TLA+ | `tlapm_lsp` |\n",
|
|
31
31
|
"memory.md": "# Autonomous Memory\n\nWhen enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into each new session. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable via `/settings` or `config.yml`:\n\n```yaml\nmemories:\n enabled: true\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Pair memory-influenced decisions with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Memory artifacts\n\nGenerated local-memory artifacts are private runtime state, not a public tool or URI surface. They may be summarized into the system prompt when local memory is enabled, but users and model-facing tool docs should not rely on direct `memory://` reads. The legacy internal `memory://` resolver remains only for compatibility with existing persisted guidance and is not part of the public coding harness contract; remove it after legacy local-memory prompts no longer reference it.\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | ---------------------------------------------- |\n| `view` | Show the current memory injection payload |\n| `clear` / `reset` | Delete all memory data and generated artifacts |\n| `enqueue` / `rebuild` | Force consolidation to run at next startup |\n\n## How it works\n\nMemories are built by a background pipeline that runs at startup or when manually triggered via slash command.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, or currently active are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nAll output is scanned for secrets before being written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| --------------------- | ------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | --------------------------------------------------------- |\n| `memories.enabled` | `false` | Master switch |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — legacy non-public `memory://` compatibility handler\n",
|
|
32
32
|
"models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects models for the default and agent roles\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.gjc/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n cacheRetention: short # none | short | long; model entries and modelOverrides can override this\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n cacheRetention: long\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n cacheRetention: none\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\nmodelBindings:\n modelRoles:\n default: my-provider/some-model-id:high\n agentModelOverrides:\n executor: my-provider/some-model-id\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `bedrock-converse-stream`\n- `anthropic-messages`\n- `bedrock-converse-stream`\n- `google-generative-ai`\n- `google-vertex`\n- `google-gemini-cli`\n- `ollama-chat`\n- `cursor-agent`\n\n\n### First-class Azure OpenAI and Amazon Bedrock examples\n\nAzure OpenAI uses canonical OpenAI model IDs in GJC and resolves those IDs to Azure deployment names at request time. Set `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` to avoid assuming model id equals deployment name:\n\n```yaml\nproviders:\n azure-openai:\n baseUrl: https://my-resource.openai.azure.com/openai/v1\n apiKeyEnv: AZURE_OPENAI_API_KEY\n api: azure-openai-responses\n models:\n - id: gpt-4.1\n - id: o3\n```\n\n```sh\nexport AZURE_OPENAI_DEPLOYMENT_NAME_MAP='gpt-4.1=gpt-41-prod,o3=o3-reasoning-prod'\n```\n\nAmazon Bedrock uses the native `bedrock-converse-stream` transport and AWS credential chain auth. Do not put AWS access keys in `models.yml`; configure `AWS_REGION` / `AWS_PROFILE` or standard static AWS credential environment variables instead:\n\n```yaml\nproviders:\n amazon-bedrock:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com\n api: bedrock-converse-stream\n models:\n - id: us.anthropic.claude-opus-4-6-v1\n - id: anthropic.claude-3-5-sonnet-20241022-v2:0\n```\n\n### MiniMax and GLM custom provider examples\n\nFor common MiniMax and GLM/zAI setup, prefer the provider presets so the OpenAI-compatible API, base URL, env var, model id, and compatibility flags are written together:\n\n```sh\ngjc setup provider --preset minimax\ngjc setup provider --preset minimax-cn\ngjc setup provider --preset glm\n```\n\nThe same presets are available inside the TUI:\n\n```text\n/provider add --preset minimax\n/provider add --preset glm\n/provider add zai\n```\n\nPresets only write `models.yml` entries that reference documented environment variable names (`MINIMAX_CODE_API_KEY`, `MINIMAX_CODE_CN_API_KEY`, or `ZAI_API_KEY`); they do not store or validate real credentials. The GLM preset aliases (`glm`, `zai`, `z-ai`) write an OpenAI-compatible custom provider named `glm-proxy` and do not replace the first-class `zai` provider.\n\n## Model profiles (`--mpreset`)\n\nModel profiles are optional top-level `profiles:` entries in `~/.gjc/agent/models.yml`. A profile can require provider credentials before activation and can map one or more model roles; omitted roles inherit from the active defaults.\n\n> See also: [Cross-vendor role-based profiles](./multi-vendor-profiles.md) — a curated multi-vendor `profiles:` recipe and verified selector notes that build on the mechanism described here.\n\n```yaml\nprofiles:\n team-standard:\n required_providers: [openai, anthropic]\n model_mapping:\n default: openai/gpt-5.2\n executor: anthropic/claude-sonnet-4-6:medium\n architect: openai/o3:high\n planner: openai/o3:high\n critic: openai/o3:high\n```\n\n`model_mapping` keys are role names (`default`, `executor`, `architect`, `planner`, `critic`). Each role maps to exactly one model selector in the form `provider/modelId[:effort]`; comma-separated fallback chains are not supported in a single role value.\n`required_providers` is the aggregate set of providers required across the profile's mapped roles, not a per-role fallback chain.\n\nBuilt-in profiles are grouped by provider mix and tier:\n\n- `codex-{eco,medium,pro}` — all roles on `openai-codex/gpt-5.5`, differing only by per-role reasoning effort\n- `opencodego` — single OpenCode Go preset (Kimi default, DeepSeek executor/architect, Qwen planner, MiMo critic)\n- `claude-opus` — Anthropic OAuth preset centered on `claude-opus-4-8`\n- Single-provider tiers: `glm-{eco,medium,pro}`, `kimi-coding-plan-{eco,medium,pro}`, `mimo-{eco,medium,pro}`, `grok-{eco,medium,pro}`, `cursor-{eco,medium,pro}`, `minimax-{eco,medium,pro}`\n- Combos: `opus-codex` (Claude main agent with Codex support roles), `codex-opencodego` (Codex orchestrator/architect with OpenCode Go workers)\n\nThe `eco` tier favors cheaper/faster defaults, `medium` matches normal production defaults, and `pro` raises reasoning for architect, critic, and planner roles. Effort suffixes are clamped to each model's supported thinking range at preview and activation time (for example `codex-eco`'s executor `:minimal` resolves to effective `low` on `gpt-5.5`). Single-provider tiers pin each provider's current flagship (`zai/glm-5.2`, `kimi-code/kimi-k2.7-code`, `xiaomi/mimo-v2.5-pro`, `xai/grok-4.3`, `cursor/composer-1.5`, `minimax-code/minimax-m3`). User-defined profiles override built-ins by exact profile name.\n\n\nUse `gjc --mpreset <name>` to activate a profile for the current session only. Activation hard-blocks when any provider listed in `required_providers` lacks credentials. Add `--default` to persist the selected profile as `modelProfile.default` in `config.yml`, so it applies at startup:\n\n```sh\ngjc --mpreset codex-medium\ngjc --mpreset opencodego --default\n```\n\nThe `/model` command opens to a preset landing view: presets are grouped by provider with live auth marks (✓/✗), highlighting a group expands its tiers, and selecting a tier shows the full role→model preview before applying for the session or as default. Typing jumps straight to model search, and `Browse all models` opens the classic tabbed model selector. In `/login`, `Add custom provider` is the first option for configuring credentials needed by custom or profile-required providers; after a successful provider login, the matching preset is recommended automatically.\n\nMiniMax's OpenAI-compatible endpoint rejects multiple system messages and emits thinking in `reasoning_content`, so pin the public-safe compatibility fields when hand-authoring a custom provider:\n\n```yaml\nproviders:\n minimax-custom:\n baseUrl: https://api.minimax.io/v1\n apiKeyEnv: MINIMAX_API_KEY\n api: openai-completions\n compat:\n supportsStore: false\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n reasoningContentField: reasoning_content\n models:\n - id: MiniMax-M2.5\n```\n\nGLM via z.ai is available as the first-class `zai` provider. For a private GLM-compatible proxy, keep secrets in an env var and disable OpenAI-only request fields as needed:\n\n```yaml\nproviders:\n glm-proxy:\n baseUrl: https://api.z.ai/api/paas/v4\n apiKeyEnv: ZAI_API_KEY\n api: openai-completions\n compat:\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n models:\n - id: glm-4.6\n```\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `models.yml` is strict: unknown provider/model keys fail validation before provider dispatch, so stale keys such as `requestTransform` or `wireModelId` only work where this document lists them.\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n- `cacheRetention`: `none`, `short`, or `long`; request-time options win over model/modelOverride values, then provider values, then `GJC_CACHE_RETENTION`, then the runtime default. The runtime default is `short` for most providers, but the Anthropic provider defaults to `long` (`ttl: \"1h\"`) because the ~5m default is too fragile for long-running subagent workflows. The 1h marker is only emitted on the canonical Anthropic API (`api.anthropic.com`) for models advertising `supportsLongCacheRetention`; proxies, gateways, and incapable models fall back to the default ephemeral (~5m) breakpoint. For OpenAI Responses, this controls `prompt_cache_retention` only; it does not disable `prompt_cache_key` when a stable session id exists.\n\n## OpenAI-compatible proxy configuration\n\nOpenAI-compatible proxy providers should use schema-supported provider keys first:\n\n```yaml\nproviders:\n proxy-provider:\n baseUrl: https://api.proxy.example/v1\n apiKeyEnv: PROXY_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: local-gpt\n name: Local GPT\n reasoning: true\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n```\n\nUse provider-level `headers` for proxy-required headers. Keep the provider `api` set to `openai-completions` when the proxy exposes Chat Completions-compatible `/v1/chat/completions` semantics. `auth: apiKey` sends the resolved token as bearer auth; use `auth: none` only for trusted local/no-auth endpoints.\n\n`requestTransform` and `wireModelId` remain supported for request-body shaping, but they are not needed for ordinary OpenAI-compatible proxies whose local model id is already the upstream wire id. Unknown config keys fail validation before a provider request is sent.\n\nWhen request shaping is needed:\n\n- `requestTransform.profile: openai-proxy` strips OpenAI SDK/Stainless telemetry and beta headers at final fetch time and sets a generic GJC user agent.\n- `stripHeaders` replaces the preset strip list when provided.\n- `setHeaders` is applied after stripping; use `null` to remove a header.\n- `extraBody` is shallow-merged into the JSON request body after provider compatibility fields; core transport keys such as `model`, `messages`/`input`, `stream`, `tools`, and `tool_choice` are protected and ignored.\n- Model-level `requestTransform` overrides provider-level fields and shallow-merges `setHeaders`/`extraBody`.\n- `wireModelId` changes only the upstream request body model id; local selection still uses `provider/id`.\n\n### Layofflabs-style proxy example\n\n```yaml\nproviders:\n layofflabs:\n baseUrl: https://api.layofflabs.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-5.5\n name: GPT 5.5 via Layofflabs\n reasoning: true\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n\nmodelBindings:\n modelRoles:\n default: layofflabs/gpt-5.5:high\n agentModelOverrides:\n executor: layofflabs/gpt-5.5:high\n```\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `requestTransform`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n- unknown provider, model, override, and request-transform keys fail schema validation; remove stale keys instead of relying on them being ignored.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@gajae-code/ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `requestTransform`, `disableStrictTools`, `cacheRetention`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `anthropic-model-opus-4-6`\n- `anthropic-model-haiku-4-5`\n- `gpt-5.3-openai-code`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: openai-code\n name: Zenmux OpenAI code\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/openai-code: gpt-5.3-openai-code\n p-openai-code/openai-code: gpt-5.3-openai-code\n exclude:\n - demo/openai-code-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `GJC_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`GJC_AUTH_BROKER_URL`, `GJC_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default` plus the agent assignment targets `executor`, `architect`, `planner`, `critic`\n\nRole aliases like `pi/default` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI code provider transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-openai-code` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - anthropic-model-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/anthropic-model-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error. It is **off by default** (`contextPromotion.enabled` is `false`); opt in to enable it.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI code provider websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-code:\n modelOverrides:\n gpt-5.3-openai-code-spark:\n contextPromotionTarget: openai-code/gpt-5.3-openai-code\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `sendSessionHeaders` — forward the agent session id as `session_id` and `x-session-id` request headers so OpenAI-compatible relays/proxies can do session-affinity routing and reuse a server-side prompt cache. Default: `false`. Caller-set `headers`/`requestTransform` values are never overwritten.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/anthropic-model-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.gjc/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
|
|
@@ -52,6 +52,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
52
52
|
"provider-streaming-internals.md": "# Provider streaming internals\n\nThis document explains how token/tool streaming is normalized in `@gajae-code/ai`, then propagated through `@gajae-code/agent-core` and `coding-agent` session events.\n\n## End-to-end flow\n\n1. `streamSimple()` (`packages/ai/src/stream.ts`) maps generic options and dispatches to a provider stream function.\n2. Provider stream functions translate provider-native stream events into the unified `AssistantMessageEvent` sequence. Current built-ins include Anthropic, OpenAI Responses/Completions/OpenAI code/Azure Responses, Google Gemini/Gemini CLI/Vertex, Bedrock Converse, Ollama, Cursorand GitLab Duo/Kimi wrappers.\n3. Each provider pushes events into `AssistantMessageEventStream` (`packages/ai/src/utils/event-stream.ts`), which throttles delta events and exposes:\n - async iteration for incremental updates\n - `result()` for final `AssistantMessage`\n4. `agentLoop` (`packages/agent/src/agent-loop.ts`) consumes those events, mutates in-flight assistant state, and emits `message_update` events carrying the raw `assistantMessageEvent`.\n5. `AgentSession` (`packages/coding-agent/src/session/agent-session.ts`) subscribes to agent events, persists messages, and applies session behaviors (retry, compaction, TTSR, streaming-edit abort checks).\n\n## Unified stream contract in `@gajae-code/ai`\n\nAll providers emit the same shape (`AssistantMessageEvent` in `packages/ai/src/types.ts`):\n\n- `start`\n- content block lifecycle triplets:\n - text: `text_start` → `text_delta`\\* → `text_end`\n - thinking: `thinking_start` → `thinking_delta`\\* → `thinking_end`\n - tool call: `toolcall_start` → `toolcall_delta`\\* → `toolcall_end`\n- terminal event:\n - `done` with `reason: \"stop\" | \"length\" | \"toolUse\"`\n - or `error` with `reason: \"aborted\" | \"error\"`\n\n`AssistantMessageEventStream` guarantees:\n\n- final result is resolved by terminal event (`done` or `error`)\n- deltas are batched/throttled (~50ms)\n- buffered deltas are flushed before non-delta events and before completion\n\n## Delta throttling and harmonization behavior\n\n`AssistantMessageEventStream` treats `text_delta`, `thinking_delta`, and `toolcall_delta` as mergeable events:\n\n- buffered deltas are merged only when **type + contentIndex** match\n- merge keeps the latest `partial` snapshot\n- non-delta events force immediate flush\n\nThis smooths high-frequency provider streams for TUI/event consumers, but is not provider backpressure: providers still produce at full speed, while the local stream buffers.\n\n## Provider normalization details\n\n## Anthropic (`anthropic-messages`)\n\nSource: `packages/ai/src/providers/anthropic.ts`\n\nNormalization points:\n\n- `message_start` initializes usage (input/output/cache tokens)\n- `content_block_start` maps to text/thinking/toolcall starts\n- `content_block_delta` maps:\n - `text_delta` → `text_delta`\n - `thinking_delta` → `thinking_delta`\n - `input_json_delta` → `toolcall_delta`\n - `signature_delta` updates `thinkingSignature` only (no event)\n- `content_block_stop` emits corresponding `*_end`\n- `message_delta.stop_reason` maps via `mapStopReason()`\n\nTool-call argument streaming:\n\n- each tool block carries internal `partialJson`\n- every JSON delta appends to `partialJson`\n- `arguments` are reparsed on each delta via `parseStreamingJson()`\n- `toolcall_end` reparses once more, then strips `partialJson`\n\n## OpenAI Responses family (`openai-responses`, `openai-code-responses`, `azure-openai-responses`)\n\nSources: `packages/ai/src/providers/openai-responses.ts`, `openai-code-responses.ts`, and `azure-openai-responses.ts`\n\nNormalization points:\n\n- `response.output_item.added` starts reasoning/text/function-call blocks\n- reasoning summary events (`response.reasoning_summary_text.delta`) become `thinking_delta`\n- output/refusal deltas become `text_delta`\n- `response.function_call_arguments.delta` becomes `toolcall_delta`\n- `response.output_item.done` emits `thinking_end` / `text_end` / `toolcall_end`\n- `response.completed` maps status to stop reason and usage\n\nTool-call argument streaming:\n\n- same `partialJson` accumulation pattern as Anthropic\n- providers that send only `response.function_call_arguments.done` still populate final args\n- tool call IDs are normalized as `\"<call_id>|<item_id>\"`\n\n## Google Generative AI (`google-generative-ai`)\n\nSource: `packages/ai/src/providers/google.ts`\n\nNormalization points:\n\n- iterates `candidate.content.parts`\n- text parts are split into thinking vs text by `isThinkingPart(part)`\n- block transitions close previous block before starting a new one\n- `part.functionCall` is treated as a complete tool call (start/delta/end emitted immediately)\n- finish reason mapped by `mapStopReason()` from `google-shared.ts`\n\nTool-call argument streaming:\n\n- function call args arrive as structured object, not incremental JSON text\n- implementation emits one synthetic `toolcall_delta` containing `JSON.stringify(arguments)`\n- no partial JSON parser needed for Google in this path\n\n## Partial tool-call JSON accumulation and recovery\n\nShared behavior for Anthropic/OpenAI Responses uses `parseStreamingJson()` (`packages/ai/src/utils/json-parse.ts`):\n\n1. try `JSON.parse`\n2. fallback to `partial-json` parser for incomplete fragments\n3. if both fail, return `{}`\n\nImplications:\n\n- malformed or truncated argument deltas do not crash stream processing immediately\n- in-progress `arguments` may temporarily be `{}`\n- later valid deltas can recover structured arguments because parsing is retried on every append\n- final `toolcall_end` performs one more parse attempt before emission\n\n## Stop reasons vs transport/runtime errors\n\nProvider stop reasons are mapped to normalized `stopReason`:\n\n- Anthropic: `end_turn`→`stop`, `max_tokens`→`length`, `tool_use`→`toolUse`, safety/refusal cases→`error`\n- OpenAI Responses: `completed`→`stop`, `incomplete`→`length`, `failed/cancelled`→`error`\n- Google: `STOP`→`stop`, `MAX_TOKENS`→`length`, safety/prohibited/malformed-function-call classes→`error`\n\nError semantics are split in two stages:\n\n1. **Model completion semantics** (provider reported finish reason/status)\n2. **Transport/runtime failure** (network/client/parser/abort exceptions)\n\nIf provider stream throws or signals failure, each provider wrapper catches and emits terminal `error` event with:\n\n- `stopReason = \"aborted\"` when abort signal is set\n- otherwise `stopReason = \"error\"`\n- `errorMessage = formatErrorMessageWithRetryAfter(error)`\n\n## Malformed chunk / SSE parse failure behavior\n\nFor these provider paths, chunk/SSE framing is handled by vendor SDK streams (Anthropic SDK, OpenAI SDK, Google SDK). This code does not implement a custom SSE decoder here.\n\nObserved behavior in current implementation:\n\n- malformed chunk/SSE parsing at SDK level surfaces as an exception or stream `error` event\n- provider wrapper converts that into unified terminal `error` event\n- no provider-specific resume/retry inside the stream function itself\n- higher-level retries are handled in `AgentSession` auto-retry logic (message-level retry, not stream-chunk replay)\n\n## Cancellation boundaries\n\nCancellation is layered:\n\n- AI provider request: `options.signal` is passed into provider client stream call.\n- Provider wrapper: after stream loop, aborted signal forces error path (`\"Request was aborted\"`).\n- Agent loop: checks `signal.aborted` before handling each provider event and can synthesize an aborted assistant message from the latest partial.\n- Session/agent controls: `AgentSession.abort()` -> `agent.abort()` -> shared abort controller cancellation.\n\nTool execution cancellation is separate from model stream cancellation:\n\n- tool runners use `AbortSignal.any([agentSignal, steeringAbortSignal])`\n- steering interrupts can abort remaining tool execution while preserving already-produced tool results\n\n## Backpressure boundaries\n\nThere is no hard backpressure mechanism between provider SDK stream and downstream consumers:\n\n- `EventStream` uses in-memory queues with no max size\n- throttling reduces UI update rate but does not slow provider intake\n- if consumers lag significantly, queued events can grow until completion\n\nCurrent design favors responsiveness and simple ordering over bounded-buffer flow control.\n\n## How stream events surface as agent/session events\n\n`agentLoop.streamAssistantResponse()` bridges `AssistantMessageEvent` to `AgentEvent`:\n\n- on `start`: pushes placeholder assistant message and emits `message_start`\n- on block events (`text_*`, `thinking_*`, `toolcall_*`): updates last assistant message, emits `message_update` with raw `assistantMessageEvent`\n- on terminal (`done`/`error`): resolves final message from `response.result()`, emits `message_end`\n\n`AgentSession` then consumes those events for session-level behaviors:\n\n- TTSR watches `message_update.assistantMessageEvent` for `text_delta`, `thinking_delta`, and `toolcall_delta`\n- streaming edit guard inspects `toolcall_delta`/`toolcall_end` on `edit` calls and can abort early\n- persistence writes finalized messages at `message_end`\n- auto-retry examines assistant `stopReason === \"error\"` plus `errorMessage` heuristics\n\n## Unified vs provider-specific responsibilities\n\nUnified (common contract):\n\n- event shape (`AssistantMessageEvent`)\n- final result extraction (`done`/`error`)\n- delta throttling + merge rules\n- agent/session event propagation model\n\nProvider-specific (not fully abstracted):\n\n- upstream event taxonomies and mapping logic\n- stop-reason translation tables\n- tool-call ID conventions\n- reasoning/thinking block semantics and signatures\n- usage token semantics and availability timing\n- message conversion constraints per API\n\n## Implementation files\n\n- [`../../ai/src/stream.ts`](../packages/ai/src/stream.ts) — provider dispatch, option mapping, API key/session plumbing, custom API dispatch, and provider-specific credential handling.\n- [`../../ai/src/utils/event-stream.ts`](../packages/ai/src/utils/event-stream.ts) — generic stream queue + assistant delta throttling.\n- [`../../ai/src/utils/json-parse.ts`](../packages/ai/src/utils/json-parse.ts) — partial JSON parsing for streamed tool arguments.\n- [`../../ai/src/providers/anthropic.ts`](../packages/ai/src/providers/anthropic.ts) — Anthropic event translation and tool JSON delta accumulation.\n- [`../../ai/src/providers/openai-responses.ts`](../packages/ai/src/providers/openai-responses.ts), [`openai-code-responses.ts`](../packages/ai/src/providers/openai-code-responses.ts), [`azure-openai-responses.ts`](../packages/ai/src/providers/azure-openai-responses.ts) — Responses-family event translation and status mapping.\n- [`../../ai/src/providers/google.ts`](../packages/ai/src/providers/google.ts), [`google-gemini-cli.ts`](../packages/ai/src/providers/google-gemini-cli.ts), [`google-vertex.ts`](../packages/ai/src/providers/google-vertex.ts) — Gemini stream chunk-to-block translation variants.\n- [`../../ai/src/providers/google-shared.ts`](../packages/ai/src/providers/google-shared.ts) — Gemini finish-reason mapping and shared conversion rules.\n- [`../../ai/src/providers/amazon-bedrock.ts`](../packages/ai/src/providers/amazon-bedrock.ts), [`openai-completions.ts`](../packages/ai/src/providers/openai-completions.ts), [`ollama.ts`](../packages/ai/src/providers/ollama.ts), [`cursor.ts`](../packages/ai/src/providers/cursor.ts) — additional built-in stream adapters using the same event contract.\n- [`../../agent/src/agent-loop.ts`](../packages/agent/src/agent-loop.ts) — provider stream consumption and `message_update` bridging.\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level handling of streaming updates, abort, retry, and persistence.\n",
|
|
53
53
|
"python-repl.md": "# Eval Tool Python Backend\n\nThis document describes the Python execution stack in `packages/coding-agent`.\nIt covers tool behavior, runner lifecycle, environment handling, execution semantics, output rendering, supported magics, and operational failure modes.\n\n## Scope and Key Files\n\n- Tool surface: `src/tools/eval.ts`\n- Session/per-call kernel orchestration: `src/eval/py/executor.ts`\n- Subprocess kernel client: `src/eval/py/kernel.ts`\n- Python wrapper / NDJSON server: `src/eval/py/runner.py`\n- Prelude helpers loaded into every kernel: `src/eval/py/prelude.py`\n- MIME bundle renderer (text + structured outputs): `src/eval/py/display.ts`\n- Interactive-mode renderer for user-triggered Python runs: `src/modes/components/eval-execution.ts`\n- Runtime/env filtering and Python resolution: `src/eval/py/runtime.ts`\n\n## What eval's Python backend is\n\nThe `eval` tool executes one or more Python cells inside a long-lived `python3` subprocess that speaks NDJSON over stdin/stdout. No Jupyter, no kernel gateway, no extra pip dependencies — a vanilla Python 3.8+ interpreter is enough. Rich `display()` output (PIL, pandas, plotly, matplotlib figures) keeps working because the wrapper reimplements the MIME-bundle dispatch that IPython previously provided.\n\nTool params:\n\n```ts\n{\n cells: Array<{ code: string; title?: string }>;\n timeout?: number; // seconds, clamped to 1..600, default 30\n reset?: boolean; // reset selected runtime before the first cell only\n}\n```\n\nThe tool is `concurrency = \"exclusive\"` for a session, so calls do not overlap.\n\n## Kernel lifecycle\n\nEach kernel is a single Python subprocess: `python -u <runner.py>`. The runner is bundled with the host binary (Bun text import), written to `~/.gjc/python-env`-adjacent tmp cache once per script-hash, and reused by every subsequent spawn.\n\nKernel startup sequence:\n\n1. Availability check (`checkPythonKernelAvailability`) — verifies that a Python interpreter resolves and runs.\n2. Spawn `python -u runner.py` with filtered env and `cwd`.\n3. Send an init request that runs `os.chdir(cwd)`, injects env entries, and adds `cwd` to `sys.path`.\n4. Execute `PYTHON_PRELUDE` (idempotent — only initializes once per process).\n\nKernel shutdown:\n\n- Send `{\"type\": \"exit\"}` over stdin.\n- Wait for process exit with `SHUTDOWN_GRACE_MS` budget.\n- Escalate to `SIGTERM` and finally `SIGKILL` if the process does not exit in time.\n\n## Wire protocol (NDJSON, host ↔ runner)\n\nOne JSON object per line, UTF-8, `\\n` terminated.\n\nHost → runner:\n\n```jsonc\n{\"id\": \"<reqId>\", \"code\": \"<source>\", \"silent\": false, \"storeHistory\": true}\n{\"type\": \"exit\"}\n```\n\nRunner → host:\n\n```jsonc\n{\"type\": \"started\", \"id\": \"<reqId>\"}\n{\"type\": \"stdout\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"stderr\", \"id\": \"<reqId>\", \"data\": \"...\"}\n{\"type\": \"display\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"result\", \"id\": \"<reqId>\", \"bundle\": {<mime>: <value>}}\n{\"type\": \"error\", \"id\": \"<reqId>\", \"ename\": \"...\", \"evalue\": \"...\", \"traceback\": [\"...\"]}\n{\"type\": \"done\", \"id\": \"<reqId>\", \"status\": \"ok\"|\"error\", \"executionCount\": N, \"cancelled\": false}\n```\n\nStatus events the prelude emits (e.g. `_emit_status(\"find\", count=…)`) ship inside display bundles under `application/x-gjc-status` so the existing TUI status renderer keeps working.\n\n## Magics\n\nThe runner's source transformer rewrites IPython-style magics to plain Python calls before parsing. Supported set:\n\n| Magic | Effect |\n| --- | --- |\n| `%pip <args>` | `python -m pip <args>` with live streaming output. Newly installed packages are evicted from `sys.modules` so the next `import` picks up the fresh install. |\n| `%cd <path>` | `os.chdir(path)` (with `~` expansion); emits status event. |\n| `%pwd` | Returns `os.getcwd()`. |\n| `%ls [path]` | Returns `sorted(os.listdir(path))`. |\n| `%env [KEY[=VAL]]` | List, read, or set env vars (matches prelude `env()` semantics). |\n| `%set_env KEY VALUE` | Set `os.environ[KEY]`. |\n| `%time <expr>` / `%timeit <expr>` | Time the expression; emits status event with elapsed ms. |\n| `%who` / `%whos` | List user-namespace names. |\n| `%reset` | Clear user globals and re-inject prelude. |\n| `%load <path>` | Read a file into a fresh cell and execute. |\n| `%run <path>` | `runpy.run_path` and merge globals back. |\n| `%%bash` / `%%sh` | Run the cell body via `bash`/`sh`. |\n| `%%capture [name]` | Run body with stdout/stderr captured into `name`. |\n| `%%timeit` | Time the cell body. |\n| `%%writefile <path>` | Write body to file. |\n| `!cmd` / `var = !cmd` | Run command via subprocess shell; returns an SList-style result with `.n` / `.s` helpers. |\n| `var = %name args` | Assignment forms work for line magics and `!cmd`. |\n\nUnknown magic names raise `NameError: UsageError: ...` inside the cell.\n\n## Session persistence semantics\n\n`python.kernelMode` controls retained kernel reuse:\n\n- `session` (default)\n - Reuses kernel sessions keyed by session file plus cwd when a session file exists; otherwise by cwd.\n - Execution is serialized per session via a queue.\n - Idle sessions are evicted after 5 minutes.\n - At most 4 sessions; oldest is evicted on overflow.\n - Heartbeat checks detect dead kernels.\n - Auto-restart allowed once; repeated crash ⇒ hard failure.\n- `per-call`\n - Spawns a fresh subprocess for each request.\n - Shuts the subprocess down after the request.\n - No cross-call state persistence.\n\n### Multi-cell behavior in a single tool call\n\nCells run sequentially in the same kernel instance for that tool call.\n\nIf an intermediate cell fails:\n\n- Earlier cell state remains in memory.\n- Tool returns a targeted error indicating which cell failed.\n- Later cells are not executed.\n\n`reset=true` only applies to the first cell execution in that call.\n\n## Environment filtering and runtime resolution\n\nEnvironment is filtered before launching the runner:\n\n- Allowlist includes core vars like `PATH`, `HOME`, locale vars, `VIRTUAL_ENV`, `PYTHONPATH`, etc.\n- Allow-prefixes: `LC_`, `XDG_`, `GJC_`\n- Denylist strips common API keys (OpenAI/Anthropic/Gemini/etc.)\n\nRuntime selection order:\n\n1. Active/located venv (`VIRTUAL_ENV`, then `<cwd>/.venv`, `<cwd>/venv`)\n2. Managed venv at `~/.gjc/python-env`\n3. `python` or `python3` on PATH\n\nWhen a venv is selected, its bin/Scripts path is prepended to `PATH`.\n\nThe runner additionally receives `PYTHONUNBUFFERED=1` and `PYTHONIOENCODING=utf-8` so streamed output reaches the host promptly.\n\n## Tool availability and mode selection\n\n`eval.py` / `eval.js` (both default `true`) plus optional `GJC_PY` override controls eval backend exposure:\n\n- Python backend only (`eval.py=true`, `eval.js=false`)\n- JavaScript backend only (`eval.py=false`, `eval.js=true`)\n- both backends\n\n`GJC_PY` accepted values:\n\n- `0` / `bash` → JavaScript backend only\n- `1` / `py` → Python backend only\n- `mix` / `both` → both backends\n\nIf Python preflight fails and `eval.js` is enabled, `eval` remains available and dispatches to JavaScript unless `language: \"python\"` is explicitly requested.\n\n## Execution flow and cancellation/timeout\n\n### Tool-level timeout\n\n`eval` timeout is in seconds, default 30, clamped to `1..600`. The tool combines caller abort signal and timeout signal with `AbortSignal.any(...)`.\n\n### Kernel execution cancellation\n\nOn abort/timeout:\n\n- The host sends `kill(\"SIGINT\")` to the runner subprocess.\n- The runner's exec-time signal handler raises `KeyboardInterrupt` inside the user code.\n- Result includes `cancelled=true`; timeout path annotates output as `Command timed out after <n> seconds`.\n- Between requests the runner installs `SIG_IGN` for SIGINT so a stray cancel does not tear down the kernel.\n\nIf a second cancel is required (runner stuck in C code), the host escalates to `SIGTERM` and the session restarts on the next call.\n\n### stdin behavior\n\nInteractive stdin is not supported. The runner does not forward `input()` prompts; user code that calls `input()` blocks until cancellation.\n\n## Output capture and rendering\n\n### Captured output classes\n\nFrom runner frames:\n\n- `stdout` / `stderr` → plain text chunks\n- `display` / `result` → rich display handling (MIME bundle)\n- `error` → traceback text\n- `application/x-gjc-status` MIME inside `display` → structured status events\n\nDisplay MIME precedence:\n\n1. `text/markdown`\n2. `text/plain`\n3. `text/html` (converted to basic markdown)\n\nAdditionally captured as structured outputs:\n\n- `application/json` → JSON tree data\n- `image/png` / `image/jpeg` → image payloads\n- `application/x-gjc-status` → status events\n\n### Matplotlib\n\nThe runner sets `MPLBACKEND=Agg` as an environ default so figures render off-screen. After every cell, `pyplot.get_fignums()` is iterated; each figure is saved to PNG, emitted as an `image/png` display, and closed.\n\n### Storage and truncation\n\nOutput is streamed through `OutputSink` and may be persisted to artifact storage. Tool results can include truncation metadata and `artifact://<id>` for full output recovery.\n\n### Renderer behavior\n\n- Tool renderer (`eval.ts`):\n - shows code-cell blocks with per-cell status\n - collapsed preview defaults to 10 lines\n - supports expanded mode for full output and richer status detail\n- Interactive renderer (`eval-execution.ts`):\n - used for user-triggered Python execution in TUI\n - collapsed preview defaults to 20 lines\n - clamps very long individual lines to 4000 chars for display safety\n - shows cancellation/error/truncation notices\n\n## Operational troubleshooting\n\n- **Python backend not available** — Check `eval.py`, `GJC_PY`, and that `python`/`python3` is on PATH. If preflight fails and `eval.js` is enabled, omit `language` or pass `language: \"js\"` to use JavaScript.\n- **No Python on PATH** — Install a system Python 3.8+ or place a venv at `~/.gjc/python-env`. `gjc setup python --check` reports the resolved interpreter.\n- **Execution hangs then times out** — Increase tool `timeout` (max 600s) if workload is legitimate. For stuck native code, cancellation triggers `SIGINT` first then escalates; the session restarts on the next request.\n- **stdin/input prompts in Python code** — `input()` is not supported; pass data programmatically.\n- **Working directory errors** — Tool validates `cwd` exists and is a directory before execution.\n\n## Relevant environment variables\n\n- `GJC_PY` — tool exposure override\n- `GJC_PYTHON_SKIP_CHECK=1` — bypass Python preflight/warm checks\n- `GJC_PYTHON_INTEGRATION=1` — enable gated integration tests that spawn a real Python\n- `GJC_PYTHON_IPC_TRACE=1` — log NDJSON frames exchanged with the runner subprocess\n",
|
|
54
54
|
"render-mermaid.md": "# RenderMermaid\n\n`RenderMermaid` is an optional built-in tool that renders Mermaid source to terminal-friendly text.\n\n## Enable it\n\nDisabled by default. Turn it on in `/settings` under **Tools → Render Mermaid**, or in `~/.gjc/agent/config.yml`:\n\n```yaml\nrenderMermaid:\n enabled: true\n```\n\n## What it does\n\n- Tool name: `render_mermaid`\n- Input: Mermaid source in the required `mermaid` field\n- Output: rendered ASCII/Unicode text, not SVG or PNG\n- Storage: when artifact storage is available, the full render is also saved as an `artifact://...`\n\nThere are no model-specific or environment-variable prerequisites. Once enabled, any model that can call built-in tools can use it.\n\n## Parameters\n\n```json\n{\n \"mermaid\": \"graph TD\\n A[Start] --> B[Stop]\",\n \"config\": {\n \"useAscii\": false,\n \"paddingX\": 2,\n \"paddingY\": 2,\n \"boxBorderPadding\": 0\n }\n}\n```\n\nAvailable `config` fields:\n\n- `useAscii` — `true` for plain ASCII, `false` for Unicode box-drawing characters (default and usually more readable)\n- `paddingX` — horizontal spacing between nodes\n- `paddingY` — vertical spacing between nodes\n- `boxBorderPadding` — inner padding inside node boxes\n\n## Current limitations\n\n`RenderMermaid` uses the `beautiful-mermaid` ASCII renderer. It works best for flowcharts and small diagrams.\n\nComplex sequence diagrams, especially with `alt` / `else` blocks, can become very wide in a terminal. That is current renderer behavior, not a provider or model configuration problem.\n\nIf a sequence diagram is hard to read:\n\n1. Keep Unicode output (`useAscii: false`)\n2. Reduce spacing with a tighter config such as `paddingX: 2`, `paddingY: 2`, `boxBorderPadding: 0`\n3. Prefer smaller sub-diagrams over one large sequence diagram\n4. Open the saved artifact if the inline preview is truncated in the TUI\n\n## Example\n\nInput:\n\n```mermaid\ngraph TD\n A[Start] --> B{Decision}\n B -->|Yes| C[Action]\n B -->|No| D[End]\n```\n\nTypical result:\n\n```text\n┌─────┐\n│Start│\n└─────┘\n │\n ▼\n┌────────┐\n│Decision│\n└────────┘\n```\n",
|
|
55
|
+
"research-plan-ledger.md": "# Research plan items and evidence ledger\n\nResearch/deep-research workflows need a planning contract that is stronger than an execution-order checklist. A plan item should name the claim under investigation, the uncertainty around it, what evidence is required, what counterexamples would falsify it, and how a verifier should handle source conflicts.\n\nThis document defines the public product-facing spike for issue #932. It intentionally avoids private operator, session, channel, and routing internals.\n\n## Research plan item schema\n\n```ts\ntype ResearchPlanConfidence = \"low\" | \"medium\" | \"high\";\n\ntype ResearchPlanItem = {\n claim: string;\n confidence: ResearchPlanConfidence;\n unknowns: string[];\n evidenceNeeded: string[];\n counterexampleQueries: string[];\n sourceConflictPolicy: string;\n dropCondition: string;\n verifierChecks: string[];\n};\n```\n\nField intent:\n\n- `claim`: The smallest claim that can survive or fail verification.\n- `confidence`: Planner's initial confidence before evidence collection.\n- `unknowns`: Known gaps the final answer must resolve or explicitly carry forward.\n- `evidenceNeeded`: Evidence workers must collect before the claim can be accepted.\n- `counterexampleQueries`: Directed search prompts for evidence that would weaken or falsify the claim.\n- `sourceConflictPolicy`: How the verifier treats conflicting sources, stale sources, or mismatched methodology.\n- `dropCondition`: The explicit condition that removes this claim from the final answer.\n- `verifierChecks`: Checklist the verifier applies before accepting the claim.\n\n## Evidence ledger schema\n\n```ts\ntype ResearchEvidenceVerdict = \"support\" | \"contradict\" | \"uncertain\";\n\ntype ResearchEvidenceEntry = {\n claim: string;\n source: string;\n confidence: ResearchPlanConfidence;\n verdict: ResearchEvidenceVerdict;\n notes?: string;\n};\n\ntype ResearchLedgerVerdict = {\n claim: string;\n finalVerdict: \"accepted\" | \"rejected\" | \"uncertain\";\n survivingSources: ResearchEvidenceEntry[];\n rejectReason?: string;\n unresolvedUnknowns: string[];\n};\n```\n\nThe ledger is claim-centric. Workers add evidence entries against plan-item claims; the verifier reduces those entries into a final verdict. Accepted claims can be cited in the final answer. Rejected claims are named with `rejectReason`. Uncertain claims are either excluded or marked explicitly as unresolved.\n\n## Ralplan/research workflow shape\n\n1. Planner emits `ResearchPlanItem[]` alongside the normal plan narrative when the task is research-heavy.\n2. Workers gather independent evidence for each item, including counterexample-oriented searches.\n3. Verifier checks contradictions, source quality, stale information, and unresolved uncertainty using the item's `verifierChecks`, `sourceConflictPolicy`, and `dropCondition`.\n4. The final answer cites accepted claims, lists rejected claims with reasons, and marks any surviving uncertainty.\n\n## Example\n\n```ts\nconst item: ResearchPlanItem = {\n claim: \"Model X reduces latency by 30% on production-like workloads\",\n confidence: \"medium\",\n unknowns: [\"production workload mix\"],\n evidenceNeeded: [\"benchmark with production-like fixture\", \"baseline comparison\"],\n counterexampleQueries: [\"regression on long-context workload\", \"cold-start latency increase\"],\n sourceConflictPolicy: \"Reject the claim when any credible counterexample contradicts the benchmark.\",\n dropCondition: \"Drop if a counterexample contradicts the claim or key unknowns remain unresolved.\",\n verifierChecks: [\"check source freshness\", \"compare benchmark harness\", \"inspect counterexample evidence\"],\n};\n```\n\nIf the ledger contains a supporting benchmark and a credible long-context counterexample, the verifier rejects the broad claim instead of letting a plausible summary survive by vibes.\n\n## Current spike\n\nThe first implementation spike lives in `packages/coding-agent/src/research-plan/ledger.ts` and provides:\n\n- TypeScript interfaces for research plan items, evidence entries, and final verdicts.\n- Validators for product-facing plan/evidence objects.\n- A deterministic verifier helper that rejects plausible claims when counterexample/source-conflict/drop-condition evidence applies.\n- Regression tests in `packages/coding-agent/test/research-plan-ledger.test.ts`.\n\nFuture runtime integration can make `/skill:ralplan` emit these structures as a fenced JSON block or structured sidecar in the persisted ralplan artifact. The spike keeps the schema independent from private session state so it can be exposed in docs and tests safely.\n",
|
|
55
56
|
"resolve-tool-runtime.md": "# Resolve tool runtime internals\n\nThis document explains how preview/apply workflows are modeled in coding-agent and how built-in or custom tools can participate via the tool-choice queue and `pushPendingAction`.\n\n## Scope and key files\n\n- [`src/tools/resolve.ts`](../packages/coding-agent/src/tools/resolve.ts)\n- [`src/tools/ast-edit.ts`](../packages/coding-agent/src/tools/ast-edit.ts)\n- [`src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`src/extensibility/custom-tools/loader.ts`](../packages/coding-agent/src/extensibility/custom-tools/loader.ts)\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n\n## What `resolve` does\n\n`resolve` is a hidden tool that finalizes a pending preview action.\n\n- `action: \"apply\"` executes the queued action's `apply(reason)` callback and returns that result with resolve metadata.\n- `action: \"discard\"` invokes `reject(reason)` if provided; otherwise returns `Discarded: <label>. Reason: <reason>`.\n\nIf no pending action exists, `resolve` fails with:\n\n- `No pending action to resolve. Nothing to apply or discard.`\n\n## Pending actions use the tool-choice queue\n\nPreview producers call `queueResolveHandler(...)`, which pushes a one-shot forced `resolve` directive onto the session tool-choice queue and adds a `resolve-reminder` steering message.\n\nRuntime behavior:\n\n- the queued handler owns the pending `apply`/`reject` callbacks,\n- `resolve` looks up the current queue invoker with `session.peekQueueInvoker()`,\n- if the model rejects the forced tool choice, the queue directive is requeued,\n- `resolve` does not maintain a separate pending-action stack.\n\nMultiple pending previews therefore follow the active tool-choice queue ordering, not an independent pending-action store.\n\n## Built-in producer example (`ast_edit`)\n\n`ast_edit` previews structural replacements first. When the preview has replacements and is not applied yet, it queues a resolve handler that contains:\n\n- label (human-readable summary)\n- `sourceToolName` (`ast_edit`)\n- `apply(reason: string)` callback that reruns AST edit with `dryRun: false`\n\n`resolve(action=\"apply\", reason=\"...\")` passes `reason` into this callback.\n\n## Custom tools: `pushPendingAction`\n\nCustom tools can register resolve-compatible pending actions through `CustomToolAPI.pushPendingAction(...)`. The custom tool loader forwards these actions to `queueResolveHandler(...)` when that hook is available.\n\n`CustomToolPendingAction`:\n\n- `label: string` (required)\n- `apply(reason: string): Promise<AgentToolResult<unknown>>` (required) — invoked on apply; `reason` is the string passed to `resolve`\n- `reject?(reason: string): Promise<AgentToolResult<unknown> | undefined>` (optional) — invoked on discard; return value replaces the default \"Discarded\" message if provided\n- `details?: unknown` exists on the public custom-tool type but is not currently forwarded by the loader into resolve metadata\n- `sourceToolName?: string` (optional, defaults to `\"custom_tool\"`)\n\n### Minimal usage example\n\n```ts\nimport type { CustomToolFactory } from \"@gajae-code/coding-agent\";\n\nconst factory: CustomToolFactory = (pi) => ({\n name: \"batch_rename_preview\",\n label: \"Batch Rename Preview\",\n description: \"Previews renames and defers commit to resolve\",\n parameters: pi.zod.object({\n files: pi.zod.array(pi.zod.string()),\n }),\n\n async execute(_toolCallId, params) {\n const previewSummary = `Prepared rename plan for ${params.files.length} files`;\n\n pi.pushPendingAction({\n label: `Batch rename: ${params.files.length} files`,\n sourceToolName: \"batch_rename_preview\",\n apply: async (reason) => {\n // apply writes here\n return {\n content: [\n { type: \"text\", text: `Applied batch rename. Reason: ${reason}` },\n ],\n };\n },\n reject: async (reason) => {\n // optional: cleanup or notify on discard\n return {\n content: [\n { type: \"text\", text: `Discarded batch rename. Reason: ${reason}` },\n ],\n };\n },\n });\n\n return {\n content: [\n {\n type: \"text\",\n text: `${previewSummary}. Call resolve to apply or discard.`,\n },\n ],\n };\n },\n});\n\nexport default factory;\n```\n\n## Runtime availability and failures\n\n`pushPendingAction` is wired by the custom tool loader through the active session's resolve queue hook.\n\nIf the runtime did not provide the resolve queue hook, `pushPendingAction` throws:\n\n- `Pending action store unavailable for custom tools in this runtime.`\n\n## Tool-choice behavior\n\nWhen `queueResolveHandler(...)` registers a preview, the agent runtime forces a one-shot `resolve` tool choice so pending previews are explicitly finalized before normal tool flow continues.\n\n## Developer guidance\n\n- Use pending actions only for destructive or high-impact operations that should support explicit apply/discard.\n- Keep `label` concise and specific; it is shown in resolve renderer output.\n- Ensure `apply(reason)` is deterministic and idempotent enough for one-shot execution; `reason` is informational and should not change behavior.\n- Implement `reject(reason)` when the discard needs cleanup (temp state, locks, notifications); omit it for stateless previews where the default message suffices.\n- If your tool can stage multiple previews, remember they are mediated by the tool-choice queue rather than a separate pending-action stack.\n",
|
|
56
57
|
"rpc.md": "# RPC Protocol Reference\n\nRPC mode runs the coding agent as a newline-delimited JSON protocol over stdio.\n\n- **stdin**: commands (`RpcCommand`), `workflow_gate_response`, extension UI responses, and host-tool updates/results\n- **stdout**: a ready frame, command responses (`RpcResponse`), session/agent events, `workflow_gate`, extension UI requests, host-tool requests/cancellations\n\nPrimary implementation:\n\n- `src/modes/rpc/rpc-mode.ts`\n- `src/modes/rpc/rpc-types.ts`\n- `src/session/agent-session.ts`\n- `packages/agent/src/agent.ts`\n- `packages/agent/src/agent-loop.ts`\n\n## Startup\n\n```bash\ngjc --mode rpc [regular CLI options]\n```\n\nBehavior notes:\n\n- `@file` CLI arguments are rejected in RPC mode.\n- RPC mode disables automatic session title generation by default to avoid an extra model call.\n- RPC mode resets workflow-altering `todo.*`, `task.*`, `async.*`, and `bash.autoBackground.*` settings to their built-in defaults instead of inheriting user overrides.\n- The process reads stdin as JSONL (`readJsonl(Bun.stdin.stream())`).\n- At startup it writes `{ \"type\": \"ready\" }` before processing commands.\n- When stdin closes, pending host-tool calls are rejected and the process exits with code `0`.\n- Responses/events are written as one JSON object per line.\n\n## Transport and Framing\n\nEach frame is a single JSON object followed by `\\n`.\n\nAgent session events are wrapped in canonical `event` frames. Ready, response, workflow gate, extension UI/error, host tool, and host URI frames remain flat.\n\n### Outbound frame categories (stdout)\n\n1. Ready frame (`{ type: \"ready\" }`)\n2. `RpcResponse` (`{ type: \"response\", ... }`)\n3. Canonical event frames wrapping `AgentSessionEvent` objects (`{ type: \"event\", ... }`)\n4. `RpcWorkflowGateEvent` (`{ type: \"workflow_gate\", ... }`)\n5. `RpcExtensionUIRequest` (`{ type: \"extension_ui_request\", ... }`)\n6. Host tool requests/cancellations (`host_tool_call`, `host_tool_cancel`)\n7. Host URI requests/cancellations (`host_uri_request`, `host_uri_cancel`)\n8. Extension errors (`{ type: \"extension_error\", extensionPath, event, error }`)\n\n### Inbound frame categories (stdin)\n\n1. `RpcCommand`\n2. `RpcWorkflowGateResponse` (`{ type: \"workflow_gate_response\", gate_id, answer }`)\n3. `RpcExtensionUIResponse` (`{ type: \"extension_ui_response\", ... }`)\n4. Host tool updates/results (`host_tool_update`, `host_tool_result`)\n5. Host URI results (`host_uri_result`)\n\n## Request/Response Correlation\n\nAll commands accept optional `id?: string`.\n\n- If provided, normal command responses echo the same `id`.\n- `RpcClient` relies on this for pending-request resolution.\n\nImportant edge behavior from runtime:\n\n- Unknown command responses are emitted with `id: undefined` (even if the request had an `id`).\n- Parse/handler exceptions in the input loop emit `command: \"parse\"` with `id: undefined`.\n- `prompt` and `abort_and_prompt` return immediate success, then may emit a later error response with the **same** id if async prompt scheduling fails.\n\n## Command Schema (canonical)\n\n`RpcCommand` is defined in `src/modes/rpc/rpc-types.ts`:\n\n### Prompting\n\n- `{ id?, type: \"prompt\", message: string, images?: ImageContent[], streamingBehavior?: \"steer\" | \"followUp\" }`\n- `{ id?, type: \"steer\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"follow_up\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"abort\" }`\n- `{ id?, type: \"abort_and_prompt\", message: string, images?: ImageContent[] }`\n- `{ id?, type: \"new_session\", parentSession?: string }`\n\n### State\n\n- `{ id?, type: \"get_state\", include?: (\"tools\" | \"dumpTools\" | \"systemPrompt\")[] }` (`dumpTools` is accepted as an alias for the older response field name.)\n- `{ id?, type: \"set_todos\", phases: TodoPhase[] }`\n- `{ id?, type: \"set_host_tools\", tools: RpcHostToolDefinition[] }`\n- `{ id?, type: \"set_host_uri_schemes\", schemes: RpcHostUriSchemeDefinition[] }`\n- `{ id?, type: \"workflow_gate_response\", gate_id: string, answer: unknown }`\n\n### Model\n\n- `{ id?, type: \"set_model\", provider: string, modelId: string }`\n- `{ id?, type: \"cycle_model\" }`\n- `{ id?, type: \"get_available_models\" }`\n\n### Thinking\n\n- `{ id?, type: \"set_thinking_level\", level: ThinkingLevel }`\n- `{ id?, type: \"cycle_thinking_level\" }`\n\n### Queue modes\n\n- `{ id?, type: \"set_steering_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_follow_up_mode\", mode: \"all\" | \"one-at-a-time\" }`\n- `{ id?, type: \"set_interrupt_mode\", mode: \"immediate\" | \"wait\" }`\n\n### Compaction\n\n- `{ id?, type: \"compact\", customInstructions?: string }`\n- `{ id?, type: \"set_auto_compaction\", enabled: boolean }`\n\n### Retry\n\n- `{ id?, type: \"set_auto_retry\", enabled: boolean }`\n- `{ id?, type: \"abort_retry\" }`\n\n### Bash\n\n- `{ id?, type: \"bash\", command: string }`\n- `{ id?, type: \"abort_bash\" }`\n\n### Session\n\n- `{ id?, type: \"get_session_stats\" }`\n- `{ id?, type: \"export_html\", outputPath?: string }`\n- `{ id?, type: \"switch_session\", sessionPath: string }`\n- `{ id?, type: \"branch\", entryId: string }`\n- `{ id?, type: \"get_branch_messages\" }`\n- `{ id?, type: \"get_last_assistant_text\" }`\n- `{ id?, type: \"set_session_name\", name: string }`\n\n### Messages\n\n- `{ id?, type: \"get_messages\" }`\n\n## Response Schema\n\nAll command results use `RpcResponse`:\n\n- Success: `{ id?, type: \"response\", command: <command>, success: true, data?: ... }`\n- Failure: `{ id?, type: \"response\", command: string, success: false, error: string | object }`; typed control-plane failures use object-valued errors such as `{ \"code\": \"scope_denied\", ... }`.\n\nData payloads are command-specific and defined in `rpc-types.ts`.\n\n\nBy default, `get_state` omits large static fields. Request `include: [\"tools\"]` to include `dumpTools`, `include: [\"systemPrompt\"]` to include `systemPrompt`, or both when a host needs a one-shot full session dump.\n### `get_state` payload\n\n```json\n{\n \"model\": { \"provider\": \"...\", \"id\": \"...\" },\n \"thinkingLevel\": \"off|minimal|low|medium|high|xhigh\",\n \"isStreaming\": false,\n \"isCompacting\": false,\n \"steeringMode\": \"all|one-at-a-time\",\n \"followUpMode\": \"all|one-at-a-time\",\n \"interruptMode\": \"immediate|wait\",\n \"sessionFile\": \"...\",\n \"sessionId\": \"...\",\n \"sessionName\": \"...\",\n \"autoCompactionEnabled\": true,\n \"messageCount\": 0,\n \"queuedMessageCount\": 0,\n \"todoPhases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Todos\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the tool surface\",\n \"status\": \"in_progress\"\n }\n ]\n }\n ],\n \"contextUsage\": {\n \"tokens\": 0,\n \"contextWindow\": 200000,\n \"percent\": 0\n }\n // Optional with include: [\"systemPrompt\"]:\n // \"systemPrompt\": [\"...\"],\n // Optional with include: [\"tools\"] (or [\"dumpTools\"]):\n // \"dumpTools\": [\n // { \"name\": \"read\", \"description\": \"Read files and URLs\", \"parameters\": {} }\n // ]\n}\n```\n\n### `set_todos` payload\n\nReplaces the in-memory todo state for the current session and returns the normalized phase list:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"set_todos\",\n \"phases\": [\n {\n \"id\": \"phase-1\",\n \"name\": \"Evaluation\",\n \"tasks\": [\n {\n \"id\": \"task-1\",\n \"content\": \"Map the read tool surface\",\n \"status\": \"in_progress\"\n },\n {\n \"id\": \"task-2\",\n \"content\": \"Exercise edit operations\",\n \"status\": \"pending\"\n }\n ]\n }\n ]\n}\n```\n\nThis is useful for hosts that want to pre-seed a plan before the first prompt.\n\n### `set_host_tools` payload\n\nReplaces the current set of host-owned tools that the RPC server may call back\ninto over stdio:\n\n```json\n{\n \"id\": \"req_3\",\n \"type\": \"set_host_tools\",\n \"tools\": [\n {\n \"name\": \"echo_host\",\n \"label\": \"Echo Host\",\n \"description\": \"Echo a value from the embedding host\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"message\": { \"type\": \"string\" }\n },\n \"required\": [\"message\"],\n \"additionalProperties\": false\n }\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"toolNames\": [\"echo_host\"]\n}\n```\n\nThese tools are added to the active session tool registry before the next model\ncall. Re-sending `set_host_tools` replaces the previous host-owned set.\n\n### `set_host_uri_schemes` payload\n\nReplaces the current set of host-owned URL schemes the RPC server should\ndispatch reads/writes through:\n\n```json\n{\n \"id\": \"req_4\",\n \"type\": \"set_host_uri_schemes\",\n \"schemes\": [\n {\n \"scheme\": \"db\",\n \"description\": \"Virtual db row files\",\n \"writable\": true,\n \"immutable\": false\n }\n ]\n}\n```\n\nThe response payload is:\n\n```json\n{\n \"schemes\": [\"db\"]\n}\n```\n\nSchemes are case-insensitive on the wire and normalized to lowercase before\nthe response is sent. Re-sending `set_host_uri_schemes` replaces the entire\nprevious set — schemes missing from the new list are unregistered.\n\n## Event Stream Schema\n\nRPC mode forwards `AgentSessionEvent` objects from `AgentSession.subscribe(...)` as canonical `event` frames:\n\n```json\n{\n \"type\": \"event\",\n \"protocol_version\": 2,\n \"session_id\": \"...\",\n \"seq\": 1,\n \"frame_id\": \"...\",\n \"payload\": {\n \"event_type\": \"agent_start\",\n \"event\": { \"type\": \"agent_start\" }\n }\n}\n```\n\n`seq` is monotonic per session starting at `1`. `payload.event_type` duplicates the inner event `type` for routing, and `payload.event` contains the original `AgentSessionEvent` fields.\n\nCommon inner event types:\n\n- `agent_start`, `agent_end`\n- `turn_start`, `turn_end`\n- `message_start`, `message_update`, `message_end`\n- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`\n- `auto_compaction_start`, `auto_compaction_end`\n- `auto_retry_start`, `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n- `todo_auto_clear`\n\nNon-event stdout categories remain flat: `ready`, `response`, `workflow_gate`, `extension_ui_request`, `extension_error`, `host_tool_call`, `host_tool_cancel`, `host_uri_request`, and `host_uri_cancel`.\n\n`message_update` includes streaming deltas in the inner event's `assistantMessageEvent` (text/thinking/toolcall deltas).\n\nExtension runner errors are emitted separately as flat frames:\n\n```json\n{\n \"type\": \"extension_error\",\n \"extensionPath\": \"...\",\n \"event\": \"...\",\n \"error\": \"...\"\n}\n```\n\n## Prompt/Queue Concurrency and Ordering\n\nThis is the most important operational behavior.\n\n### Immediate ack vs completion\n\n`prompt` and `abort_and_prompt` are **acknowledged immediately**:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n```\n\nThat means:\n\n- command acceptance != run completion\n- final completion is observed via `agent_end`\n\n### While streaming\n\n`AgentSession.prompt()` requires `streamingBehavior` during active streaming:\n\n- `\"steer\"` => queued steering message (interrupt path)\n- `\"followUp\"` => queued follow-up message (post-turn path)\n\nIf omitted during streaming, prompt fails.\n\n### Queue defaults\n\nFrom `packages/agent/src/agent.ts` defaults:\n\n- `steeringMode`: `\"one-at-a-time\"`\n- `followUpMode`: `\"one-at-a-time\"`\n- `interruptMode`: `\"immediate\"`\n\n### Mode semantics\n\n- `set_steering_mode` / `set_follow_up_mode`\n - `\"one-at-a-time\"`: dequeue one queued message per turn\n - `\"all\"`: dequeue entire queue at once\n- `set_interrupt_mode`\n - `\"immediate\"`: tool execution checks steering between tool calls; pending steering can abort remaining tool calls in the turn\n - `\"wait\"`: defer steering until turn completion\n\n## Workflow Gate Sub-Protocol\n\nInteractive workflow stages emit a machine-addressable gate frame before the legacy extension UI request:\n\n```json\n{\n \"type\": \"workflow_gate\",\n \"gate_id\": \"wg_4845_ralplan_000001\",\n \"stage\": \"ralplan\",\n \"kind\": \"approval\",\n \"schema\": { \"type\": \"string\", \"enum\": [\"approve\", \"request-changes\", \"reject\"] },\n \"schema_hash\": \"<sha256 of canonical schema>\",\n \"options\": [{ \"value\": \"approve\", \"label\": \"Approve execution\" }],\n \"context\": { \"title\": \"Approve plan?\", \"summary\": \"…\" },\n \"created_at\": \"2026-06-05T05:00:00.000Z\",\n \"required\": true\n}\n```\n\nFields:\n\n- `gate_id`: run-scoped, monotonic, stable id of the form `wg_<run>_<stage>_NNNNNN`.\n- `stage`: one of `\"deep-interview\"`, `\"ralplan\"`, or `\"ultragoal\"` (`team` is reserved and rejected for v1).\n- `kind`: one of `\"question\"`, `\"approval\"`, or `\"execution\"`.\n- `schema`: documented JSON Schema 2020-12 subset for the expected answer; `schema_hash` is the canonical hash of `schema` and equals the server-side validation hash.\n- `options`: optional `RpcWorkflowGateOption[]` (`{ value, label, description? }`), emitted for select-style gates.\n- `context`: `RpcWorkflowGateContext` (`title`, `prompt`, `summary`, `stage_state`, `artifact_refs`, `language`).\n- `created_at`: ISO timestamp the gate was opened; `required` is always `true`.\n\nHosts answer with:\n\n```json\n{ \"id\": \"resp_1\", \"type\": \"workflow_gate_response\", \"gate_id\": \"wg_4845_ralplan_000001\", \"answer\": \"approve\" }\n```\n\nA valid answer resolves the pending gate and returns:\n\n```json\n{ \"id\": \"resp_1\", \"type\": \"response\", \"command\": \"workflow_gate_response\", \"success\": true }\n```\n\nA schema mismatch is **not** a command failure: the response succeeds and the\nresolution data carries `status: \"rejected\"` plus a typed validation `error`\nwith code `invalid_workflow_gate_answer`:\n\n```json\n{\n \"id\": \"resp_1\",\n \"type\": \"response\",\n \"command\": \"workflow_gate_response\",\n \"success\": true,\n \"data\": {\n \"gate_id\": \"wg_1\",\n \"status\": \"rejected\",\n \"answer_hash\": \"…\",\n \"resolved_at\": \"…\",\n \"error\": {\n \"code\": \"invalid_workflow_gate_answer\",\n \"gate_id\": \"wg_1\",\n \"schema_hash\": \"…\",\n \"errors\": [{ \"path\": \"/answer\", \"keyword\": \"type\", \"message\": \"must be boolean\" }]\n }\n }\n}\n```\n\nAnswering a gate that does not exist is a recoverable command failure carrying\nthe broker error code `unknown_gate` (other broker codes are `already_resolved`,\n`idempotency_conflict`, and `invalid_workflow_stage`).\n## Extension UI Sub-Protocol\n\nExtensions in RPC mode use request/response UI frames.\n\n### Outbound request\n\n`RpcExtensionUIRequest` (`type: \"extension_ui_request\"`) methods:\n\n- `select`, `confirm`, `input`, `editor`, `cancel`\n- `notify`, `setStatus`, `setWidget`, `setTitle`, `set_editor_text`\n\nRuntime note:\n\n- Automatic session title generation is disabled in RPC mode, and `setTitle` UI\n requests are also suppressed by default because most hosts do not have a\n meaningful terminal-title surface. Set `GJC_RPC_EMIT_TITLE=1` to opt back in to\n the UI event only.\n\nExample:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"123\",\n \"method\": \"confirm\",\n \"title\": \"Confirm\",\n \"message\": \"Continue?\",\n \"timeout\": 30000\n}\n```\n\n### Inbound response\n\n`RpcExtensionUIResponse` (`type: \"extension_ui_response\"`):\n\n- `{ type: \"extension_ui_response\", id: string, value: string }`\n- `{ type: \"extension_ui_response\", id: string, confirmed: boolean }`\n- `{ type: \"extension_ui_response\", id: string, cancelled: true, timedOut?: boolean }`\n\nIf a dialog has a timeout, RPC mode resolves to a default value when timeout/abort fires.\n\n## Host Tool Sub-Protocol\n\nRPC hosts can expose custom tools to the agent by sending `set_host_tools`, then\nserving execution requests over the same transport.\n\n### Outbound request\n\nWhen the agent wants the host to execute one of those tools, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_call\",\n \"id\": \"host_1\",\n \"toolCallId\": \"toolu_123\",\n \"toolName\": \"echo_host\",\n \"arguments\": { \"message\": \"hello\" }\n}\n```\n\nIf the tool execution is later aborted, RPC mode emits:\n\n```json\n{\n \"type\": \"host_tool_cancel\",\n \"id\": \"host_cancel_1\",\n \"targetId\": \"host_1\"\n}\n```\n\n### Inbound updates and completion\n\nHosts can optionally stream progress:\n\n```json\n{\n \"type\": \"host_tool_update\",\n \"id\": \"host_1\",\n \"partialResult\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"working\" }]\n }\n}\n```\n\nCompletion uses:\n\n```json\n{\n \"type\": \"host_tool_result\",\n \"id\": \"host_1\",\n \"result\": {\n \"content\": [{ \"type\": \"text\", \"text\": \"done\" }]\n }\n}\n```\n\nSet top-level `isError: true` on `host_tool_result` to reject the pending host tool call and surface the returned text content as a tool error.\n\n## Host URI Sub-Protocol\n\nRPC hosts can also own custom URL schemes (virtual files). After\n`set_host_uri_schemes`, every read of `<scheme>://…` and write of\n`<scheme>://…` (when registered as `writable`) is bounced back to the host\nover the same transport.\n\n### Outbound request\n\nWhen a session tool resolves a host-owned URL, RPC mode emits:\n\n```json\n{\n \"type\": \"host_uri_request\",\n \"id\": \"uri_1\",\n \"operation\": \"read\",\n \"url\": \"db://users/42\"\n}\n```\n\nWrites look the same with `\"operation\": \"write\"` and an additional\n`\"content\": \"...\"` field carrying the full replacement bytes.\n\nIf the request is later aborted (caller cancels, session ends), RPC mode\nemits:\n\n```json\n{\n \"type\": \"host_uri_cancel\",\n \"id\": \"uri_cancel_1\",\n \"targetId\": \"uri_1\"\n}\n```\n\n### Inbound result\n\nFor successful reads:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"content\": \"id=42\\nname=Alice\\n\",\n \"contentType\": \"text/plain\",\n \"notes\": [\"fresh from cache\"],\n \"immutable\": false\n}\n```\n\nFor successful writes, omit content:\n\n```json\n{ \"type\": \"host_uri_result\", \"id\": \"uri_1\" }\n```\n\nTo reject the request, set `isError: true` and either populate `error` with\na message or fall back to `content` for textual error surfacing:\n\n```json\n{\n \"type\": \"host_uri_result\",\n \"id\": \"uri_1\",\n \"isError\": true,\n \"error\": \"row 42 not found\"\n}\n```\n\n### Constraints\n\n- The agent's `edit` tool does not target host URIs. Hosts that want to\n mutate virtual files expose `write` and let the model use the `write` tool\n with replacement content.\n- Schemes are global to the process; `set_host_uri_schemes` replaces the\n previous set, unregistering anything not in the new list.\n- Schemes are normalized to lowercase before registration.\n\n## Error Model and Recoverability\n\n### Command-level failures\n\nFailures are `success: false` with string `error`.\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"response\",\n \"command\": \"set_model\",\n \"success\": false,\n \"error\": \"Model not found: provider/model\"\n}\n```\n\n### Recoverability expectations\n\n- Most command failures are recoverable; process remains alive.\n- Malformed JSONL / parse-loop exceptions emit a `parse` error response and continue reading subsequent lines.\n- Empty `set_session_name` is rejected (`Session name cannot be empty`).\n- Extension UI responses with unknown `id` are ignored.\n- Process termination conditions are stdin close or explicit extension-triggered shutdown after the current command.\n\n## Compact Command Flows\n\n### 1) Prompt and stream\n\nstdin:\n\n```json\n{ \"id\": \"req_1\", \"type\": \"prompt\", \"message\": \"Summarize this repo\" }\n```\n\nstdout sequence (typical):\n\n```json\n{ \"id\": \"req_1\", \"type\": \"response\", \"command\": \"prompt\", \"success\": true }\n{ \"type\": \"agent_start\" }\n{ \"type\": \"message_update\", \"assistantMessageEvent\": { \"type\": \"text_delta\", \"delta\": \"...\" }, \"message\": { \"role\": \"assistant\", \"content\": [] } }\n{ \"type\": \"agent_end\", \"messages\": [] }\n```\n\n### 2) Prompt during streaming with explicit queue policy\n\nstdin:\n\n```json\n{\n \"id\": \"req_2\",\n \"type\": \"prompt\",\n \"message\": \"Also include risks\",\n \"streamingBehavior\": \"followUp\"\n}\n```\n\n### 3) Inspect and tune queue behavior\n\nstdin:\n\n```json\n{ \"id\": \"q1\", \"type\": \"get_state\" }\n{ \"id\": \"q2\", \"type\": \"set_steering_mode\", \"mode\": \"all\" }\n{ \"id\": \"q3\", \"type\": \"set_interrupt_mode\", \"mode\": \"wait\" }\n```\n\n### 4) Extension UI round trip\n\nstdout:\n\n```json\n{\n \"type\": \"extension_ui_request\",\n \"id\": \"ui_7\",\n \"method\": \"input\",\n \"title\": \"Branch name\",\n \"placeholder\": \"feature/...\"\n}\n```\n\nstdin:\n\n```json\n{ \"type\": \"extension_ui_response\", \"id\": \"ui_7\", \"value\": \"feature/rpc-host\" }\n```\n\n## OpenClaw / Hermes host integrations\n\nFor OpenClaw- or Hermes-style hosts, keep MCP servers and skills on the host side and expose the selected capabilities through RPC host tools. Do not import GJC runtime MCP internals directly; those package paths are intentionally quarantined. See [OpenClaw / Hermes RPC integration notes](./openclaw-hermes-rpc-integration.md).\n\n## Notes on `RpcClient` helper\n\n`src/modes/rpc/rpc-client.ts` is a convenience wrapper, not the protocol definition.\n\nCurrent helper characteristics:\n\n- Spawns `bun <cliPath> --mode rpc`\n- Correlates responses by generated `req_<n>` ids\n- Dispatches recognized `AgentEvent` types to event listeners\n- Dispatches top-level `workflow_gate` frames to `onWorkflowGate()` listeners\n- Supports host-owned custom tools via `setCustomTools()` and automatic handling of `host_tool_call` / `host_tool_cancel`\n- Exposes `respondGate()` for `workflow_gate_response` and waits for the accepted/rejected resolution envelope\n- Does **not** expose helper methods for every protocol command (for example, `set_interrupt_mode` and `set_session_name` are in protocol types but not wrapped as dedicated methods)\n\nUse raw protocol frames if you need complete surface coverage.\n\n## Workflow gates (agent-driven lifecycle)\n\nThe workflow-gate contract makes every human-gated lifecycle moment\n(deep-interview questions, ralplan approval, ultragoal execution sign-off)\nmachine-addressable so an external agent can answer it over RPC without\nscreen-scraping.\n\n### Outbound event: `workflow_gate`\n\n```json\n{\n \"type\": \"workflow_gate\",\n \"gate_id\": \"wg_4845_ralplan_000001\",\n \"stage\": \"ralplan\",\n \"kind\": \"approval\",\n \"schema\": { \"type\": \"string\", \"enum\": [\"approve\", \"request-changes\", \"reject\"] },\n \"schema_hash\": \"<sha256 of canonical schema>\",\n \"options\": [{ \"value\": \"approve\", \"label\": \"Approve execution\" }],\n \"context\": { \"title\": \"Approve plan?\", \"summary\": \"…\" },\n \"created_at\": \"2026-06-05T05:00:00.000Z\",\n \"required\": true\n}\n```\n\n- `gate_id` is **run-scoped and monotonic**: `wg_<run>_<stage>_<NNNNNN>`.\n- `stage` is one of `deep-interview` | `ralplan` | `ultragoal`. `team` is\n reserved and rejected for v1 (single-agent only).\n- `kind` is `question` | `approval` | `execution`.\n- `schema` is a **documented subset of JSON Schema 2020-12**. Supported keywords:\n `type`, `enum`, `const`, `properties`, `required`, `additionalProperties`,\n `items`, `minLength`, `maxLength`, `minimum`, `maximum`, `title`,\n `description`, `oneOf`, `anyOf`. Any other keyword is rejected at gate\n construction (`invalid_workflow_gate_schema`) so the server never advertises a\n schema it will not validate. `schema_hash` equals the server-side validation\n hash for that gate.\n\n### Inbound command: `workflow_gate_response`\n\n```json\n{ \"type\": \"workflow_gate_response\", \"gate_id\": \"wg_4845_ralplan_000001\", \"answer\": \"approve\", \"idempotency_key\": \"k1\" }\n```\n\nThe answer is validated against the advertised schema **before acceptance**:\n\n- Valid → resolution persisted before the workflow advances; response:\n `{ \"type\": \"response\", \"command\": \"workflow_gate_response\", \"success\": true, \"data\": { \"gate_id\": \"…\", \"status\": \"accepted\", \"answer_hash\": \"…\", \"resolved_at\": \"…\" } }`.\n- Invalid → the gate stays **pending** and the resolution carries a typed\n `invalid_workflow_gate_answer` error listing each `{ path, keyword, message, expected? }`.\n- Idempotency: replaying the same `idempotency_key` + identical body returns the\n cached resolution; the same key with a different body is an\n `idempotency_conflict`; answering an already-accepted gate is `already_resolved`.\n- Client helpers wait for this accepted/rejected resolution envelope; they must not treat the write of `workflow_gate_response` itself as completion.\n\n### Entering unattended mode: `negotiate_unattended`\n\nUnattended (zero-human) operation is **fail-closed**. The external agent must\ndeclare its budget, scopes, and action allowlist up front:\n\n```json\n{\n \"type\": \"negotiate_unattended\",\n \"declaration\": {\n \"actor\": \"openclaw/hermes\",\n \"budget\": { \"max_tokens\": 2000000, \"max_tool_calls\": 5000, \"max_wall_time_ms\": 3600000, \"max_cost_usd\": 20 },\n \"scopes\": [\"prompt\", \"control\", \"bash\"],\n \"action_allowlist\": [\"bash.readonly\", \"file.write\"]\n }\n}\n```\n\nA missing or partial declaration refuses unattended mode. Budget, scope, and\naudit enforcement are layered on this contract by the unattended control plane\n(see issues #318/#319/#320). Attended mode is unaffected: clients that never send\n`negotiate_unattended` keep the existing extension-UI / permission behavior.\n\n\n> **Status (live, #315/#318/#321):** the `workflow_gate` /\n> `workflow_gate_response` / `negotiate_unattended` frames, the answer-schema\n> validator, and the durable gate broker are defined, tested, and wired into\n> live session dispatch. When an unattended control plane is attached to the\n> session, `dispatchRpcCommand` routes `negotiate_unattended` and\n> `workflow_gate_response` through it (see\n> `packages/coding-agent/src/modes/shared/agent-wire/command-dispatch.ts`); a\n> session without that control plane returns a typed \"not available\" error for\n> these frames rather than silently dropping them.\n\n\n### Answering gates from a client (#322)\n\nBoth clients expose typed `workflow_gate` receive + respond helpers so an agent\ncan answer a gate from its own memory via a callback.\n\nFor bridge sessions, gate responses are **not** posted through `/commands`. The\nclient must first own the UI/control plane, then post the answer body to\n`POST /v1/sessions/{session_id}/ui-responses/{gate_id}` with\n`X-GJC-Bridge-Owner-Token: <ownerToken>`. `Idempotency-Key` may be supplied as a\nheader and the same value is also accepted in the JSON body as `idempotency_key`.\n\n`@gajae-code/bridge-client` (TypeScript):\n\n```ts\nimport { BridgeClient } from \"@gajae-code/bridge-client\";\n\nconst client = new BridgeClient({ baseUrl, token });\n// Headless policy: every received gate is routed to the resolver and answered.\nfor await (const { gate, answer } of client.consumeWorkflowGates(sessionId, ownerToken, gate => {\n if (gate.kind === \"approval\") return { decision: \"approve\" };\n if (gate.kind === \"question\") return { selected: [gate.options?.[0]?.value], other: false };\n return { decision: \"approve\" };\n})) {\n console.log(`answered ${gate.gate_id} (${gate.kind}) with`, answer);\n}\n// Or answer a single gate directly:\nawait client.respondGate(sessionId, gateId, ownerToken, { decision: \"approve\" });\n```\n\n`python/gjc-rpc` (Python):\n\n```python\nfrom gjc_rpc import RpcClient, WorkflowGate\n\nclient = RpcClient(executable=\"gjc\")\n\ndef resolver(gate: WorkflowGate) -> object:\n if gate.kind == \"approval\":\n return {\"decision\": \"approve\"}\n if gate.kind == \"question\":\n return {\"selected\": [gate.options[0].value] if gate.options else [], \"other\": False}\n return {\"decision\": \"approve\"}\n\n# Headless policy: route every received gate to the resolver and respond.\nclient.run_workflow_gate_policy(resolver)\nclient.start()\n# Or answer a single gate directly: client.respond_gate(gate_id, {\"decision\": \"approve\"})\n```\n",
|
|
57
58
|
"rulebook-matching-pipeline.md": "# Rulebook Matching Pipeline\n\nThis document describes how coding-agent discovers rules from supported config formats, normalizes them into a single `Rule` shape, resolves precedence conflicts, and splits the result into:\n\n- **Rulebook rules** (available to the model via system prompt + `rule://` URLs)\n- **TTSR rules** (time-travel stream interruption rules)\n\nIt reflects the current implementation, including partial semantics and metadata that is parsed but not enforced.\n\n## Implementation files\n\n- [`../src/capability/rule.ts`](../packages/coding-agent/src/capability/rule.ts)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/discovery/index.ts`](../packages/coding-agent/src/discovery/index.ts)\n- [`../src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`../src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`../src/discovery/cursor.ts`](../packages/coding-agent/src/discovery/cursor.ts)\n- [`../src/discovery/windsurf.ts`](../packages/coding-agent/src/discovery/windsurf.ts)\n- [`../src/discovery/cline.ts`](../packages/coding-agent/src/discovery/cline.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/system-prompt.ts`](../packages/coding-agent/src/system-prompt.ts)\n- [`../src/internal-urls/rule-protocol.ts`](../packages/coding-agent/src/internal-urls/rule-protocol.ts)\n- [`../src/utils/frontmatter.ts`](../packages/coding-agent/src/utils/frontmatter.ts)\n\n## 1. Canonical rule shape\n\nAll providers normalize source files into `Rule`:\n\n```ts\ninterface Rule {\n name: string;\n path: string;\n content: string;\n globs?: string[];\n alwaysApply?: boolean;\n description?: string;\n condition?: string[];\n scope?: string[];\n interruptMode?: \"never\" | \"prose-only\" | \"tool-only\" | \"always\";\n _source: SourceMeta;\n}\n```\n\nCapability identity is `rule.name` (`ruleCapability.key = rule => rule.name`).\n\nConsequence: precedence and deduplication are **name-based only**. Two different files with the same `name` are considered the same logical rule.\n\n## 2. Discovery sources and normalization\n\n`src/discovery/index.ts` auto-registers providers. For `rules`, current providers are:\n\n- `native` (priority `100`)\n- `cursor` (priority `50`)\n- `windsurf` (priority `50`)\n- `cline` (priority `40`)\n\n### Native provider (`builtin.ts`)\n\nLoads `.gjc` rules from:\n\n- project: `<cwd>/.gjc/rules/*.{md,mdc}`\n- user: `~/.gjc/agent/rules/*.{md,mdc}`\n\nNormalization:\n\n- `name` = filename without `.md`/`.mdc`\n- frontmatter parsed via `parseFrontmatter`\n- `content` = body (frontmatter stripped)\n- `globs`, `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by `buildRuleFromMarkdown`\n\nImportant caveat: `condition` values that look like file globs are converted into `tool:edit(...)` / `tool:write(...)` scope shorthands with catch-all condition `.*`.\n\n### Cursor provider (`cursor.ts`)\n\nLoads from:\n\n- user: `~/.cursor/rules/*.{mdc,md}`\n- project: `<cwd>/.cursor/rules/*.{mdc,md}`\n\nNormalization (`transformMDCRule`):\n\n- `description`: kept only if string\n- `alwaysApply`: only `true` is preserved (`false` becomes `undefined`)\n- `globs`: accepts array (string elements only) or single string\n- `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by shared rule helpers\n- `name` from filename without extension\n\n### Windsurf provider (`windsurf.ts`)\n\nLoads from:\n\n- user: `~/.codeium/windsurf/memories/global_rules.md` (fixed rule name `global_rules`)\n- project: `<cwd>/.windsurf/rules/*.md`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `global_rules` for the user global file and derived from filename for project rules\n\n### Cline provider (`cline.ts`)\n\nSearches upward from `cwd` for nearest `.clinerules`:\n\n- if directory: loads `*.md` inside it\n- if file: loads single file as rule named `clinerules`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `clinerules` for a `.clinerules` file and derived from filename for `.clinerules/*.md`\n\n## 3. Frontmatter parsing behavior and ambiguity\n\nAll providers use `parseFrontmatter` (`utils/frontmatter.ts`) with these semantics:\n\n1. Frontmatter is parsed only when content starts with `---` and has a closing `\\n---`.\n2. Body is trimmed after frontmatter extraction.\n3. If YAML parse fails:\n - warning is logged,\n - parser falls back to simple `key: value` line parsing (`^(\\w+):\\s*(.*)$`).\n\nAmbiguity consequences:\n\n- Fallback parser does not support arrays, nested objects, quoting rules, or hyphenated keys.\n- Fallback values become strings (for example `alwaysApply: true` becomes string `\"true\"`), so providers requiring boolean/string types may drop metadata.\n- `ttsr_trigger` works in fallback (underscore key); keys like `thinking-level` would not.\n- Files without valid frontmatter still load as rules with empty metadata and full content body.\n\n## 4. Provider precedence and deduplication\n\n`loadCapability(\"rules\")` (`capability/index.ts`) merges provider outputs and then deduplicates by `rule.name`.\n\n### Precedence model\n\n- Providers are ordered by priority descending.\n- Equal priority keeps registration order (`cursor` before `windsurf` from `discovery/index.ts`).\n- Dedup is first-wins: first encountered rule name is kept; later same-name items are marked `_shadowed` in `all` and excluded from `items`.\n\nEffective rule provider order is currently:\n\n1. `native` (100)\n2. `cursor` (50)\n3. `windsurf` (50)\n4. `cline` (40)\n\n### Intra-provider ordering caveat\n\nWithin a provider, item order comes from `loadFilesFromDir` glob result ordering plus explicit push order. This is deterministic enough for normal use but not explicitly sorted in code.\n\nNotable source-order differences:\n\n- `native` appends project then user config dirs.\n- `cursor` appends user then project results.\n- `windsurf` appends user `global_rules` first, then project rules.\n- `cline` loads only nearest `.clinerules` source.\n\n## 5. Split into Rulebook, Always-Apply, and TTSR buckets\n\nAfter rule discovery in `createAgentSession` (`sdk.ts`):\n\n1. All discovered rules are scanned.\n2. Rules with `condition` entries are registered into `TtsrManager`; legacy `ttsr_trigger` / `ttsrTrigger` are accepted during rule parsing as condition fallbacks.\n3. A separate `rulebookRules` list is built with this predicate:\n\n```ts\n!isTtsrRule && rule.alwaysApply !== true && !!rule.description;\n```\n\n4. An `alwaysApplyRules` list is built:\n\n```ts\n!isTtsrRule && rule.alwaysApply === true;\n```\n\n### Bucket behavior\n\n- **TTSR bucket**: any rule with a non-empty parsed `condition` that `TtsrManager.addRule(...)` accepts. Takes priority over other buckets.\n- **Always-apply bucket**: `alwaysApply === true`, not TTSR. Full content injected into system prompt. Resolvable via `rule://`.\n- **Rulebook bucket**: must have description, must not be TTSR, must not be `alwaysApply`. Listed in system prompt by name+description; content read on demand via `rule://`.\n- A rule with both `condition` and `alwaysApply` goes to TTSR only (TTSR takes priority).\n- A rule with both `alwaysApply` and `description` goes to always-apply only (not rulebook).\n\n## 6. How metadata affects runtime surfaces\n\n### `description`\n\n- Required for inclusion in rulebook.\n- Rendered in system prompt `<rules>` block.\n- Missing description means rule is not available via `rule://` and not listed in system prompt rules.\n\n### `globs`\n\n- Carried through on `Rule`.\n- Rendered as `<glob>...</glob>` entries in the system prompt rules block.\n- Exposed in rules UI state (`extensions` mode list).\n- **Not enforced for automatic matching in this pipeline.** There is no runtime glob matcher selecting rules by current file/tool target.\n\n### `alwaysApply`\n\n- Parsed and preserved by providers.\n- Used in UI display (`\"always\"` trigger label in extensions state manager).\n- Used as an exclusion condition from `rulebookRules`.\n- **Full rule content is auto-injected into the system prompt** (before the rulebook rules section).\n- Rule is also addressable via `rule://<name>` for re-reading.\n\n### `condition`, `scope`, and `interruptMode`\n\n- `condition` is the current TTSR trigger field; legacy `ttsr_trigger` / `ttsrTrigger` are accepted as fallback inputs during parsing.\n- `scope` narrows TTSR matching scope. A condition token that looks like a file glob becomes `tool:edit(<glob>)` and `tool:write(<glob>)` scope entries plus catch-all condition `.*`.\n- `interruptMode` can override the global TTSR interrupt mode for the rule.\n\n## 7. System prompt inclusion path\n\n`buildSystemPromptInternal` receives both `rules` (rulebook) and `alwaysApplyRules`.\n\nAlways-apply rules are rendered first, injecting their raw content directly into the prompt.\n\nRulebook rules are rendered in a `# Rules` section with:\n\n- `Read rule://<name> when working in matching domain`\n- Each rule's `name`, `description`, and optional `<glob>` list\n\nThis is advisory/contextual: prompt text asks the model to read applicable rules, but code does not enforce glob applicability.\n\n## 8. `rule://` internal URL behavior\n\n`RuleProtocolHandler` is registered with:\n\n```ts\nnew RuleProtocolHandler({\n getRules: () => [...rulebookRules, ...alwaysApplyRules],\n});\n```\n\nImplications:\n\n- `rule://<name>` resolves against both **rulebookRules** and **alwaysApplyRules**.\n- TTSR-only rules and rules with no description and no `alwaysApply` are not addressable via `rule://`.\n- Resolution is exact name match.\n- Unknown names return error listing available rule names.\n- Returned content is raw `rule.content` (frontmatter stripped), content type `text/markdown`.\n\n## 9. Known partial / non-enforced semantics\n\n1. Provider descriptions mention legacy files (`.cursorrules`, `.windsurfrules`), but current loader code paths do not actually read those files.\n2. `globs` metadata is surfaced to prompt/UI but not enforced by rule selection logic.\n3. Rule selection for `rule://` includes rulebook and always-apply rules, but not TTSR-only rules.\n4. Discovery warnings (`loadCapability(\"rules\").warnings`) are produced but `createAgentSession` does not currently surface/log them in this path.\n",
|
|
@@ -61,6 +62,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
61
62
|
"session-switching-and-recent-listing.md": "# Session switching and recent session listing\n\nThis document describes how coding-agent discovers recent sessions, resolves `--resume` targets, presents session pickers, and switches the active runtime session.\n\nIt focuses on current implementation behavior, including fallback paths and caveats.\n\n## Implementation files\n\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/cli/session-picker.ts`](../packages/coding-agent/src/cli/session-picker.ts)\n- [`../src/modes/components/session-selector.ts`](../packages/coding-agent/src/modes/components/session-selector.ts)\n- [`../src/modes/controllers/selector-controller.ts`](../packages/coding-agent/src/modes/controllers/selector-controller.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n\n## Recent-session discovery\n\n### Directory scope\n\n`SessionManager` stores sessions under a cwd-scoped directory by default:\n\n- `~/.gjc/agent/sessions/--<cwd-encoded>--/*.jsonl`\n\n`SessionManager.list(cwd, sessionDir?)` reads only that directory unless an explicit `sessionDir` is provided.\n\n### Two listing paths with different payloads\n\nThere are two different listing pipelines:\n\n1. `getRecentSessions(sessionDir, limit)` (welcome/summary view)\n - Reads only a 4KB prefix (`readTextPrefix(..., 4096)`) from each file.\n - Parses header + earliest user text preview.\n - Returns lightweight `RecentSessionInfo` with lazy `name` and `timeAgo` getters.\n - Sorts by file `mtime` descending.\n\n2. `SessionManager.list(...)` / `SessionManager.listAll()` (resume pickers and ID matching)\n - Reads full session files.\n - Builds `SessionInfo` objects (`id`, `cwd`, `title`, `messageCount`, `firstMessage`, `allMessagesText`, timestamps).\n - Drops sessions with zero `message` entries.\n - Sorts by `modified` descending.\n\n### Metadata fallback behavior\n\nFor recent summaries (`RecentSessionInfo`):\n\n- display name preference: `header.title` -> first user prompt -> `header.id` -> filename\n- name is truncated to 40 chars for compact displays\n- control characters/newlines are stripped/sanitized from title-derived names\n\nFor `SessionInfo` list entries:\n\n- `title` is `header.title` or latest compaction `shortSummary`\n- `firstMessage` is first user message text or `\"(no messages)\"`\n\n## `--continue` resolution and terminal breadcrumb preference\n\n`SessionManager.continueRecent(cwd, sessionDir?)` resolves the target in this order:\n\n1. Read terminal-scoped breadcrumb (`~/.gjc/agent/terminal-sessions/<terminal-id>`)\n2. Validate breadcrumb:\n - current terminal can be identified\n - breadcrumb cwd matches current cwd (resolved path compare)\n - referenced file still exists\n3. If breadcrumb is invalid/missing, fall back to newest file by mtime in the session dir (`findMostRecentSession`)\n4. If none found, create a new session\n\nTerminal ID derivation prefers TTY path and falls back to env-based identifiers (`KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION`).\n\nBreadcrumb writes are best-effort and non-fatal.\n\n## Startup-time resume target resolution (`main.ts`)\n\n### `--resume <value>`\n\n`createSessionManager(...)` handles string-valued `--resume` in two modes:\n\n1. Path-like value (contains `/`, `\\\\`, or ends with `.jsonl`)\n - direct `SessionManager.open(sessionArg, parsed.sessionDir)`\n\n2. ID prefix value\n - find match in `SessionManager.list(cwd, sessionDir)` by `id.startsWith(sessionArg)`\n - if no local match and `sessionDir` is not forced, try `SessionManager.listAll()`\n - first match is used (no ambiguity prompt)\n\nCross-project match behavior:\n\n- if matched session cwd differs from current cwd, CLI prompts whether to fork into current project\n- yes -> `SessionManager.forkFrom(...)`\n- no -> throws error (`Session \"...\" is in another project (...)`)\n\nNo match -> throws error (`Session \"...\" not found.`).\n\n### `--resume` (no value)\n\nHandled after initial session-manager construction:\n\n1. list local sessions with `SessionManager.list(cwd, parsed.sessionDir)`\n2. if empty: print `No sessions found` and exit early\n3. open TUI picker (`selectSession`)\n4. if canceled: print `No session selected` and exit early\n5. if selected: `SessionManager.open(selectedPath)`\n\n### `--continue`\n\nUses `SessionManager.continueRecent(...)` directly (breadcrumb-first behavior above).\n\n## Picker-based selection internals\n\n## CLI picker (`src/cli/session-picker.ts`)\n\n`selectSession(sessions)` creates a standalone TUI with `SessionSelectorComponent` and resolves exactly once:\n\n- selection -> resolves selected path\n- cancel (Esc) -> resolves `null`\n- hard exit (Ctrl+C path) -> stops TUI and `process.exit(0)`\n\n## Interactive in-session picker (`SelectorController.showSessionSelector`)\n\nFlow:\n\n1. fetch sessions from current session dir via `SessionManager.list(currentCwd, currentSessionDir)`\n2. mount `SessionSelectorComponent` in editor area using `showSelector(...)`\n3. callbacks:\n - select -> close selector and call `handleResumeSession(sessionPath)`\n - cancel -> restore editor and rerender\n - exit -> `ctx.shutdown()`\n\n## Session selector component behavior\n\n`SessionList` supports:\n\n- arrow/page navigation\n- Enter to select\n- Esc to cancel\n- Ctrl+C to exit\n- fuzzy search across session id/title/cwd/first message/all messages/path\n\nEmpty-list render behavior:\n\n- renders a message instead of crashing\n- Enter on empty does nothing (no callback)\n- Esc/Ctrl+C still work\n\nCaveat: UI text says `Press Tab to view all`, but this component currently has no Tab handler and current wiring only lists current-scope sessions.\n\n## Runtime switch execution (`AgentSession.switchSession`)\n\n`switchSession(sessionPath)` is the core in-process switch path.\n\nLifecycle/state transition:\n\n1. capture `previousSessionFile`\n2. emit `session_before_switch` hook event (`reason: \"resume\"`, cancellable)\n3. if canceled -> return `false` with no switch\n4. disconnect from current agent event stream\n5. abort active generation/tool flow\n6. clear queued steering/follow-up/next-turn message buffers\n7. flush session writer (`sessionManager.flush()`) to persist pending writes\n8. `sessionManager.setSessionFile(sessionPath)`\n - updates session file pointer\n - writes terminal breadcrumb\n - loads entries / migrates / blob-resolves / reindexes\n - if missing/invalid file data: initializes a new session at that path and rewrites header\n9. update `agent.sessionId`\n10. rebuild display context via `buildDisplaySessionContext()`\n11. restore persisted/discovered MCP tool selections and rebuild active tools/system prompt when discovery is enabled\n12. emit `session_switch` hook event (`reason: \"resume\"`, `previousSessionFile`)\n13. replace agent messages with rebuilt context and sync todos\n14. close provider sessions when switching to a different session or when same-session reload changed replay messages\n15. restore default model from `sessionContext.models.default` if available and present in model registry\n16. restore thinking level and service tier:\n - thinking uses persisted `thinking_level_change`, otherwise the configured default clamped to model capability\n - service tier uses persisted `service_tier_change`, otherwise the configured `serviceTier` setting (`\"none\"` becomes unset)\n17. reconnect agent listeners and return `true`\n\n## UI state rebuild after interactive switch\n\n`SelectorController.handleResumeSession` performs UI reset around `switchSession`:\n\n- stop loading animation\n- clear status container\n- clear pending-message UI and pending tool map\n- reset streaming component/message references\n- call `session.switchSession(...)`\n- clear chat container and rerender from session context (`renderInitialMessages`)\n- reload todos from new session artifacts\n- show `Resumed session`\n\nSo visible conversation/todo state is rebuilt from the new session file.\n\n## Startup resume vs in-session switch\n\n### Startup resume (`--continue`, `--resume`, direct open)\n\n- Session file is chosen before `createAgentSession(...)`.\n- `sdk.ts` builds `existingSession = sessionManager.buildSessionContext()`.\n- Agent messages are restored once during session creation.\n- Model/thinking are selected during creation (including restore/fallback logic).\n- Interactive mode then runs `#restoreModeFromSession()` to re-enter persisted mode state (currently plan/plan_paused).\n\n### In-session switch (`/resume`-style selector path)\n\n- Uses `AgentSession.switchSession(...)` on an already-running `AgentSession`.\n- Messages/model/thinking are rebuilt immediately in place.\n- Hook `session_before_switch`/`session_switch` events are emitted.\n- UI chat/todos are refreshed.\n- No dedicated post-switch mode restore call is made in selector flow; mode re-entry behavior is not symmetric with startup `#restoreModeFromSession()`.\n\n## Failure and edge-case behavior\n\n### Cancellation paths\n\n- CLI picker cancel -> returns `null`, caller prints `No session selected`, process exits early.\n- Interactive picker cancel -> editor restored, no session change.\n- Hook cancellation (`session_before_switch`) -> `switchSession()` returns `false`.\n\n### Empty list paths\n\n- CLI `--resume` (no value): empty list prints `No sessions found` and exits.\n- Interactive selector: empty list renders message and remains cancellable.\n\n### Missing/invalid target session file\n\nWhen opening/switching to a specific path (`setSessionFile`):\n\n- ENOENT -> treated as empty -> new session initialized at that exact path and persisted.\n- malformed/invalid header (or effectively unreadable parsed entries) -> treated as empty -> new session initialized and persisted.\n\nThis is recovery behavior, not hard failure.\n\n### Hard failures\n\nSwitch/open can still throw on true I/O failures (permission errors, rewrite failures, etc.), which propagate to callers.\n\n### ID prefix matching caveats\n\n- ID matching uses `startsWith` and takes first match in sorted list.\n- No ambiguity UI if multiple sessions share prefix.\n- `SessionManager.list(...)` excludes sessions with zero messages, so those sessions are not resumable via ID match/list picker.\n",
|
|
62
63
|
"session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/modes/controllers/input-controller.ts` — command routing (`/tree`, `/branch`, double-escape behavior)\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices:\n\n- `#byId: Map<string, SessionEntry>` — fast lookup for any entry\n- `#leafId: string | null` — current position in the tree\n- `#labelsById: Map<string, string>` — resolved labels by target entry id\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from `labelsById`\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from resolved `labelsById` for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-manager.ts`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role templated context messages\n- `custom`/`hookMessage` become user-role content messages\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `labelsById` is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? })` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Plan approval session naming\n\nWhen a user approves a plan from plan mode (`InteractiveMode.#approvePlan`), the approval handler seeds the session name from the plan's title so the resulting (fresh or compacted) session does not stay unnamed.\n\nTrigger:\n\n- Plan approval reaches `#approvePlan(...)` with `options.title` populated from the plan-approval details.\n- This runs for every approval choice (`Approve and execute`, `Approve and compact context`, plain `Approve`); the synthetic `plan-approved` prompt is what otherwise bypasses the input-controller's title-generation path.\n\nNaming source:\n\n- The normalized plan title is humanized via `humanizePlanTitle(title)` (`packages/coding-agent/src/plan-mode/approved-plan.ts`):\n - replaces runs of `-`/`_` with a single space\n - trims whitespace\n - capitalizes the first character\n - returns `\"\"` for whitespace-only / separator-only input\n- The humanized name is applied with `sessionManager.setSessionName(name, \"auto\")`. Because `setSessionName` is a no-op when `titleSource === \"user\"`, the seeded name never overrides a name the user already chose (e.g. on the `preserveContext` path where the session continues with prior naming).\n- On successful apply, the terminal title (`setSessionTerminalTitle`) and the editor border color are refreshed to reflect the new name.\n\nExamples (from `humanizePlanTitle`):\n\n- `migrate-mcp-loader` → `Migrate mcp loader`\n- `fix_session_naming` → `Fix session naming`\n- `foo--bar__baz` → `Foo bar baz`\n- `RefactorRouter` → `RefactorRouter` (no separators to expand)\n- `\"\"` / `\"---\"` → `\"\"` (no name applied)\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
|
|
63
64
|
"session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts)\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts)\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.gjc/agent/sessions/--<cwd-encoded>--/<timestamp>_<sessionId>.jsonl\n```\n\n`<cwd-encoded>` is derived from the working directory by stripping leading slash and replacing `/`, `\\\\`, and `:` with `-`.\n\nBlob store location:\n\n```text\n~/.gjc/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.gjc/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. `continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `thinking_level_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"anthropic-model-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", \"text\": \"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by `buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" }\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": \"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { \"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the entire file is rewritten to disk immediately.\n- Migration mutates in-memory entries first, then persists rewritten JSONL.\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId, byId?)` resolves what is sent to the model.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nWrites are serialized through an internal promise chain (`#persistChain`) and `NdjsonFileWriter`.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` flushes writer and calls `fsync()`.\n- Atomic full rewrites (`#rewriteFile`) write to temp file, flush+fsync, close, then rename over target.\n- Used for migrations, `setSessionName`, `rewriteEntries`, move operations, and tool-call arg rewrites.\n\n### Error behavior\n\n- Persistence errors are latched (`#persistError`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextPrefix`, `writeText`, `rename`, `unlink`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `writeLine`, `flush`, `fsync`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDefined in `session-manager.ts`:\n\n- `getRecentSessions(sessionDir, limit)` -> lightweight metadata for UI/session picker\n- `findMostRecentSession(sessionDir)` -> newest by mtime\n- `list(cwd, sessionDir?)` -> sessions in one project scope\n- `listAll()` -> sessions across all project scopes under `~/.gjc/agent/sessions`\n\nMetadata extraction reads only a prefix (`readTextPrefix(..., 4096)`) where possible.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.gjc/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts using in-memory last-prompt cache\n- Async insertion (`setImmediate`) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
|
|
65
|
+
"standalone-mcp.md": "# Standalone GJC MCP support\n\nThis page answers the common user question: “Does normal `gjc` inherit my Claude Code/Codex MCP servers, or can I configure MCP servers directly for the standalone TUI?”\n\n## Short answer\n\nNormal standalone GJC (`gjc`, `gjc --tmux`, and print-mode prompts) does **not** inherit MCP servers from Claude Code, Codex, Cursor, Gemini, Windsurf, or other tools as a public startup contract.\n\nThere is also no supported user-facing `~/.gjc/agent/config.yml` setting that makes the normal standalone TUI automatically load arbitrary MCP servers for the model. Historical/internal MCP discovery code and `.mcp.json` shapes exist in the repository, but they are not the supported public integration boundary for ordinary standalone sessions.\n\nIf you rely on a specific MCP server, run it through one of the supported integration surfaces below instead of assuming another tool's MCP config will be imported into standalone GJC.\n\n## What is supported today\n\n| Need | Use | Notes |\n| --- | --- | --- |\n| External bot or multi-session controller wants to drive GJC | [Coordinator MCP](./hermes-mcp-bridge.md) via `gjc mcp-serve coordinator` | GJC exposes an **outward** MCP server with GJC coordinator tools. This is not a way to import arbitrary MCP tools into the standalone TUI. |\n| Editor/ACP client owns MCP servers and wants GJC as the agent backend | [ACP mode](./external-control-readiness.md#acp-mode) via `gjc --mode acp` or `gjc acp` | The ACP client supplies and owns MCP servers. GJC keeps those client-owned MCP tools isolated from standalone on-disk discovery. |\n| Host application already manages MCP servers and policies | [RPC host tools](./rpc.md#host-tool-sub-protocol) via `gjc --mode rpc` | Convert the selected MCP capabilities into host-owned RPC tools. The host executes the MCP call and returns `host_tool_result`. |\n| OpenClaw/Hermes-style host wants to map its own MCP/skills into GJC | [OpenClaw / Hermes RPC integration notes](./openclaw-hermes-rpc-integration.md) | Treat MCP as a host implementation detail and expose only policy-approved capabilities as RPC host tools. |\n\n## What standalone GJC does not do\n\nStandalone GJC does **not** currently promise any of these behaviors:\n\n- reading Claude Code's global MCP server list and automatically enabling it;\n- reading Codex MCP server config as an inherited runtime contract;\n- merging multiple tools' MCP configs into the normal TUI at startup;\n- making `.mcp.json`, `mcp.json`, `.codex/config.toml`, or other discovered files a stable public standalone-TUI config API;\n- exposing Coordinator MCP tools as ordinary in-session model tools.\n\nThis boundary is intentional: MCP servers often carry credentials, local filesystem reach, browser/session state, approval semantics, and tool names that belong to the host that configured them. Blind inheritance would mix policies between products and make it unclear which process owns credentials, approvals, sandboxing, and lifecycle.\n\n## Recommended workaround for a specific MCP server\n\nIf you need a context engine, internal search server, browser MCP, database MCP, or another custom MCP inside GJC:\n\n1. Keep the MCP server configured in the host that owns its credentials and policy.\n2. Start GJC through RPC (`gjc --mode rpc`) from that host.\n3. Register a narrow host-owned tool with `set_host_tools` / `RpcClient#setCustomTools()`.\n4. Have the host tool call the real MCP server and return the result to GJC as `host_tool_result`.\n\nThat shape keeps the MCP server's auth, approvals, filesystem access, and process lifetime with the host while still letting the GJC model request the capability when needed.\n\nFor multi-session orchestration, prefer Coordinator MCP instead. Coordinator MCP lets an external controller start/register sessions, send turns, answer questions, read artifacts, and write durable status reports; it does not import arbitrary MCP servers into a standalone TUI session.\n\n## Related docs\n\n- [Coordinator MCP bridge](./hermes-mcp-bridge.md)\n- [External control surface readiness](./external-control-readiness.md)\n- [RPC Protocol Reference](./rpc.md)\n- [OpenClaw / Hermes RPC integration notes](./openclaw-hermes-rpc-integration.md)\n- [Clawhip-routed GJC sessions](./gjc-session-clawhip-routing.md)\n",
|
|
64
66
|
"theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@gajae-code/natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`ThemeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (14)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. built-in embedded themes (`red-claw.json`, `blue-crab.json`, `claude-code.json`, `codex.json`, and `opencode.json` compiled into `defaultThemes`)\n2. custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.gjc/agent/themes`\n- overridden by `GJC_CODING_AGENT_DIR` (`$GJC_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. dark slot fallback\n\nBuilt-in theme note: `red-claw` is the default dark GJC theme, and `blue-crab` is the default light-slot theme. Both are crustacean brand themes with separate semantic error/warning/diff-removal tokens and crab-oriented symbol overrides. Three additional bundled migration themes — `claude-code`, `codex`, and `opencode` — mirror the look of those tools for easy eye-migration. All three are dark-classified and recommended for `theme.dark`, but are selectable in either slot; they keep GJC's default symbol identity (no crab-symbol overrides).\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"red-claw\"`\n- `theme.light = \"blue-crab\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nThe settings theme picker is confirm-only; arrow-key browsing does not call `previewTheme`, so the rendered theme and displayed/persisted theme name stay aligned until Enter confirms a new selection.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme settings are persisted\n\nTheme-related settings are persisted by `Settings` to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.gjc/agent`\n- effective default file: `~/.gjc/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- `colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection; legacy built-in names `dark`/`light` map to `red-claw`/`blue-crab` unless matching custom theme files exist.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.gjc/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Display -> Dark theme` or `Display -> Light theme`) depending on which auto slot you want. All bundled themes are selectable: the crustacean defaults `red-claw` and `blue-crab`, plus the migration themes `claude-code`, `codex`, and `opencode` (dark-classified, recommended for the dark slot but selectable in either).\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n \"toolDiffAdded\": \"#9ece6a\",\n \"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n \"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and confirm the custom theme in the dark/light theme picker; arrow-key browsing is intentionally non-mutating.\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
|
|
65
67
|
"tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more choices or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Explicit options. The UI always appends `Other (type your own)`; callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. `AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds, and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `30` seconds; `0` disables timeout (`packages/coding-agent/src/config/settings-schema.ts`).\n- Prompt guidance says provide 2-5 options, but code does not enforce that (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
|
|
66
68
|
"tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/coding-agent/src/hashline/hash.ts` — stable hashline diff anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines: `-REF|before` and `+REF|after` in hashline mode, or `-LINE:COLUMN before` / `+LINE:COLUMN after` when hashlines are off.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `GJC_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and rejects the apply as an error if the live result no longer matches the preview (`stalePreview`).\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `GJC_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than applying a mismatched preview silently.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated only by totals and per-file counts, not by a byte-for-byte diff of every replacement payload.",
|
|
@@ -89,7 +91,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
89
91
|
"tools/search.md": "# search\n\n> Search file contents with a regex across files, directories, globs, and internal URLs.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/match-line-format.ts` — model-facing anchor formatting.\n - `packages/coding-agent/src/tools/path-utils.ts` — path normalization, glob splitting, internal URL resolution.\n - `packages/coding-agent/src/tools/file-recorder.ts` — file ordering for grouped output.\n - `packages/coding-agent/src/tools/grouped-file-output.ts` — grouped per-file text layout.\n - `packages/coding-agent/src/session/streaming-output.ts` — line truncation and final byte truncation.\n - `packages/coding-agent/src/config/settings-schema.ts` — default context lines.\n - `packages/natives/native/index.d.ts` — native `grep()` types exposed to TS.\n - `crates/pi-natives/src/grep.rs` — native regex/file search implementation.\n - `docs/natives-text-search-pipeline.md` — native search pipeline overview.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pattern` | `string` | Yes | Regex pattern. `search.ts` trims it and rejects empty input. The native matcher enables multiline only when the pattern text contains a literal newline or the two-character sequence `\\\\n`. The model prompt explicitly documents literal-brace escaping such as ``interface\\\\{\\\\}``, although the native layer also auto-escapes braces that cannot be valid repetition quantifiers. |\n| `paths` | `string[]` | Yes | One or more file paths, directory paths, glob-like paths, or internal URLs. Empty strings are rejected after trimming/quote stripping. Internal URLs must resolve to a backing file and cannot contain glob characters. |\n| `i` | `boolean` | No | Case-insensitive search. Defaults to `false`. Passed to native `ignoreCase`. |\n| `gitignore` | `boolean` | No | Respect `.gitignore` during directory scans. Defaults to `true`. Passed to native `gitignore`. |\n| `skip` | `number` | No | Global match offset. Defaults to `0`. `search.ts` floors finite numbers and rejects negative or non-finite values. |\n\n## Outputs\nThe tool returns a single text block in `content[0].text` plus structured `details`.\n\n- Match lines are formatted by `formatMatchLine()` as `*<anchor>|<line>` for matches and ` <anchor>|<line>` for context.\n - Hashline mode: `*5th|content`, ` 9x}|content`.\n - Plain mode: `*5|content`, ` 9|content`.\n- Directory results are grouped by file, with `# <path>` headings and blank lines between groups.\n- `details` may include:\n - `scopePath` — formatted search scope.\n - `matchCount`, `fileCount`, `files`, `fileMatches` — counts for the returned page, not necessarily total corpus counts.\n - `matchLimitReached` — visible-page limit hit (`100`).\n - `resultLimitReached` — native preselection limit hit (`500`).\n - `linesTruncated` — one or more matched lines were shortened to `1024` chars plus `…`.\n - `truncated` and `meta.truncation` — final text output was head-truncated by `truncateHead()`.\n - `displayContent` — TUI-only rendering text with `│` gutters instead of model anchors.\n - `missingPaths` — multi-path entries skipped because their base path did not exist.\n- No-match result text is `No matches found`, optionally followed by `Skipped missing paths: ...`.\n\n## Flow\n1. `SearchTool.execute()` validates and normalizes input in `packages/coding-agent/src/tools/search.ts`:\n - trims `pattern`, rejects empty patterns;\n - normalizes `skip` to a non-negative integer;\n - reads `search.contextBefore` and `search.contextAfter` from session settings (`1` and `3` by default);\n - enables multiline only when `pattern` contains `\\n` or an actual newline.\n2. Each `paths` entry is normalized with `normalizePathLikeInput()`.\n3. Internal URLs are resolved through `session.internalRouter`:\n - glob metacharacters (`*`, `?`, `[`, `{`) are rejected for internal URLs;\n - URLs without `resource.sourcePath` fail;\n - immutable sources are tracked so output can suppress editable hashline anchors per file.\n4. For multi-path calls, `partitionExistingPaths()` skips only ENOENT entries. If every entry is missing, the tool errors.\n5. Path resolution branches:\n - one entry: `parseSearchPath()` splits `basePath` and optional glob;\n - multiple entries: `resolveExplicitSearchPaths()` computes a common base directory, brace-union glob, exact-file list, or degenerate-root target list.\n6. `search.ts` stats the resolved base path to decide file vs directory behavior.\n7. It calls native `grep()` from `@gajae-code/natives` with:\n - `pattern`, `ignoreCase`, `multiline`, `gitignore`;\n - `hidden: true`;\n - `cache: false`;\n - `contextBefore` / `contextAfter` from settings;\n - `maxColumns: 1024`;\n - `mode: content`.\n8. Native execution happens in `crates/pi-natives/src/grep.rs`:\n - `build_matcher()` sanitizes non-quantifier braces before regex compile;\n - if compile fails with unopened/unclosed-group errors, it retries after escaping previously unescaped parentheses;\n - directory scans use the grep pipeline described in `docs/natives-text-search-pipeline.md`.\n9. Search dispatch differs by resolved path set:\n - exact explicit files or degenerate-root multi-targets: JS loops over targets and merges `grep()` results itself;\n - single file/directory base: one `grep()` call handles offset/limit natively.\n10. JS output shaping then:\n - round-robins directory matches down to `100` visible matches so one file does not monopolize the page;\n - keeps the first `100` file matches for single-file searches;\n - formats lines through `formatMatchLine()` for the model and `formatCodeFrameLine()` for TUI;\n - records non-truncated matched/context lines into the session file-read cache with `recordSparse()`.\n11. Final text is passed through `truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER })`, so the effective cap is the default byte cap from `streaming-output.ts`, not the default line cap.\n12. `toolResult()` attaches text plus limit/truncation metadata.\n\n## Modes / Variants\n1. **Single file path**\n - `grep()` searches one file.\n - Output is a flat list of match/context lines.\n - Visible limit is the first `100` matches after native offset handling.\n2. **Single directory path or single glob-like path**\n - `parseSearchPath()` may split the input into `path` + `glob`.\n - One native `grep()` scans the directory tree with `gitignore` and `hidden:true`.\n - Native `offset` handles `skip` globally across files.\n - JS round-robins the returned matches to `100` visible rows.\n3. **Multiple explicit paths/globs**\n - `resolveExplicitSearchPaths()` collapses them into a common base and either a brace-union glob, an explicit file list, or per-target searches when the only common base is the filesystem root.\n - Missing entries are skipped non-fatally unless all are missing.\n4. **Internal URL paths**\n - Supported only when the internal resource resolves to a real backing file.\n - No internal-URL globbing.\n - Immutable sources switch to the immutable display mode when formatting anchors.\n\n## Side Effects\n- Filesystem\n - Stats resolved search roots and input paths.\n - Reads matched files through native `grep()`.\n - Records sparse matched/context lines into the session file-read cache via `getFileReadCache(...).recordSparse(...)`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads session settings for context defaults.\n - Uses `session.internalRouter` to resolve internal URLs.\n - Populates tool `details.meta` with truncation/limit metadata.\n- Background work / cancellation\n - Wrapped in `untilAborted(signal, ...)` at the JS level.\n - `search.ts` does not pass `signal` or `timeoutMs` into native `grep()`, so native grep cancellation/timeouts are not used by this tool.\n\n## Limits & Caps\n- Visible page limit: `100` matches (`DEFAULT_MATCH_LIMIT` in `packages/coding-agent/src/tools/search.ts`).\n- Native preselection limit: `500` matches (`internalLimit = Math.min(DEFAULT_MATCH_LIMIT * 5, 2000)` in `packages/coding-agent/src/tools/search.ts`).\n- Line truncation: `1024` characters per emitted line (`DEFAULT_MAX_COLUMN` in `packages/coding-agent/src/session/streaming-output.ts`). Native grep marks truncated lines; JS reports `linesTruncated`.\n- Final text truncation: `truncateHead()` default byte cap `50 * 1024` bytes (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`). `search.ts` overrides `maxLines` to `Number.MAX_SAFE_INTEGER`, so normal search output is byte-capped, not line-capped.\n- Context defaults: `search.contextBefore = 1`, `search.contextAfter = 3` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Pagination: `skip` is a global match offset. In single-base searches it is pushed into native `offset`; in exact-file/multi-target aggregation it is applied in JS with `matches.slice(skip)`.\n- Native directory-scan cache: available in `grep.rs`, but this tool always sets `cache: false`.\n\n## Errors\n- `Pattern must not be empty` when trimmed `pattern` is empty.\n- `Skip must be a non-negative number` for negative or non-finite `skip`.\n- `` `paths` must contain non-empty paths or globs `` when any normalized path is empty.\n- `Glob patterns are not supported for internal URLs: ...` for internal URL + glob metacharacters.\n- `Cannot search internal URL without a backing file: ...` when the router resolves a virtual resource without `sourcePath`.\n- `Path not found: ...` when the resolved base path is missing, or when every multi-path entry is missing.\n- Regex compile failures bubble from native `grep()` as tool errors. `search.ts` has a special catch for messages beginning with `regex parse error`, then otherwise rethrows.\n- Multi-file native scans skip per-file open/search failures inside `grep.rs`; the scan continues with surviving files.\n\n## Notes\n- The model-facing prompt documents standard regex syntax plus two search-specific rules: escape literal braces, and use `\\n` or a literal newline for cross-line matching.\n- Native `build_matcher()` already auto-escapes braces that cannot be valid quantifiers, so patterns like `${platform}` become searchable instead of failing. Valid quantifiers like `a{2,4}` remain unchanged.\n- Native compile retry also escapes unescaped literal parentheses only after an unopened/unclosed-group parse error. It is a fallback, not a general parser mode.\n- Internal URLs are resolved before path existence checks. After resolution, the native layer sees ordinary filesystem paths.\n- `hidden:true` is hard-coded in `search.ts`; there is no model-facing flag to exclude dotfiles.\n- `gitignore:false` only affects native directory traversal. It does not disable the tool's own path normalization or explicit-file handling.\n- When `paths` resolves to multiple exact files, `search.ts` does not apply the native `500` match cap and reports `totalMatches` internally as the post-skip length for that branch.\n- The anchor suffix in hashline mode comes from `computeLineHash()` in `packages/coding-agent/src/hashline/hash.ts`; `search` itself only formats it.\n",
|
|
90
92
|
"tools/search_tool_bm25.md": "# search_tool_bm25\n\n> Search the hidden tool-discovery index and activate the top matches for the current session.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/search-tool-bm25.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/search-tool-bm25.md`\n- Key collaborators:\n - `packages/coding-agent/src/tool-discovery/tool-index.ts` — discoverable-tool metadata and BM25 index/search.\n - `packages/coding-agent/src/session/agent-session.ts` — session discovery mode, corpus assembly, activation, cache invalidation.\n - `packages/coding-agent/src/sdk.ts` — initial hiding of discoverable built-ins and prompt-time discoverable summary.\n - `packages/coding-agent/src/tools/index.ts` — tool-session discovery hooks, essential/discoverable load modes, registry wiring.\n - `packages/coding-agent/src/config/settings-schema.ts` — `tools.discoveryMode` and legacy `mcp.discoveryMode` settings.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Natural-language or keyword query. Trimmed before search; empty-after-trim is rejected. |\n| `limit` | `integer` | No | Max matches to return and activate. Minimum `1`. Defaults to `8` (`DEFAULT_LIMIT`). |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- Model-visible `content` is one text part containing JSON with:\n\n```json\n{\"query\":\"...\",\"activated_tools\":[\"...\"],\"match_count\":2,\"total_tools\":17}\n```\n\n- Runtime-only `details` carries the ranked matches used by the TUI renderer:\n - `query`, `limit`, `total_tools`\n - `activated_tools`: tool names activated by this call\n - `active_selected_tools`: cumulative discovered-tool selections still active\n - `tools`: array of match objects with\n - `name`\n - `label`\n - `description` (`tool.summary`; this is the only snippet-like field)\n - optional `server_name`\n - optional `mcp_tool_name`\n - `schema_keys`\n - `score` rounded to 6 decimals\n- The renderer shows a status line plus up to 5 collapsed tree items by default (`COLLAPSED_MATCH_LIMIT`), each with label, optional server name, score to 3 decimals, and truncated description. The ranked match list is not serialized into `content`.\n\n## Flow\n1. `SearchToolBm25Tool.createIf()` in `packages/coding-agent/src/tools/search-tool-bm25.ts` exposes the tool only when `tools.discoveryMode !== \"off\"` and the session implements discovery hooks.\n2. `description` is rendered from `packages/coding-agent/src/prompts/tools/search-tool-bm25.md` via `renderSearchToolBm25Description()`, using the current discoverable-tool list plus per-server summary/count.\n3. `execute()` re-checks capability and settings:\n - missing discovery hooks -> `ToolError(\"Tool discovery is unavailable in this session.\")`\n - discovery disabled -> `ToolError(\"Tool discovery is disabled. Enable tools.discoveryMode or mcp.discoveryMode to use search_tool_bm25.\")`\n4. `query` is trimmed and validated; `limit` is defaulted/validated.\n5. `getDiscoverableToolSearchIndexForExecution()` fetches the cached generic search index from the session when available, otherwise rebuilds an index from the current discoverable-tool list.\n6. `getSelectedToolNames()` reads the current discovered selections so already-selected tools can be excluded from fresh results.\n7. `searchDiscoverableTools()` in `packages/coding-agent/src/tool-discovery/tool-index.ts` tokenizes the query, scores every document with BM25, sorts by descending score then `tool.name`, and returns up to `searchIndex.documents.length` results; `execute()` then filters already-selected names and slices to `limit`.\n8. If any matches remain, `activateTools()` activates all matched tool names through `session.activateDiscoveredTools()`.\n9. `details` is assembled from the activated names, current selected names, corpus size, and formatted matches; `content` is reduced to the compact JSON summary from `buildSearchToolBm25Content()`.\n10. `searchToolBm25Renderer` renders either:\n - the structured `details` view, or\n - a fallback text-only warning block if `details` is absent.\n\n## Modes / Variants\n- Discovery-mode gating:\n - `tools.discoveryMode = \"all\"`: searches hidden discoverable built-ins.\n- Search-index source:\n - generic cached discoverable index from the session\n - rebuilt ad hoc from the current discoverable-tool list if neither cache path works\n- Activation backend:\n - generic `activateDiscoveredTools()`\n - legacy `activateDiscoveredMCPTools()` fallback\n\n## Side Effects\n- Session state\n - Adds matched tools to the active session tool set through `activateDiscoveredTools()`.\n - Updates discovered-tool selection state so repeated searches accumulate selections instead of replacing them.\n - Invalidates the cached discoverable search index when newly activated built-ins change the hidden corpus (`packages/coding-agent/src/session/agent-session.ts`).\n - Tool availability changes before the next model call in the same turn; the prompt text says this explicitly.\n- User-visible prompts / interactive UI\n - The tool description includes discoverable server summaries and total discoverable-tool count.\n - The TUI renderer shows ranked matches, but the model-visible text summary does not.\n\n## Limits & Caps\n- Default result cap: `8` (`DEFAULT_LIMIT` in `packages/coding-agent/src/tools/search-tool-bm25.ts`).\n- `limit` must be a positive integer; no tool-level upper bound beyond corpus size.\n- Renderer collapsed list cap: `5` (`COLLAPSED_MATCH_LIMIT`).\n- Renderer truncation widths:\n - label: `72` chars (`MATCH_LABEL_LEN`)\n - description: `96` chars (`MATCH_DESCRIPTION_LEN`)\n- BM25 parameters in `packages/coding-agent/src/tool-discovery/tool-index.ts`:\n - `BM25_K1 = 1.2`\n - `BM25_B = 0.75`\n- Weighted corpus fields (`FIELD_WEIGHTS`):\n - `name`: `6`\n - `label`: `4`\n - `mcpToolName`: `4`\n - `serverName`: `2`\n - `summary`: `2`\n - each `schemaKey`: `1`\n- Summary fallback length for discoverable metadata: first `200` chars of `description` when no explicit summary exists (`getDiscoverableTool()` in `packages/coding-agent/src/tool-discovery/tool-index.ts`).\n\n## Errors\n- `execute()` throws `ToolError` for unavailable discovery hooks, disabled discovery mode, empty trimmed query, and non-positive/non-integer `limit`.\n- `searchDiscoverableTools()` throws `Error(\"Query must contain at least one letter or number.\")` if tokenization produces no alphanumeric tokens; `execute()` catches `Error` and rethrows `ToolError(error.message)`.\n- Empty corpus is not an error; search returns `[]`, activation is skipped, and the renderer message becomes either `No discoverable tools are currently loaded.` or `No matching tools found.`\n- `getDiscoverableToolsForDescription()` and `getDiscoverableToolSearchIndexForExecution()` swallow discovery-hook/cache errors and fall back to an empty corpus or rebuilt index.\n\n## Notes\n- The tool wire name stays `search_tool_bm25` for persisted-session back-compat, even though the source file is `search-tool-bm25.ts`.\n- Corpus composition is session-dependent and excludes already-active tools:\n - Built-in entries appear only in `\"all\"` mode and only for registry tools whose `loadMode === \"discoverable\"` and are not currently active.\n - Hidden/internal built-ins are intentionally excluded from the built-in corpus: `resolve`, `yield`, `report_finding`, `report_tool_issue` are called out in the `#collectDiscoverableBuiltinTools()` comment.\n- `AgentSession.getDiscoverableTools()` currently assembles built-in discoverable tools.\n- On startup, `packages/coding-agent/src/sdk.ts` hides non-essential discoverable built-ins in `tools.discoveryMode = \"all\"`; defaults are `read`, `bash`, and `edit` unless `tools.essentialOverride` changes them.\n- Query tokenization is simple and deterministic: camelCase is split, non-alphanumerics become spaces, tokens are lowercased, and only non-empty alphanumeric tokens survive.\n- Scores are rounded differently by surface: `details.tools[].score` keeps 6 decimals; the TUI line renders 3.\n",
|
|
91
93
|
"tools/ssh.md": "# ssh\n\n> Execute one remote command on a discovered SSH host.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ssh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ssh.md`\n- Key collaborators:\n - `packages/coding-agent/src/ssh/ssh-executor.ts` — runs `ssh`, captures output\n - `packages/coding-agent/src/ssh/connection-manager.ts` — master-connection reuse, host probing\n - `packages/coding-agent/src/ssh/sshfs-mount.ts` — optional `sshfs` mount side effect\n - `packages/coding-agent/src/discovery/ssh.ts` — discovers host configs\n - `packages/coding-agent/src/capability/ssh.ts` — canonical host shape\n - `packages/coding-agent/src/session/streaming-output.ts` — tail streaming, truncation, artifacts\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp rules\n - `packages/utils/src/dirs.ts` — user/project ssh config paths\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `host` | `string` | Yes | Host name key from discovered SSH config entries, not an arbitrary hostname/IP. |\n| `command` | `string` | Yes | Remote command string passed to `ssh` as the remote command. |\n| `cwd` | `string` | No | Remote working directory. The tool prepends a shell-specific `cd`/`Set-Location` wrapper. |\n| `timeout` | `number` | No | Timeout in seconds. Default `60`; clamped to `1..3600`. |\n\n## Outputs\nThe tool returns a standard text tool result built in `packages/coding-agent/src/tools/ssh.ts`:\n\n- `content`: one text block containing combined remote stdout+stderr, or `\"(no output)\"` when empty.\n- `details.meta.truncation`: present when output exceeded the in-memory tail window; derived from the executor summary.\n\nStreaming behavior:\n\n- While the command runs, `onUpdate` receives tail-only text snapshots built from `TailBuffer` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Final output is single-shot after process exit.\n\nSide-channel artifacts:\n\n- When session artifact allocation is available and output exceeds the spill threshold, full output is written to a session artifact file and the returned summary carries its `artifactId` internally.\n- The ssh tool itself does not print the `artifact://...` URI into the result text.\n\nFailure behavior:\n\n- Unknown host, missing host config, timeout, cancellation, SSH startup failure, key validation failure, or non-zero remote exit all surface as thrown `ToolError`s.\n- Non-zero remote exit includes captured output plus `Command exited with code N`.\n\n## Flow\n1. `loadSshTool()` in `packages/coding-agent/src/tools/ssh.ts` calls `loadCapability(sshCapability.id, { cwd: session.cwd })` to discover hosts.\n2. `packages/coding-agent/src/discovery/ssh.ts` loads host entries from, in this order: project managed ssh config, user managed ssh config, `ssh.json` in the repo root, `.ssh.json` in the repo root.\n3. `getSSHConfigPath(\"project\")` and `getSSHConfigPath(\"user\")` in `packages/utils/src/dirs.ts` resolve those managed files to `.gjc/ssh.json` in the project and `~/.gjc/agent/ssh.json` in the user config dir. This tool does not read `~/.ssh/config`.\n4. Capability loading deduplicates by host name with first item winning; provider order is priority-sorted and the SSH JSON provider registers at priority `5`.\n5. `loadHosts()` in `packages/coding-agent/src/tools/ssh.ts` builds `hostsByName` and drops later duplicates again with `if (!hostsByName.has(host.name))`.\n6. Tool description text is built from `packages/coding-agent/src/prompts/tools/ssh.md` plus an `Available hosts:` list. Each host entry calls `getHostInfoForHost()` to show detected shell/OS when cached; otherwise it renders `detecting...`.\n7. On execute, `SshTool.execute()` rejects any `host` not in the discovered host-name set.\n8. `ensureHostInfo()` in `packages/coding-agent/src/ssh/connection-manager.ts` ensures an SSH master connection exists, loads cached host info from disk if present, and probes remote OS/shell when cache is missing or stale.\n9. `buildRemoteCommand()` in `packages/coding-agent/src/tools/ssh.ts` prepends a cwd change when `cwd` is provided:\n - Unix-like or Windows compat shells: `cd -- '<cwd>' && <command>`\n - Windows PowerShell: `Set-Location -Path '<cwd>'; <command>`\n - Windows cmd: `cd /d \"<cwd>\" && <command>`\n10. `clampTimeout(\"ssh\", rawTimeout)` applies the `1..3600` second clamp from `packages/coding-agent/src/tools/tool-timeouts.ts`.\n11. `executeSSH()` in `packages/coding-agent/src/ssh/ssh-executor.ts` calls `ensureConnection(host)` again, opportunistically mounts the remote host root with `sshfs` if available, optionally wraps the command in `bash -c` or `sh -c` for Windows compat mode, then spawns `ssh` with `ptree.spawn`.\n12. Output from both stdout and stderr is piped into one `OutputSink`; chunks are sanitized and forwarded to streaming updates through `streamTailUpdates()`.\n13. On normal exit, the sink returns combined output plus truncation counters. On timeout or abort, `executeSSH()` returns `cancelled: true` and prefixes the output with a notice line such as `[SSH: ...]` or `[Command aborted: ...]`.\n14. `SshTool.execute()` converts `cancelled: true` into `ToolError`, converts non-zero exit codes into `ToolError`, otherwise returns the text result with truncation metadata.\n\n## Modes / Variants\n- **Tool unavailable**: `loadSshTool()` returns `null` when discovery finds no hosts, so the tool is not registered for that session.\n- **Unix-like target**: remote command is passed through directly, with optional `cd -- ... &&` prefix.\n- **Windows native shell**: cwd wrapper uses PowerShell `Set-Location` or cmd `cd /d`; command otherwise runs in the remote default Windows shell.\n- **Windows compat shell**: if host probing finds `bash` or `sh` on Windows, `executeSSH()` wraps the remote command as `bash -c '...'` or `sh -c '...'`. Host config can force compat on/off with `compat`.\n- **Cached vs probed host info**: shell/OS detection comes from in-memory cache, persisted JSON under the remote-host dir, or a fresh probe over SSH.\n- **Truncated vs untruncated output**: small output stays in memory; large output keeps only the last 50 KiB in memory and may spill full output to an artifact file.\n\n## Side Effects\n- Filesystem\n - Reads managed SSH config JSON plus legacy `ssh.json` / `.ssh.json`.\n - Validates private-key path existence and permissions before connecting.\n - Persists probed host info as JSON under the remote-host cache dir via `persistHostInfo()`.\n - May create the SSH control socket dir and, when `sshfs` exists, remote mount dirs.\n - May write full command output to a session artifact file.\n- Network\n - Opens SSH connections to the selected host.\n - May issue extra probe commands to detect OS/shell and compat shells.\n- Subprocesses / native bindings\n - Requires `ssh` on `PATH`; spawns it for connection checks, master startup, probing, and command execution.\n - May call `sshfs`, `mountpoint`, `fusermount`/`fusermount3`, or `umount`.\n - Sanitizes streamed text with `@gajae-code/natives` text sanitization.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses session artifact allocation when available.\n - Registers postmortem cleanup hooks for SSH master connections and sshfs mounts.\n - Tool concurrency is `exclusive`, so the agent scheduler should not run multiple ssh tool calls concurrently.\n- Background work / cancellation\n - Process spawn receives the tool `AbortSignal`.\n - Cancellation/timeout ends the running ssh process and returns a cancelled result that the tool turns into an error.\n\n## Limits & Caps\n- Timeout defaults/clamps: `default=60`, `min=1`, `max=3600` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Output tail window: `DEFAULT_MAX_BYTES = 50 * 1024` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Output sink spill threshold defaults to the same `50 KiB`; once exceeded, only the tail remains in memory.\n- SSH master reuse persistence: `ControlPersist=3600` in `packages/coding-agent/src/ssh/connection-manager.ts` and `packages/coding-agent/src/ssh/sshfs-mount.ts`.\n- SSH host info schema version: `HOST_INFO_VERSION = 2` in `packages/coding-agent/src/ssh/connection-manager.ts`; stale cache entries are reprobed.\n- Streaming tail buffer compacts after more than `10` pending chunks (`MAX_PENDING`) before trimming.\n\n## Errors\n- `Unknown SSH host: ... Available hosts: ...` when the model passes a host name not present in discovery.\n- `SSH host not loaded: ...` if the discovered-name set and `hostsByName` map diverge.\n- `ssh binary not found on PATH` when `ssh` is unavailable.\n- `SSH key not found: ...`, `SSH key is not a file: ...`, or `SSH key permissions must be 600 or stricter: ...` from key validation.\n- `Failed to start SSH master for <target>: <stderr>` when control-master startup fails.\n- Non-zero remote command exit becomes `ToolError` with captured output and `Command exited with code N`.\n- Timeout becomes a cancelled result with output notice `[SSH: <timeout message>]`, then `ToolError`.\n- Abort becomes a cancelled result with output notice `[Command aborted: <message>]`, then `ToolError`.\n- `sshfs` mount failures are logged and ignored in `executeSSH()`; they do not fail the tool call.\n- Discovery parse problems do not fail tool loading; they become capability warnings. If all sources are empty/invalid, the tool simply does not load.\n\n## Notes\n- Host discovery is JSON-based only. The tool does not parse OpenSSH config files.\n- Discovery expands environment variables recursively in the parsed JSON and expands `~` in `key`/`keyPath`.\n- Host names are capability keys; the model must pass the config key, not the raw hostname.\n- Commands run without a PTY. `executeSSH()` uses `ptree.spawn(..., { stdin: \"pipe\", stderr: \"full\" })` and does not request an interactive terminal.\n- The tool exposes `cwd` but no `env`, `pty`, upload, download, or explicit file-transfer fields.\n- Lower layers support an `artifactId` for full output and a `remotePath` mount target, but `SshTool.execute()` does not expose those knobs.\n- Both stdout and stderr are merged into one output stream; ordering is whatever arrives through the two streams.\n- `StrictHostKeyChecking=accept-new` and `BatchMode=yes` are always set for connection checks, master startup, and command runs.\n- Connection reuse is keyed by discovered host name, not by raw target tuple alone.\n- `closeAllConnections()` and sshfs unmount cleanup run through postmortem hooks, not per-call teardown.\n",
|
|
92
|
-
"tools/task.md": "# task\n\n> Launch subagents for parallel, optionally isolated work.\n\n## Source\n- Entry: `packages/coding-agent/src/task/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/task.md`\n- Key collaborators:\n - `packages/coding-agent/src/task/types.ts` — dynamic schema, progress/result types, output caps.\n - `packages/coding-agent/src/task/discovery.ts` — resolve bundled GJC agents.\n - `packages/coding-agent/src/task/agents.ts` — bundled agent definitions and frontmatter parsing.\n - `packages/coding-agent/src/task/executor.ts` — create child sessions, run subagents, collect output.\n - `packages/coding-agent/src/task/parallel.ts` — concurrency-limited scheduling and async semaphore.\n - `packages/coding-agent/src/task/isolation-backend.ts` — isolation backend resolution and platform fallback.\n - `packages/coding-agent/src/task/worktree.ts` — worktree / FUSE / ProjFS setup, patch capture, branch merge.\n - `packages/coding-agent/src/task/output-manager.ts` — session-scoped `agent://` id allocation.\n - `packages/coding-agent/src/task/simple-mode.ts` — `default` / `schema-free` / `independent` field gating.\n - `packages/coding-agent/src/internal-urls/agent-protocol.ts` — resolve `agent://<id>` to saved subagent output.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and recursion-depth gating.\n - `packages/coding-agent/src/sdk.ts` — child-session router/tool wiring and per-subagent `AgentOutputManager`.\n - `docs/task-agent-discovery.md` — deeper discovery and precedence notes.\n - `docs/handoff-generation-pipeline.md` — session artifact/handoff persistence patterns used by the wider session layer.\n\n## Inputs\n\n### Default mode (`task.simple = \"default\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name for every task item. Resolved at execution time through `discoverAgents(...)`. |\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Batch of small, self-contained task items. `id` max length 48 in schema; duplicate ids are rejected case-insensitively at runtime. |\n| `context` | `string` | No | Shared background prepended to every subagent system prompt. Trimmed before use. |\n| `schema` | `string` | No | JSON-encoded JTD schema. Overrides agent/session output schema when this mode allows task-level schemas. |\n| `isolated` | `boolean` | No | Only present when the tool is created with isolation enabled. Requests isolated execution for the whole batch. |\n\n`tasks[].description` is UI-only. `tasks[].assignment` is the actual per-task instruction.\n\n### Schema-free mode (`task.simple = \"schema-free\"`)\n\nSame as default, except `schema` is rejected by `validateTaskModeParams(...)` in `packages/coding-agent/src/task/index.ts`.\n\n### Independent mode (`task.simple = \"independent\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name. |\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Same item shape, but each `assignment` must carry all required background because shared `context` is disabled. |\n| `isolated` | `boolean` | No | Same conditional field as above. |\n\nIn this mode both `context` and `schema` are rejected.\n\n## Outputs\nThe tool returns one text block plus `details: TaskToolDetails`.\n\n`details` fields:\n- `projectAgentsDir: string | null` — nearest discovered project `agents/` dir.\n- `results: SingleResult[]` — one entry per task in input order for synchronous execution; empty for async-launch responses.\n- `totalDurationMs: number`\n- `usage?: Usage` — sum of per-subagent assistant-message usage.\n- `outputPaths?: string[]` — written `.md` artifact paths for completed subagent outputs.\n- `progress?: AgentProgress[]` — live or final per-task progress snapshots.\n- `async?: { state: \"running\" | \"completed\" | \"failed\"; jobId: string; type: \"task\" }` — present for background execution updates/results.\n\n`SingleResult` includes:\n- identity: `index`, `id`, `agent`, `agentSource`, `description`, optional `assignment`\n- status: `exitCode`, optional `error`, optional `aborted`, optional `abortReason`\n- output: `output`, `stderr`, `truncated`, `durationMs`, `tokens`\n- artifact metadata: `outputPath?`, `patchPath?`, `branchName?`, `nestedPatches?`, `outputMeta?`\n- extracted tool data: `extractedToolData?` from registered subprocess tool handlers such as `yield` and `report_finding`\n\nArtifacts and side channels:\n- Every subagent with an artifacts dir writes `<id>.md`; `agent://<id>` resolves to that file.\n- If the output file is JSON, `agent://<id>/<path>` and `agent://<id>?q=<query>` perform JSON extraction in `packages/coding-agent/src/internal-urls/agent-protocol.ts`.\n- When the parent session persists artifacts, each subagent also gets `<id>.jsonl` session history.\n- Isolated patch mode writes `<id>.patch` per successful task before merge.\n- Async mode returns immediately after job registration, then emits `onUpdate(...)` progress snapshots and later hands completion to the session async-job pipeline.\n\n## Flow\n1. `TaskTool.create(...)` in `packages/coding-agent/src/task/index.ts` calls `discoverAgents(session.cwd)` once to build the dynamic prompt description from current agents and `task.simple` capabilities.\n2. `execute(...)` validates mode-gated fields with `validateTaskModeParams(...)`.\n3. It decides async vs sync:\n - sync when `async.enabled` is false\n - sync when the selected cached agent has `blocking === true`\n - sync when `tasks.length === 0`\n - otherwise async job scheduling\n4. Async path:\n - allocate unique output ids with `AgentOutputManager.allocateBatch(...)`\n - create one async job per task through `session.asyncJobManager.register(...)`\n - limit concurrent job bodies with `Semaphore(task.maxConcurrency)` from `packages/coding-agent/src/task/parallel.ts`\n - each job body calls `#executeSync(...)` with a one-task batch and the preallocated id\n - `onUpdate(...)` emits aggregate `progress` snapshots and `details.async`\n5. Sync path (`#executeSync(...)`) rediscovers agents from disk via `discoverAgents(...)`, so runtime resolution can differ from the earlier prompt description.\n6. It resolves the requested agent with `getAgent(...)`, rejects unknown or disabled agents, and enforces parent spawn policy plus `GJC_BLOCKED_AGENT` self-recursion prevention.\n7. It derives the effective output schema in priority order: task call `schema` (if allowed) → agent frontmatter `output` → inherited parent session schema.\n8. It validates task ids: missing ids and case-insensitive duplicates are immediate errors.\n9. If `isolated` was requested, it requires a git repo (`getRepoRoot(...)` / `captureBaseline(...)`) and resolves the actual backend through `resolveIsolationBackendForTaskExecution(...)`.\n10. It chooses an artifacts dir from the parent session when available, otherwise a temp dir, and writes `context.md` there when `session.getCompactContext?.()` returns content.\n11. It allocates unique ids again if the caller did not preallocate them, then builds `tasksWithUniqueIds`.\n12. For each task, it seeds an `AgentProgress` entry and runs `runTask(...)` through `mapWithConcurrencyLimit(...)` using `task.maxConcurrency`.\n13. Non-isolated `runTask(...)` calls `runSubprocess(...)` directly with parent cwd.\n14. Isolated `runTask(...)`:\n - creates an isolation workspace (`ensureWorktree(...)`, `ensureFuseOverlay(...)`, or `ensureProjfsOverlay(...)`)\n - applies the captured baseline for worktrees\n - runs `runSubprocess(...)` inside that workspace\n - on success, either commits to a per-task branch (`mergeMode === \"branch\"`) or captures a patch with `captureDeltaPatch(...)`\n - always cleans up the isolation workspace/backend\n15. `runSubprocess(...)` in `packages/coding-agent/src/task/executor.ts` creates a child agent session with:\n - isolated settings snapshot via `Settings.isolated(...)`, forcing `async.enabled = false` and `bash.autoBackground.enabled = false`\n - child `agentId` / `parentTaskPrefix` equal to the allocated task id\n - child internal URL router and `AgentOutputManager` from `packages/coding-agent/src/sdk.ts`\n - the shared `context`, optional `context.md` reference, optional isolation worktree path, output schema, and IRC peer roster in the system prompt template\n16. Child tool availability is derived from the agent definition plus runtime guards:\n - explicit `agent.tools` if provided\n - auto-add `task` when the agent has `spawns` and recursion depth allows it\n - remove `task` at or past `task.maxRecursionDepth`\n - expand `exec` to `eval` and `bash`\n - strip parent-owned `todo_write` after session creation\n17. `runSubprocess(...)` subscribes to child agent events, coalesces progress updates every 150 ms, forwards lifecycle/progress events on the parent event bus, and extracts tool data through `subprocessToolRegistry`.\n18. The child must finish through the hidden `yield` tool. If it does not, `runSubprocess(...)` sends up to 3 reminder prompts; the last reminder forces `toolChoice = yield` when supported.\n19. Finalization uses `finalizeSubprocessOutput(...)` to reconcile raw assistant text, `yield` payloads, structured schemas, `report_finding` data, and abort states. Output is truncated with `MAX_OUTPUT_BYTES` / `MAX_OUTPUT_LINES` before returning to the parent, but the full raw output is still written to `<id>.md`.\n20. After all sync tasks finish, `#executeSync(...)` aggregates usage, collects artifact paths, and if isolation was used merges results back:\n - branch mode: cherry-pick per-task branches with `mergeTaskBranches(...)`, then delete merged branches with `cleanupTaskBranches(...)`\n - patch mode: combine non-empty patch artifacts, dry-check with `git.patch.canApplyText(...)`, then apply or leave manual artifacts\n - nested repo patches are applied separately with `applyNestedPatches(...)`\n21. The final text summary is rendered from `packages/coding-agent/src/prompts/tools/task-summary.md` and includes `agent://<id>` handles for outputs that exist.\n\n## Modes / Variants\n- Execution mode\n - Sync inline execution — default path.\n - Async background execution — one async job per task item when `async.enabled` is on and the chosen agent is not marked `blocking`.\n- Simple mode\n - `default` — accepts shared `context` and per-call `schema`.\n - `schema-free` — accepts `context`, rejects `schema`.\n - `independent` — rejects `context` and `schema`; each assignment stands alone.\n- Isolation backend\n - `none` — no isolation.\n - `worktree` — detached git worktree plus baseline replay.\n - `fuse-overlay` — Unix FUSE overlay mount.\n - `fuse-projfs` — Windows ProjFS overlay.\n- Isolation merge strategy\n - Patch mode — capture/apply root patches, keep patch artifacts when application fails.\n - Branch mode — commit each task onto `gjc/task/<id>` branch, cherry-pick into parent, preserve failed branches for manual resolution.\n- Agent source\n - Bundled GJC agents — retained four-agent surface.\n - Bundled agents — appended last from `packages/coding-agent/src/task/agents.ts`.\n- Bundled agent types\n - `explore` — read-only scout with structured handoff output.\n - `plan` — architecture/planning agent; may spawn `explore`.\n - `designer` — UI/UX specialist.\n - `reviewer` — review agent with `report_finding` extraction.\n - `task` — general-purpose worker with full capabilities.\n - `quick_task` — low-reasoning mechanical worker using the same task prompt body.\n - `librarian` — source-grounded external API/library researcher.\n\n## Side Effects\n- Filesystem\n - Writes `context.md`, `<id>.jsonl`, and `<id>.md` under the session artifacts dir or a temp task dir.\n - In isolated patch mode writes `<id>.patch` artifacts.\n - Creates/removes worktrees or overlay mount directories.\n - In branch mode creates temporary worktrees and task branches.\n- Network\n - Child sessions may use whichever networked tools/models their active tool set permits.\n- Subprocesses / native bindings\n - `fuse-overlayfs` and `fusermount`/`fusermount3` for FUSE isolation.\n - ProjFS native bindings via `@gajae-code/natives` on Windows.\n - Git operations for baseline capture, patch apply, worktrees, branches, stash, cherry-pick, commits.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Creates child `AgentSession` instances with isolated settings snapshots.\n - Registers async jobs in `session.asyncJobManager` for background task mode.\n - Emits `task:subagent:event`, `task:subagent:progress`, and `task:subagent:lifecycle` on the parent event bus.\n - Allocates session-scoped output ids through `AgentOutputManager` so `agent://` remains unique across invocations and resumes.\n - Shares the parent `local://` root with subagents by passing `localProtocolOptions` through `createAgentSession(...)`.\n- User-visible prompts / interactive UI\n - Async mode streams aggregate progress updates.\n - Missing-`yield` recovery sends up to three internal reminder prompts to the child session.\n - Final summaries include `<system-notification>` blocks for isolation fallbacks or merge failures.\n- Background work / cancellation\n - Parent abort stops scheduling new work, aborts active child sessions, and marks unscheduled tasks as skipped.\n - Async jobs keep their own cancellation via `AsyncJobManager`.\n - Await timeouts are observation windows only: they do not stop, fail, or make a subagent stale. Inspect/list and continue work; cancel only when the subagent has actually failed, gone off-track, or become unrecoverably wrong.\n\n## Limits & Caps\n- Per-subagent output truncation: `MAX_OUTPUT_BYTES = 500_000` and `MAX_OUTPUT_LINES = 5000` in `packages/coding-agent/src/task/types.ts`. Full raw output is still written to `<id>.md` before truncation is returned to the caller.\n- Progress coalescing in child execution: `PROGRESS_COALESCE_MS = 150` in `packages/coding-agent/src/task/executor.ts`.\n- Recent output tail for progress: `RECENT_OUTPUT_TAIL_BYTES = 8 * 1024` and `recentOutput` keeps the last 8 non-empty lines in `packages/coding-agent/src/task/executor.ts`.\n- Missing-`yield` reminder retries: `MAX_YIELD_RETRIES = 3` in `packages/coding-agent/src/task/executor.ts`.\n- Task id schema cap: `tasks[].id` `maxLength: 48` in `packages/coding-agent/src/task/types.ts`.\n- Prompt text says ids should be `≤32` chars, but the runtime schema allows 48; this mismatch is real.\n- Async/full sync parallelism both use `task.maxConcurrency` from settings:\n - sync path: `mapWithConcurrencyLimit(...)`\n - async path: `Semaphore(...)` around job bodies\n- Recursion depth gate: `task.maxRecursionDepth` from settings; `packages/coding-agent/src/tools/index.ts` hides the `task` tool at or beyond the limit, and `runSubprocess(...)` also strips child `task` access at max depth.\n- Final inline summary preview per task uses `fullOutputThreshold = 5000` chars in `packages/coding-agent/src/task/index.ts`; longer outputs are summarized while `agent://<id>` points to the full artifact.\n\n## Errors\n- Most validation failures are returned as normal tool text with empty `results`, not thrown:\n - invalid simple-mode fields\n - unknown/disabled agent\n - missing tasks\n - missing/duplicate task ids\n - spawn-policy denial\n - requesting `isolated` while isolation mode is `none`\n- Isolated execution without a git repo returns `Isolated task execution requires a git repository. ...`.\n- Backend resolution can return a hard error (`ProjFS isolation initialization failed...`) or a non-fatal warning with fallback to `worktree`.\n- `mapWithConcurrencyLimit(...)` fails fast on non-abort worker exceptions; already completed results are preserved only in the thrown path’s local state, not surfaced unless the caller catches and converts them.\n- Child-session failures surface as `SingleResult.exitCode = 1` with `stderr`/`error` populated.\n- If the child omits `yield`, `finalizeSubprocessOutput(...)` injects warnings such as `SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.`\n- Async scheduling failures are accumulated per task; if no jobs start, the tool returns `Failed to start background task jobs: ...`.\n- `agent://<id>` resolution errors are model-visible when another tool reads them: no session, no artifacts dir, missing id, conflicting extraction syntax, or invalid JSON for extraction.\n\n## Notes\n- Bundled agent definitions are embedded in `packages/coding-agent/src/task/agents.ts`.\n- `TaskTool.create(...)` caches discovered agents only for description rendering and the async blocking-agent decision. `#executeSync(...)` rediscovers agents each call.\n- Custom agent frontmatter can override bundled agents by name. Bundled definitions are embedded at build time in `packages/coding-agent/src/task/agents.ts`.\n- Child sessions do not inherit conversation history automatically. The only built-in carry-over is shared `context`, optional `context.md`, workspace tree/context files, and shared `local://` root.\n- `Settings.isolated(...)` gives each child a session-isolated settings snapshot; tool enablement is recomputed inside the child session rather than sharing mutable parent tool state.\n- Plan mode mutates an `effectiveAgent` with a read-only tool subset and plan-mode prompt text, but `runSubprocess(...)` is still invoked with `agent` rather than `effectiveAgent`. Model/thinking/schema overrides use the effective agent; prompt/tool/spawn restrictions do not fully flow through this call path.\n- Branch-mode merge temporarily stashes the parent repo before cherry-picking task branches. A stash-pop conflict is treated as merge failure and leaves recovery state behind.\n- Patch-mode only applies combined root patches if every successful task produced a patch and `git.patch.canApplyText(...)` succeeds.\n- Nested git repos are handled separately from the root repo. They are copied into isolated worktrees, diffed independently, and merged later with `applyNestedPatches(...)` because parent git cannot track their file-level changes.\n- `agent://` ids are numeric-prefixed (`0-Task`, `1-Task`, nested like `0-Parent.0-Child`) by `AgentOutputManager`; this is what prevents artifact collisions across repeated or nested task invocations.\n",
|
|
94
|
+
"tools/task.md": "# task\n\n> Launch subagents for parallel, optionally isolated work.\n\n## Source\n- Entry: `packages/coding-agent/src/task/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/task.md`\n- Key collaborators:\n - `packages/coding-agent/src/task/types.ts` — dynamic schema, progress/result types, output caps.\n - `packages/coding-agent/src/task/discovery.ts` — resolve bundled GJC agents.\n - `packages/coding-agent/src/task/agents.ts` — bundled agent definitions and frontmatter parsing.\n - `packages/coding-agent/src/task/executor.ts` — create child sessions, run subagents, collect output.\n - `packages/coding-agent/src/task/parallel.ts` — concurrency-limited scheduling and async semaphore.\n - `packages/coding-agent/src/task/isolation-backend.ts` — isolation backend resolution and platform fallback.\n - `packages/coding-agent/src/task/worktree.ts` — worktree / FUSE / ProjFS setup, patch capture, branch merge.\n - `packages/coding-agent/src/task/output-manager.ts` — session-scoped `agent://` id allocation.\n - `packages/coding-agent/src/task/simple-mode.ts` — `default` / `schema-free` / `independent` field gating.\n - `packages/coding-agent/src/internal-urls/agent-protocol.ts` — resolve `agent://<id>` to saved subagent output.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and recursion-depth gating.\n - `packages/coding-agent/src/sdk.ts` — child-session router/tool wiring and per-subagent `AgentOutputManager`.\n - `docs/task-agent-discovery.md` — deeper discovery and precedence notes.\n - `docs/handoff-generation-pipeline.md` — session artifact/handoff persistence patterns used by the wider session layer.\n\n## Inputs\n\n### Default mode (`task.simple = \"default\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name for every task item. Resolved at execution time through `discoverAgents(...)`. |\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Batch of small, self-contained task items. `id` max length 48 in schema; duplicate ids are rejected case-insensitively at runtime. |\n| `context` | `string` | No | Shared background prepended to every subagent system prompt. Trimmed before use. |\n| `schema` | `string` | No | JSON-encoded JTD schema. Overrides agent/session output schema when this mode allows task-level schemas. |\n| `isolated` | `boolean` | No | Only present when the tool is created with isolation enabled. Requests isolated execution for the whole batch. |\n\n`tasks[].description` is UI-only. `tasks[].assignment` is the actual per-task instruction.\n\n### Schema-free mode (`task.simple = \"schema-free\"`)\n\nSame as default, except `schema` is rejected by `validateTaskModeParams(...)` in `packages/coding-agent/src/task/index.ts`.\n\n### Independent mode (`task.simple = \"independent\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name. |\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Same item shape, but each `assignment` must carry all required background because shared `context` is disabled. |\n| `isolated` | `boolean` | No | Same conditional field as above. |\n\nIn this mode both `context` and `schema` are rejected.\n\n## Outputs\nThe tool returns one text block plus `details: TaskToolDetails`.\n\n`details` fields:\n- `projectAgentsDir: string | null` — nearest discovered project `agents/` dir.\n- `results: SingleResult[]` — one entry per task in input order for synchronous execution; empty for async-launch responses.\n- `totalDurationMs: number`\n- `usage?: Usage` — sum of per-subagent assistant-message usage.\n- `outputPaths?: string[]` — written `.md` artifact paths for completed subagent outputs.\n- `progress?: AgentProgress[]` — live or final per-task progress snapshots.\n- `async?: { state: \"running\" | \"completed\" | \"failed\"; jobId: string; type: \"task\" }` — present for background execution updates/results.\n\n`SingleResult` includes:\n- identity: `index`, `id`, `agent`, `agentSource`, `description`, optional `assignment`\n- status: `exitCode`, optional `error`, optional `aborted`, optional `abortReason`\n- output: `output`, `stderr`, `truncated`, `durationMs`, `tokens`\n- artifact metadata: `outputPath?`, `patchPath?`, `branchName?`, `nestedPatches?`, `outputMeta?`\n- extracted tool data: `extractedToolData?` from registered subprocess tool handlers such as `yield` and `report_finding`\n\nArtifacts and side channels:\n- Every subagent with an artifacts dir writes `<id>.md`; `agent://<id>` resolves to that file.\n- If the output file is JSON, `agent://<id>/<path>` and `agent://<id>?q=<query>` perform JSON extraction in `packages/coding-agent/src/internal-urls/agent-protocol.ts`.\n- When the parent session persists artifacts, each subagent also gets `<id>.jsonl` session history.\n- Isolated patch mode writes `<id>.patch` per successful task before merge.\n- Async mode returns immediately after job registration, then emits `onUpdate(...)` progress snapshots and later hands completion to the session async-job pipeline.\n\n## Flow\n1. `TaskTool.create(...)` in `packages/coding-agent/src/task/index.ts` calls `discoverAgents(session.cwd)` once to build the dynamic prompt description from current agents and `task.simple` capabilities.\n2. `execute(...)` validates mode-gated fields with `validateTaskModeParams(...)`.\n3. It decides async vs sync:\n - sync when `async.enabled` is false\n - sync when the selected cached agent has `blocking === true`\n - sync when `tasks.length === 0`\n - otherwise async job scheduling\n4. Async path:\n - allocate unique output ids with `AgentOutputManager.allocateBatch(...)`\n - create one async job per task through `session.asyncJobManager.register(...)`\n - limit concurrent job bodies with `Semaphore(task.maxConcurrency)` from `packages/coding-agent/src/task/parallel.ts`\n - each job body calls `#executeSync(...)` with a one-task batch and the preallocated id\n - `onUpdate(...)` emits aggregate `progress` snapshots and `details.async`\n5. Sync path (`#executeSync(...)`) rediscovers agents from disk via `discoverAgents(...)`, so runtime resolution can differ from the earlier prompt description.\n6. It resolves the requested agent with `getAgent(...)`, rejects unknown or disabled agents, and enforces parent spawn policy plus `GJC_BLOCKED_AGENT` self-recursion prevention.\n7. It derives the effective output schema in priority order: task call `schema` (if allowed) → agent frontmatter `output` → inherited parent session schema.\n8. It validates task ids: missing ids and case-insensitive duplicates are immediate errors.\n9. If `isolated` was requested, it requires a git repo (`getRepoRoot(...)` / `captureBaseline(...)`) and resolves the actual backend through `resolveIsolationBackendForTaskExecution(...)`.\n10. It chooses an artifacts dir from the parent session when available, otherwise a temp dir, and writes `context.md` there when `session.getCompactContext?.()` returns content.\n11. It allocates unique ids again if the caller did not preallocate them, then builds `tasksWithUniqueIds`.\n12. For each task, it seeds an `AgentProgress` entry and runs `runTask(...)` through `mapWithConcurrencyLimit(...)` using `task.maxConcurrency`.\n13. Non-isolated `runTask(...)` calls `runSubprocess(...)` directly with parent cwd.\n14. Isolated `runTask(...)`:\n - creates an isolation workspace (`ensureWorktree(...)`, `ensureFuseOverlay(...)`, or `ensureProjfsOverlay(...)`)\n - applies the captured baseline for worktrees\n - runs `runSubprocess(...)` inside that workspace\n - on success, either commits to a per-task branch (`mergeMode === \"branch\"`) or captures a patch with `captureDeltaPatch(...)`\n - always cleans up the isolation workspace/backend\n15. `runSubprocess(...)` in `packages/coding-agent/src/task/executor.ts` creates a child agent session with:\n - isolated settings snapshot via `Settings.isolated(...)`, forcing `async.enabled = false` and `bash.autoBackground.enabled = false`\n - child `agentId` / `parentTaskPrefix` equal to the allocated task id\n - child internal URL router and `AgentOutputManager` from `packages/coding-agent/src/sdk.ts`\n - the shared `context`, optional `context.md` reference, optional isolation worktree path, output schema, and IRC peer roster in the system prompt template\n16. Child tool availability is derived from the agent definition plus runtime guards:\n - explicit `agent.tools` if provided\n - auto-add `task` when the agent has `spawns` and recursion depth allows it\n - remove `task` at or past `task.maxRecursionDepth`\n - expand `exec` to `eval` and `bash`\n - strip parent-owned `todo_write` after session creation\n17. `runSubprocess(...)` subscribes to child agent events, coalesces progress updates every 150 ms, forwards lifecycle/progress events on the parent event bus, and extracts tool data through `subprocessToolRegistry`.\n18. The child must finish through the hidden `yield` tool. If it does not, `runSubprocess(...)` sends up to 3 reminder prompts; the last reminder forces `toolChoice = yield` when supported.\n19. Finalization uses `finalizeSubprocessOutput(...)` to reconcile raw assistant text, `yield` payloads, structured schemas, `report_finding` data, and abort states. Output is truncated with `MAX_OUTPUT_BYTES` / `MAX_OUTPUT_LINES` before returning to the parent, but the full raw output is still written to `<id>.md`.\n20. After all sync tasks finish, `#executeSync(...)` aggregates usage, collects artifact paths, and if isolation was used merges results back:\n - branch mode: cherry-pick per-task branches with `mergeTaskBranches(...)`, then delete merged branches with `cleanupTaskBranches(...)`\n - patch mode: combine non-empty patch artifacts, dry-check with `git.patch.canApplyText(...)`, then apply or leave manual artifacts\n - nested repo patches are applied separately with `applyNestedPatches(...)`\n21. The final text summary is rendered from `packages/coding-agent/src/prompts/tools/task-summary.md` and includes `agent://<id>` handles for outputs that exist.\n\n## Modes / Variants\n- Execution mode\n - Sync inline execution — default path.\n - Async background execution — one async job per task item when `async.enabled` is on and the chosen agent is not marked `blocking`.\n- Simple mode\n - `default` — accepts shared `context` and per-call `schema`.\n - `schema-free` — accepts `context`, rejects `schema`.\n - `independent` — rejects `context` and `schema`; each assignment stands alone.\n- Isolation backend\n - `none` — no isolation.\n - `worktree` — detached git worktree plus baseline replay.\n - `fuse-overlay` — Unix FUSE overlay mount.\n - `fuse-projfs` — Windows ProjFS overlay.\n- Isolation merge strategy\n - Patch mode — capture/apply root patches, keep patch artifacts when application fails.\n - Branch mode — commit each task onto `gjc/task/<id>` branch, cherry-pick into parent, preserve failed branches for manual resolution.\n- Agent source\n - Project/user agents discovered from configured agent directories.\n - Bundled GJC role agents — the retained canonical four-agent surface from `packages/coding-agent/src/task/agents.ts`.\n- Bundled agent types\n - `executor` — bounded implementation, fixes, and refactors.\n - `architect` — read-only architecture and code-review assessment.\n - `planner` — read-only sequencing and acceptance criteria.\n - `critic` — read-only plan critique and actionability review.\n\n## Side Effects\n- Filesystem\n - Writes `context.md`, `<id>.jsonl`, and `<id>.md` under the session artifacts dir or a temp task dir.\n - In isolated patch mode writes `<id>.patch` artifacts.\n - Creates/removes worktrees or overlay mount directories.\n - In branch mode creates temporary worktrees and task branches.\n- Network\n - Child sessions may use whichever networked tools/models their active tool set permits.\n- Subprocesses / native bindings\n - `fuse-overlayfs` and `fusermount`/`fusermount3` for FUSE isolation.\n - ProjFS native bindings via `@gajae-code/natives` on Windows.\n - Git operations for baseline capture, patch apply, worktrees, branches, stash, cherry-pick, commits.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Creates child `AgentSession` instances with isolated settings snapshots.\n - Registers async jobs in `session.asyncJobManager` for background task mode.\n - Emits `task:subagent:event`, `task:subagent:progress`, and `task:subagent:lifecycle` on the parent event bus.\n - Allocates session-scoped output ids through `AgentOutputManager` so `agent://` remains unique across invocations and resumes.\n - Shares the parent `local://` root with subagents by passing `localProtocolOptions` through `createAgentSession(...)`.\n- User-visible prompts / interactive UI\n - Async mode streams aggregate progress updates.\n - Missing-`yield` recovery sends up to three internal reminder prompts to the child session.\n - Final summaries include `<system-notification>` blocks for isolation fallbacks or merge failures.\n- Background work / cancellation\n - Parent abort stops scheduling new work, aborts active child sessions, and marks unscheduled tasks as skipped.\n - Async jobs keep their own cancellation via `AsyncJobManager`.\n - Await timeouts are observation windows only: they do not stop, fail, or make a subagent stale. Inspect/list and continue work; cancel only when the subagent has actually failed, gone off-track, or become unrecoverably wrong.\n\n## Limits & Caps\n- Per-subagent output truncation: `MAX_OUTPUT_BYTES = 500_000` and `MAX_OUTPUT_LINES = 5000` in `packages/coding-agent/src/task/types.ts`. Full raw output is still written to `<id>.md` before truncation is returned to the caller.\n- Progress coalescing in child execution: `PROGRESS_COALESCE_MS = 150` in `packages/coding-agent/src/task/executor.ts`.\n- Recent output tail for progress: `RECENT_OUTPUT_TAIL_BYTES = 8 * 1024` and `recentOutput` keeps the last 8 non-empty lines in `packages/coding-agent/src/task/executor.ts`.\n- Missing-`yield` reminder retries: `MAX_YIELD_RETRIES = 3` in `packages/coding-agent/src/task/executor.ts`.\n- Task id schema cap: `tasks[].id` `maxLength: 48` in `packages/coding-agent/src/task/types.ts`.\n- Prompt text says ids should be `≤32` chars, but the runtime schema allows 48; this mismatch is real.\n- Async/full sync parallelism both use `task.maxConcurrency` from settings:\n - sync path: `mapWithConcurrencyLimit(...)`\n - async path: `Semaphore(...)` around job bodies\n- Recursion depth gate: `task.maxRecursionDepth` from settings; `packages/coding-agent/src/tools/index.ts` hides the `task` tool at or beyond the limit, and `runSubprocess(...)` also strips child `task` access at max depth.\n- Final inline summary preview per task uses `fullOutputThreshold = 5000` chars in `packages/coding-agent/src/task/index.ts`; longer outputs are summarized while `agent://<id>` points to the full artifact.\n\n## Errors\n- Most validation failures are returned as normal tool text with empty `results`, not thrown:\n - invalid simple-mode fields\n - unknown/disabled agent\n - missing tasks\n - missing/duplicate task ids\n - spawn-policy denial\n - requesting `isolated` while isolation mode is `none`\n- Isolated execution without a git repo returns `Isolated task execution requires a git repository. ...`.\n- Backend resolution can return a hard error (`ProjFS isolation initialization failed...`) or a non-fatal warning with fallback to `worktree`.\n- `mapWithConcurrencyLimit(...)` fails fast on non-abort worker exceptions; already completed results are preserved only in the thrown path’s local state, not surfaced unless the caller catches and converts them.\n- Child-session failures surface as `SingleResult.exitCode = 1` with `stderr`/`error` populated.\n- If the child omits `yield`, `finalizeSubprocessOutput(...)` injects warnings such as `SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.`\n- Async scheduling failures are accumulated per task; if no jobs start, the tool returns `Failed to start background task jobs: ...`.\n- `agent://<id>` resolution errors are model-visible when another tool reads them: no session, no artifacts dir, missing id, conflicting extraction syntax, or invalid JSON for extraction.\n\n## Notes\n- Bundled agent definitions are embedded in `packages/coding-agent/src/task/agents.ts`.\n- `TaskTool.create(...)` caches discovered agents only for description rendering and the async blocking-agent decision. `#executeSync(...)` rediscovers agents each call.\n- Custom agent frontmatter can override bundled agents by name. Bundled definitions are embedded at build time in `packages/coding-agent/src/task/agents.ts`.\n- Child sessions do not inherit conversation history automatically. The only built-in carry-over is shared `context`, optional `context.md`, workspace tree/context files, and shared `local://` root.\n- `Settings.isolated(...)` gives each child a session-isolated settings snapshot; tool enablement is recomputed inside the child session rather than sharing mutable parent tool state.\n- Plan mode mutates an `effectiveAgent` with a read-only tool subset and plan-mode prompt text, but `runSubprocess(...)` is still invoked with `agent` rather than `effectiveAgent`. Model/thinking/schema overrides use the effective agent; prompt/tool/spawn restrictions do not fully flow through this call path.\n- Branch-mode merge temporarily stashes the parent repo before cherry-picking task branches. A stash-pop conflict is treated as merge failure and leaves recovery state behind.\n- Patch-mode only applies combined root patches if every successful task produced a patch and `git.patch.canApplyText(...)` succeeds.\n- Nested git repos are handled separately from the root repo. They are copied into isolated worktrees, diffed independently, and merged later with `applyNestedPatches(...)` because parent git cannot track their file-level changes.\n- `agent://` ids are numeric-prefixed (`0-Task`, `1-Task`, nested like `0-Parent.0-Child`) by `AgentOutputManager`; this is what prevents artifact collisions across repeated or nested task invocations.\n",
|
|
93
95
|
"tools/todo_write.md": "# todo_write\n\n> Applies ordered mutations to the session todo list and returns a text summary plus the full phase/task state.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/todo-write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/todo-write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/index.ts` — registers tool, exposes session hooks, gates availability.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — updates the visible todo UI on tool completion.\n - `packages/coding-agent/src/session/agent-session.ts` — stores cached phases, auto-clears done/dropped tasks, emits failure reminders.\n - `packages/coding-agent/src/modes/controllers/todo-command-controller.ts` — `/todo` command path, custom-entry persistence, transcript reminder injection.\n - `packages/coding-agent/src/tools/render-utils.ts` — collapsed-preview cap for renderer trees.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `TodoOpEntry[]` | Yes | Ordered operations to apply. `minItems: 1`.\n\n### `TodoOpEntry`\n\n| Op | Required fields | Optional fields | Effect |\n| --- | --- | --- | --- |\n| `init` | `list` | None of the other fields are used | Replaces the entire list with `list`; every new task starts `pending` before normalization. |\n| `start` | `task` | None | Marks one task `in_progress`; any other `in_progress` task is demoted to `pending`. |\n| `done` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `completed`. |\n| `drop` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `abandoned`. |\n| `rm` | `task` or `phase` or neither | None | Removes the target task, clears the phase's task list, or clears all task lists. |\n| `append` | `phase`, `items` | None | Appends new `pending` tasks to a phase; creates the phase if missing. |\n| `note` | `task`, `text` | None | Appends one trimmed note string to the task's `notes` array. |\n\n### Fields used inside ops\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"init\" | \"start\" | \"done\" | \"rm\" | \"drop\" | \"append\" | \"note\"` | Yes | Operation discriminator. |\n| `list` | `{ phase: string; items: string[] }[]` | For `init` | Full replacement payload. Each `items` array has `minItems: 1`. |\n| `task` | `string` | For `start`; for task-targeted `done`/`drop`/`rm`/`note` | Exact task content match. |\n| `phase` | `string` | For `append`; for phase-targeted `done`/`drop`/`rm` | Exact phase name match, except `append` lazily creates a missing phase. |\n| `items` | `string[]` | For `append` | Tasks to append. `minItems: 1`. |\n| `text` | `string` | For `note` | Note text; trailing whitespace is stripped before storing. Empty-after-trim is rejected. |\n\n## Outputs\nThe tool returns a single-shot `AgentToolResult`:\n\n- `content`: one text part containing the summary from `formatSummary(...)`.\n - Empty final state with no errors: `Todo list cleared.`\n - Non-empty final state: remaining-item list, current phase progress, then a per-phase tree.\n - If the active `in_progress` task has notes, the summary includes the note bodies inline.\n - If any op produced validation/runtime errors, the summary starts with `Errors: ...` but still returns the mutated state.\n- `details`:\n - `phases: TodoPhase[]`\n - `storage: \"session\" | \"memory\"`\n\n`TodoPhase` / `TodoItem` state model:\n\n- `TodoPhase`: `{ name: string, tasks: TodoItem[] }`\n- `TodoItem`: `{ content: string, status: \"pending\" | \"in_progress\" | \"completed\" | \"abandoned\", notes?: string[] }`\n\nThe TUI renderer (`todoWriteToolRenderer`) merges call and result into one transcript block, renders phases as a tree, shows note counts as superscripts, and renders the note bodies only for the current `in_progress` task. Collapsed transcript previews cap tree items at `PREVIEW_LIMITS.COLLAPSED_ITEMS` (`8`).\n\n## Flow\n1. `TodoWriteTool.execute(...)` clones the current cached phases from `session.getTodoPhases?.() ?? []` (`packages/coding-agent/src/tools/todo-write.ts`).\n2. `applyParams(...)` walks `params.ops` in order and applies each entry with `applyEntry(...)`.\n3. Each op mutates the working phase array:\n - `initPhases(...)` rebuilds the list from scratch.\n - `start` resolves a task by exact `content`, demotes every other `in_progress` task to `pending`, then marks the target `in_progress`.\n - `done` / `drop` use `getTaskTargets(...)` to target one task, one phase, or every task.\n - `rm` removes one task, clears one phase's `tasks`, or clears all phases' task arrays.\n - `appendItems(...)` resolves or creates the target phase and pushes new `pending` tasks unless the same task content already exists anywhere.\n - `note` trims trailing whitespace, rejects empty text, and appends the note to `task.notes`.\n4. Missing task/phase references are recorded in an `errors` array by `resolveTaskOrError(...)` / `resolvePhaseOrError(...)`; execution continues through the rest of the batch.\n5. After the full batch, `normalizeInProgressTask(...)` enforces the single-active-task invariant:\n - if multiple tasks are `in_progress`, only the first stays active and the rest become `pending`;\n - if none are `in_progress`, the first `pending` task in phase/task order is auto-promoted to `in_progress`.\n6. `execute(...)` stores the normalized phases with `session.setTodoPhases?.(...)` and reports `storage` as `\"session\"` when `session.getSessionFile()` exists, else `\"memory\"`.\n7. The agent runtime also watches `todo_write` tool results in `packages/coding-agent/src/session/agent-session.ts`; successful results refresh cached todos, failed results inject a hidden next-turn reminder telling the model that todo progress is not visible until it retries.\n8. The event controller updates the visible todo UI from `result.details.phases` on success, or shows a warning on error (`packages/coding-agent/src/modes/controllers/event-controller.ts`).\n\n## Modes / Variants\n### State transitions\n\n| Current status | `start` | `done` | `drop` | `rm` | `append` | `note` |\n| --- | --- | --- | --- | --- | --- | --- |\n| `pending` | `in_progress` on target | `completed` | `abandoned` | Removed | New tasks enter as `pending` | No status change |\n| `in_progress` | Target stays `in_progress`; non-target active tasks become `pending` | `completed` | `abandoned` | Removed | No status change | No status change |\n| `completed` | Can be set back to `in_progress` if targeted | Stays `completed` | Becomes `abandoned` if targeted | Removed | No status change | No status change |\n| `abandoned` | Can be set back to `in_progress` if targeted | Becomes `completed` if targeted | Stays `abandoned` | Removed | No status change | No status change |\n\nNormalization then re-applies the single-active-task rule after the full op batch.\n\n### Op targeting rules\n- `done`, `drop`, `rm`:\n - `task` set: affect one exact-content task.\n - else `phase` set: affect every task in that exact-name phase.\n - else: affect every task in every phase.\n- `append` is the only op that creates a missing phase.\n- `note` only targets a single task.\n- `init` discards previous phases entirely.\n\n### Markdown round-trip helpers\nThe same file also exposes non-tool helpers used by `/todo`:\n- `phasesToMarkdown(...)` serializes phases as headings plus checklist items (`[ ]`, `[/]`, `[x]`, `[-]`) with blockquote note bodies.\n- `markdownToPhases(...)` parses that format, defaults orphan tasks into a `Todos` phase, accepts `>` as an `in_progress` marker and `~` as `abandoned`, and runs the same normalization step.\n\n## Side Effects\n- Filesystem\n - None in the tool itself.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Mutates the session todo cache through `setTodoPhases`.\n - `storage` reports whether the session has a backing session file, but the tool does not append a custom session entry itself.\n - Successful tool-result messages carry `details.phases`; `getLatestTodoPhasesFromEntries(...)` can reconstruct state later from those transcript entries.\n - Failed `todo_write` results cause `agent-session` to enqueue a hidden next-turn reminder (`customType: \"todo-write-error-reminder\"`).\n- User-visible prompts / interactive UI\n - Transcript block is rendered by `todoWriteToolRenderer` and merged with the call line.\n - `event-controller` updates the visible todo panel from successful results.\n - On error, `event-controller` shows `Todo update failed...`; the visible panel may stay stale until a later successful call.\n- Background work / cancellation\n - `AgentSession.setTodoPhases(...)` schedules auto-clear timers for `completed` / `abandoned` tasks via `tasks.todoClearDelay`.\n\n## Limits & Caps\n- `ops` array: `minItems: 1` (`todoWriteSchema`).\n- `init.list[*].items`: `minItems: 1`.\n- `append.items`: `minItems: 1`.\n- Renderer collapsed preview: `PREVIEW_LIMITS.COLLAPSED_ITEMS = 8` (`packages/coding-agent/src/tools/render-utils.ts`).\n- Auto-clear delay: `tasks.todoClearDelay` default `60` seconds; `< 0` disables auto-clear, `0` clears on the next microtask (`packages/coding-agent/src/session/agent-session.ts`).\n- Tool execution mode: `concurrency = \"exclusive\"`, `strict = true`, `loadMode = \"discoverable\"`.\n\n## Errors\n- The tool does not throw for ordinary bad op payloads; it accumulates human-readable strings in `errors` and still returns success with the mutated state.\n- Error strings come from the helpers in `packages/coding-agent/src/tools/todo-write.ts`, including:\n - `Missing list for init operation`\n - `Missing task content`\n - `Task \"...\" not found` with an extra empty-list hint when applicable\n - `Missing phase name`\n - `Phase \"...\" not found`\n - `Missing phase name for append operation`\n - `Missing items for append operation`\n - `Task \"...\" already exists`\n - `Missing text for note operation`\n- Because ops are processed in order, earlier errors do not roll back later ops.\n- Runtime-level tool failure is handled outside the tool body: `agent-session` injects a hidden reminder and the event controller warns the user that visible progress may be stale.\n- Idempotency is op-specific:\n - `init` is a full replacement; replaying the same payload yields the same state.\n - `start`, `done`, and `drop` are effectively idempotent on an existing target state, but `start` also demotes any other active task.\n - `rm` is not idempotent for targeted removals: the second call errors because the task or phase is gone.\n - `append` is not idempotent: duplicate task content is rejected with `Task \"...\" already exists`.\n - `note` is append-only and never idempotent; replaying it adds another note entry.\n\n## Notes\n- Task lookup is exact string equality inside the tool. The model-facing prompt says task content and phase names are identifiers and should stay unique; `append` enforces task uniqueness globally, but `init` does not validate duplicate task or phase names.\n- `findTaskByContent(...)` returns the first matching task across phases. Duplicate task contents make later targeted ops ambiguous.\n- `normalizeInProgressTask(...)` runs after the whole batch, not after each op. A single call can intentionally build an intermediate invalid state and rely on final normalization.\n- `storage: \"session\"` means the session has a session-file backing; it does not mean this tool wrote a durable custom entry.\n- Reload persistence differs by path:\n - plain `todo_write` calls survive in transcript tool-result details;\n - `/todo` command edits additionally append `customType: \"user_todo_edit\"` entries and inject a visible-to-model `<system-reminder>` developer message describing the manual edit.\n- On session resume, `AgentSession.#syncTodoPhasesFromBranch()` strips `completed` and `abandoned` tasks before restoring the cached list. The `/todo` command works around that by reading the latest transcript/custom-entry state so historical done/dropped tasks still appear to the user.\n- Tool availability is gated by `todo.enabled`, and the registry excludes it when `includeYield` is enabled (`packages/coding-agent/src/tools/index.ts`).\n- Subagents do not inherit `todo_write`; `packages/coding-agent/src/task/executor.ts` filters it out as a parent-owned tool.\n",
|
|
94
96
|
"tools/web_search.md": "# web_search\n\n> Run one web query through the first available search provider and return LLM-formatted answer, source URLs, and optional citations.\n\n## Source\n- Entry: `packages/coding-agent/src/web/search/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/web-search.md`\n- Key collaborators:\n - `packages/coding-agent/src/web/search/provider.ts` — lazy provider registry; availability chain.\n - `packages/coding-agent/src/web/search/types.ts` — unified `SearchResponse` / `SearchProviderError` types.\n - `packages/coding-agent/src/web/search/render.ts` — TUI renderer details type.\n - `packages/coding-agent/src/web/search/providers/base.ts` — provider interface and shared params contract.\n - `packages/coding-agent/src/web/search/providers/utils.ts` — credential lookup; source normalization.\n - `packages/coding-agent/src/web/search/providers/anthropic.ts` — Anthropic model web-search provider.\n - `packages/coding-agent/src/web/search/providers/brave.ts` — Brave Search API adapter.\n - `packages/coding-agent/src/web/search/providers/duckduckgo.ts` — keyless DuckDuckGo html/lite scrape adapter (permissionless default/fallback).\n - `packages/coding-agent/src/web/search/providers/openai-code.ts` — OpenAI code provider SSE adapter.\n - `packages/coding-agent/src/web/search/providers/exa.ts` — Exa API adapter.\n - `packages/coding-agent/src/web/search/providers/gemini.ts` — Gemini grounding SSE adapter.\n - `packages/coding-agent/src/web/search/providers/jina.ts` — Jina Reader search adapter.\n - `packages/coding-agent/src/web/search/providers/kagi.ts` — Kagi provider wrapper.\n - `packages/coding-agent/src/web/search/providers/kimi.ts` — Kimi search adapter.\n - `packages/coding-agent/src/web/search/providers/parallel.ts` — Parallel provider wrapper.\n - `packages/coding-agent/src/web/search/providers/perplexity.ts` — Perplexity API / OAuth adapter.\n - `packages/coding-agent/src/web/search/providers/searxng.ts` — self-hosted SearXNG adapter.\n - `packages/coding-agent/src/web/search/providers/synthetic.ts` — Synthetic search adapter.\n - `packages/coding-agent/src/web/search/providers/tavily.ts` — Tavily search adapter.\n - `packages/coding-agent/src/web/search/providers/zai.ts` — Z.AI remote search adapter.\n - `packages/coding-agent/src/web/parallel.ts` — Parallel search/extract HTTP client.\n - `packages/coding-agent/src/web/kagi.ts` — Kagi HTTP client.\n - `packages/coding-agent/src/tools/index.ts` — built-in tool registration and enable flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Search query. `executeSearch()` rewrites any `2020`-`2029` substring to the current year before dispatch. |\n| `recency` | `\"day\" \\| \"week\" \\| \"month\" \\| \"year\"` | No | Time filter. Only providers that implement it use it. Prompt text says Brave and Perplexity; code also maps it for Tavily and SearXNG. |\n| `limit` | `number` | No | Max results to return. Usually becomes the provider request's result-count parameter when `num_search_results` is absent. |\n| `max_tokens` | `number` | No | Passed through as `maxOutputTokens` / `max_tokens` only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `temperature` | `number` | No | Passed through only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `num_search_results` | `number` | No | Requested upstream search breadth. For most providers this is the same count used for returned sources. Perplexity is the only adapter that keeps it distinct from `limit`. |\n\n## Outputs\nThe tool returns a single text content block plus structured `details`.\n\n- `content`: `[{ type: \"text\", text: string }]`\n- `details`: `SearchRenderDetails` from `packages/coding-agent/src/web/search/render.ts`\n - `response: SearchResponse`\n - `error?: string`\n\n`text` is produced by `formatForLLM()` in `packages/coding-agent/src/web/search/index.ts`:\n\n- If `response.answer` exists, it is emitted first.\n- If sources exist, a `## Sources` section follows with a source count, then one entry per source:\n - `[n] <title> (<formatted age or published date>)`\n - ` <url>`\n - optional snippet line truncated to 240 chars.\n- If citations exist, a `## Citations` section follows with URL/title plus optional cited text truncated to 240 chars.\n- If related questions exist, a `## Related` bullet list follows.\n- If search queries exist, a `Search queries: <n>` section follows, capped to the first 3 queries and 120 chars each.\n\nFailure output is not thrown at the tool boundary when at least one provider was attempted. Instead the tool returns:\n\n- `content[0].text = \"Error: ...\"`\n- `details.response.provider = <last attempted provider> | \"none\"`\n- `details.error = ...`\n\nStreaming: none. `WebSearchTool.execute()` does not forward its `_signal` argument into `executeSearch()`, so provider cancellation is only available to internal callers that place `signal` inside `SearchQueryParams`.\n\n## Flow\n1. `WebSearchTool.execute()` in `packages/coding-agent/src/web/search/index.ts` delegates directly to `executeSearch()`.\n2. `executeSearch()` resolves the provider list via a single `resolveProviderChain(authStorage, params.provider ?? \"auto\", activeModelProvider)` call. The active model's provider is threaded in from `WebSearchTool` (`this.#session.model?.provider`, falling back to parsing `getActiveModelString()`) and from the CustomTool path (`ctx.model?.provider`).\n3. `resolveProviderChain()` is active-model-gated, not credential-scanning:\n - an explicitly preferred/selected provider that is `isAvailable()` becomes the primary;\n - otherwise the active model's own native search (`MODEL_PROVIDER_TO_SEARCH`) becomes the primary, but only when that provider's own credentials exist (`isAvailable()`);\n - keyed standalone providers are never auto-selected — explicit selection only.\n4. DuckDuckGo (keyless, `isAvailable()` always true) is always appended as the terminal fallback, so a missing primary — or a primary runtime failure — still returns results with zero configuration. There is no longer a \"No web search provider configured\" path.\n5. For each provider in order, `executeSearch()` calls `provider.search()` with:\n - `query` after year-rewrite,\n - `limit`, `recency`, `temperature`, `maxOutputTokens`, `numSearchResults`,\n - `systemPrompt` from `packages/coding-agent/src/prompts/tools/web-search.md`.\n6. On the first successful `SearchResponse`, `formatForLLM()` renders answer/sources/citations/related/search-queries into one text block and returns it with `details.response`.\n7. If a provider throws, `executeSearch()` records the error and tries the next provider. There is no provider-level parallel fan-out; fallback is sequential.\n8. After all candidates fail, `formatProviderError()` normalizes the last error:\n - Anthropic `404` becomes `Anthropic web search returned 404 (model or endpoint not found).`\n - `401`/`403` become `<Provider> authorization failed ...` except Z.AI, which preserves its raw message.\n - other `SearchProviderError`s surface `error.message`.\n9. If more than one provider was attempted, the final message is `All web search providers failed (<labels>). Last error: <message>`; otherwise it is just the normalized last error.\n\n## Modes / Variants\n- **Provider selection**\n - **Forced provider**: internal callers may pass `provider`; an unavailable forced provider falls back to the chain (which always ends in DuckDuckGo) instead of hard-failing (`packages/coding-agent/src/web/search/index.ts`). This field is not in the model-facing schema.\n - **Preferred provider**: `setPreferredSearchProvider()` sets a module-global default consumed by `resolveProviderChain()`. `packages/coding-agent/src/sdk.ts` and `packages/coding-agent/src/modes/controllers/selector-controller.ts` wire this from settings.\n - **Active-model-gated auto**: in `auto` mode, resolution maps the active model's provider to its own native search via `MODEL_PROVIDER_TO_SEARCH` (`openai|openai-codex→codex`, `anthropic→anthropic`, `google|google-gemini-cli|google-antigravity|gemini→gemini`, `moonshot|kimi-code|kimi→kimi`, `zai`, `perplexity`, `synthetic`), used only if that provider's creds exist; everything else falls to DuckDuckGo. `SEARCH_PROVIDER_ORDER` no longer drives auto credential scanning — it is retained for explicit selection, labels, and CLI option lists.\n- **Provider adapters**\n - **Tavily** — `packages/coding-agent/src/web/search/providers/tavily.ts`\n - Availability: API key from env or `agent.db` via `findCredential()`.\n - Querying: POST `https://api.tavily.com/search`.\n - `recency` maps to Tavily `time_range`; code explicitly keeps `topic` at default general scope instead of narrowing to news.\n - `limit` / `num_search_results`: adapter uses `params.numSearchResults ?? params.limit`, clamped to `5..20` with default `5`.\n - Output: `answer`, `sources`, `requestId`, `authMode: \"api_key\"`.\n - **Perplexity** — `packages/coding-agent/src/web/search/providers/perplexity.ts`\n - Availability: auth precedence is `PERPLEXITY_COOKIES` -> OAuth token in `agent.db` -> `PERPLEXITY_API_KEY` / `PPLX_API_KEY`.\n - OAuth/cookie mode: POSTs to `https://www.perplexity.ai/rest/sse/perplexity_ask`, consumes SSE, merges partial events, extracts answer and source URLs, sets `authMode: \"oauth\"`.\n - API-key mode: POSTs to `https://api.perplexity.ai/chat/completions` with `model: \"sonar-pro\"`, `search_mode: \"web\"`, `num_search_results`, optional `search_recency_filter`, `max_tokens`, `temperature`.\n - `num_search_results` controls upstream API breadth only in API-key mode. `limit` is preserved separately as `num_results` and slices returned `sources` after parsing in both auth modes.\n - Output may include `answer`, `sources`, `citations`, `usage`, `model`, `requestId`, `authMode`.\n - **Brave** — `packages/coding-agent/src/web/search/providers/brave.ts`\n - Availability: `BRAVE_API_KEY` only.\n - Querying: GET `https://api.search.brave.com/res/v1/web/search` with `count`, `extra_snippets=true`, and `freshness=pd|pw|pm|py` for `recency`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Jina** — `packages/coding-agent/src/web/search/providers/jina.ts`\n - Availability: `JINA_API_KEY` only.\n - Querying: GET-like fetch to `https://s.jina.ai/<encoded query>` with bearer auth.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` / `num_search_results`: adapter slices sources to `params.numSearchResults ?? params.limit` when provided; otherwise returns all payload items.\n - Output: `sources` only.\n - **Kimi** — `packages/coding-agent/src/web/search/providers/kimi.ts`\n - Availability: `MOONSHOT_SEARCH_API_KEY`, `KIMI_SEARCH_API_KEY`, `MOONSHOT_API_KEY`, or `agent.db` credentials for `moonshot` / `kimi-code`.\n - Querying: POST to `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` / default `https://api.kimi.com/coding/v1/search` with `text_query`, `limit`, `enable_page_crawling`, `timeout_seconds: 30`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Anthropic** — `packages/coding-agent/src/web/search/providers/anthropic.ts`\n - Availability: `findAnthropicAuth()` from `@gajae-code/ai`.\n - Querying: Anthropic model Messages API with web-search tool enabled.\n - `max_tokens` and `temperature` pass through.\n - `limit` and `num_search_results` are collapsed together before dispatch: `num_results = params.numSearchResults ?? params.limit`.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage.searchRequests`, `model`, `requestId`.\n - **Gemini** — `packages/coding-agent/src/web/search/providers/gemini.ts`\n - Availability: OAuth credentials in `agent.db` for `google-gemini-cli` or `google-antigravity`.\n - Querying: SSE `streamGenerateContent` call with Google Search grounding enabled. Antigravity auth tries two fallback endpoints and retries `401/403/400 invalid auth` once after token refresh; `429/5xx` retry with exponential backoff and server-provided retry delay, capped by a `5 * 60 * 1000` ms rate-limit budget.\n - `max_tokens` and `temperature` pass through as `generationConfig.maxOutputTokens` / `generationConfig.temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage`, `model`.\n - **OpenAI code** — `packages/coding-agent/src/web/search/providers/openai-code.ts`\n - Availability: non-expired OAuth credential for `openai-code` in `agent.db`.\n - Querying: SSE POST to `https://chatgpt.com/backend-api/openai-code/responses` with `tool_choice: { type: \"web_search\" }` and `search_context_size: \"high\"` by default.\n - Ignores `recency`, `max_tokens`, and `temperature` in this tool path.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `usage`, `model`, `requestId`. If the streamed response has no `url_citation` annotations, the adapter falls back to scraping markdown links and bare URLs from the answer text.\n - **Z.AI** — `packages/coding-agent/src/web/search/providers/zai.ts`\n - Availability: env or `agent.db` credential for `zai`.\n - Querying: JSON-RPC `tools/call` against the Z.AI `web_search_prime` search endpoint.\n - Fallback chain inside the provider: tries `{query,count}`, then `{search_query,count}`, then `{search_query, search_engine:\"search-prime\", count}` when earlier attempts fail with argument-shape errors.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include parsed free-text `answer`, `sources`, `requestId`.\n - **Exa** — `packages/coding-agent/src/web/search/providers/exa.ts`\n - Availability: requires `EXA_API_KEY` and settings must not disable `exa.enabled` or `exa.enableSearch`.\n - Querying: POST `https://api.exa.ai/search` with `EXA_API_KEY`. No no-key fallback is used.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: synthesized `answer` from up to 3 result summaries, `sources`, `requestId`.\n - **Parallel** — `packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`\n - Availability: env or `agent.db` credential for `parallel`.\n - Querying: POST `https://api.parallel.ai/v1beta/search` with `objective=query`, `search_queries=[query]`, `mode:\"fast\"`, `max_chars_per_result: 10000`, beta header `search-extract-2025-10-10`.\n - There is no provider fan-out here despite the name; the current adapter always sends a one-element `search_queries` array.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `requestId`.\n - **Kagi** — `packages/coding-agent/src/web/search/providers/kagi.ts`, `packages/coding-agent/src/web/kagi.ts`\n - Availability: env or `agent.db` credential for `kagi`.\n - Querying: GET `https://kagi.com/api/v0/search?q=<query>&limit=<n>` with `Authorization: Bot <key>`.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `relatedQuestions`, `requestId`.\n - **Synthetic** — `packages/coding-agent/src/web/search/providers/synthetic.ts`\n - Availability: env or `agent.db` credential for `synthetic`.\n - Querying: POST `https://api.synthetic.new/v2/search` with `{ query }`.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: `sources` only.\n - **SearXNG** — `packages/coding-agent/src/web/search/providers/searxng.ts`\n - Availability: endpoint from `searxng.endpoint` setting or `SEARXNG_ENDPOINT` env.\n - Querying: GET `<endpoint>/search?format=json&q=...`; optional settings add `categories` and `language`.\n - Auth precedence: Basic auth (`searxng.basicUsername` / `searxng.basicPassword` or env equivalents) over bearer token (`searxng.token` / `SEARXNG_TOKEN`). Basic credentials are validated for RFC 7617 restrictions.\n - `recency` maps to `time_range`; `week` is downgraded to `month` because SearXNG does not support week.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..20`, default `10`.\n - Output: `sources`, `relatedQuestions` from `suggestions`.\n\n## Side Effects\n- Network\n - Calls one or more external search providers over HTTPS until one succeeds or all fail.\n - Provider-specific transports include JSON POST, JSON GET, SSE streaming (Perplexity OAuth/API, Gemini, OpenAI code), and JSON-RPC over HTTP (Z.AI).\n- Subprocesses / native bindings\n - None.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses a module-global provider-instance cache in `packages/coding-agent/src/web/search/provider.ts`.\n - Uses a module-global preferred-provider setting in the same file.\n - `packages/coding-agent/src/tools/index.ts` gates tool availability behind `session.settings.get(\"web_search.enabled\")`.\n- Background work / cancellation\n - Many provider adapters accept `AbortSignal`, but `WebSearchTool.execute()` does not pass its `_signal` into `executeSearch()`. Internal callers can still use cancellation by calling `runSearchQuery()` / `executeSearch()` with `signal` embedded in params.\n\n## Limits & Caps\n- Provider registry size: 15 providers (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/provider.ts`), including the keyless `duckduckgo` default/fallback. `SEARCH_PROVIDER_ORDER` no longer drives auto selection — see \"Active-model-gated auto\" above.\n- `formatForLLM()` truncates source snippets and citation text to 240 chars (`packages/coding-agent/src/web/search/index.ts`).\n- `formatForLLM()` emits at most 3 search queries, each truncated to 120 chars (`packages/coding-agent/src/web/search/index.ts`).\n- Brave result count: default `10`, max `20` (`DEFAULT_NUM_RESULTS`, `MAX_NUM_RESULTS` in `packages/coding-agent/src/web/search/providers/brave.ts`).\n- Tavily result count: default `5`, max `20` (`packages/coding-agent/src/web/search/providers/tavily.ts`).\n- Kimi result count: default `10`, max `20`; request timeout field fixed to `30` seconds (`packages/coding-agent/src/web/search/providers/kimi.ts`).\n- Parallel result count: default `10`, max `40`; per-result excerpt cap `10_000` chars (`packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`).\n- Kagi result count: default `10`, max `40` (`packages/coding-agent/src/web/search/providers/kagi.ts`).\n- SearXNG result count: default `10`, max `20` (`packages/coding-agent/src/web/search/providers/searxng.ts`).\n- Perplexity API-key mode defaults: `max_tokens = 8192`, `temperature = 0.2`, `num_search_results = 10` (`packages/coding-agent/src/web/search/providers/perplexity.ts`).\n- Anthropic defaults: model `anthropic-model-haiku-4-5`, `DEFAULT_MAX_TOKENS = 4096` when the provider omits `max_tokens` (`packages/coding-agent/src/web/search/providers/anthropic.ts`).\n- Gemini retries: up to `3` retries per endpoint, base delay `1000` ms, rate-limit delay budget `5 * 60 * 1000` ms (`packages/coding-agent/src/web/search/providers/gemini.ts`).\n\n## Errors\n- There is no \"no provider configured\" case: DuckDuckGo (keyless) is always appended as the terminal fallback, so the chain is never empty.\n- Tool-level all-failed case also returns a normal tool result with `Error: ...`; failures are summarized from the last attempted provider.\n- Provider adapters usually throw `SearchProviderError(provider, message, status)` for HTTP or protocol failures.\n- Availability probes intentionally swallow lookup errors and report `false` in many providers via `isApiKeyAvailable()`.\n- Per-provider notable failures:\n - Anthropic: missing credentials throw a plain `Error`; a `404` is remapped to a special final message by `formatProviderError()`.\n - Perplexity: missing auth throws a plain `Error`; OAuth stream `error_code` events become `SearchProviderError(\"perplexity\", ...)`.\n - Gemini: auth refresh, endpoint fallback, and retry logic are internal; final exhausted failures surface as `SearchProviderError(\"gemini\", ...)`.\n - OpenAI code and Gemini both fail if the HTTP response has no body after a `200`.\n - Z.AI treats malformed SSE/JSON-RPC payloads as provider errors and retries only argument-shape failures across request variants.\n - SearXNG `findAuth()` can throw configuration errors before any HTTP call if Basic auth fields are incomplete or invalid.\n\n## Notes\n- The model-facing schema does not expose `provider`, but internal callers can force one through `SearchQueryParams`.\n- `resolveProviderChain()` lazily imports provider modules and caches singleton instances. Just asking for labels via `getSearchProviderLabel()` does not trigger those imports.\n- Most providers treat `limit` and `num_search_results` as the same number because adapters pass `params.numSearchResults ?? params.limit`. Perplexity is the only implementation that preserves both concepts.\n- The prompt says `recency` is for Brave and Perplexity, but code also implements it for Tavily and SearXNG.\n- The year rewrite in `executeSearch()` is blunt: any `2020`-`2029` substring is replaced with the current year.\n- `packages/coding-agent/src/config/settings-schema.ts` exposes provider preferences for `auto`, `exa`, `brave`, `jina`, `kimi`, `perplexity`, `anthropic`, `zai`, `tavily`, `kagi`, `synthetic`, `parallel`, and `searxng`. Gemini and OpenAI code are in the registry and auto chain but not in that settings enum.\n- Exa availability fails closed unless `EXA_API_KEY` is present and Exa settings remain enabled.\n",
|
|
95
97
|
"tools/write.md": "# write\n\n> Create or overwrite a file, archive entry, or SQLite row.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/archive-reader.ts` — parse `archive.ext:entry` selectors.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite paths and perform row insert/update/delete.\n - `packages/coding-agent/src/lsp/index.ts` — format-on-write and diagnostics writethrough.\n - `packages/coding-agent/src/tools/auto-generated-guard.ts` — block overwriting generated files.\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidate shared FS scan caches after writes.\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolve paths and enforce plan-mode write policy.\n\n## Inputs\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Target path. Plain file path writes a filesystem file. `archive.ext:inner/path` writes an archive entry for `.tar`, `.tar.gz`, `.tgz`, or `.zip`. `db.sqlite:table` inserts a row. `db.sqlite:table:key` updates or deletes a row. |\n| `content` | `string` | Yes | Full replacement file content, archive entry content, or SQLite row payload. SQLite non-delete writes must parse as a JSON5 object. Empty or whitespace-only content deletes a SQLite row when `path` includes a row key. |\n\nWorked examples:\n\n```text\npath: \"src/generated/config.json\"\ncontent: \"{\\n \\\"enabled\\\": true\\n}\\n\"\n```\n\n```text\npath: \"fixtures/archive.zip:templates/email.txt\"\ncontent: \"hello\\n\"\n```\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n## Outputs\nSingle-shot result.\n\n- Success always returns a text block.\n - Plain file write: `Successfully wrote <bytes> bytes to <relative-path>`.\n - Archive write: `Successfully wrote <bytes> bytes to <relative-archive-path>:<entry-path>`.\n - SQLite write: one of `Inserted row into <table>`, `Updated row '<key>' in <table>`, `No row updated ...`, `Deleted row ...`, `No row deleted ...`.\n- If hashline prefixes were copied from `read` output and stripped first, the first text block gets an extra note.\n- Plain file writes may also return `details.diagnostics` plus `details.meta.diagnostics` when LSP diagnostics-on-write is enabled.\n- SQLite writes use `toolResult(...).sourcePath(...)`, so `details.meta.sourcePath` points at the database file.\n- Archive writes return empty `details`.\n\n## Flow\n1. `WriteTool.execute()` in `packages/coding-agent/src/tools/write.ts` strips `LINE+ID|` hashline prefixes from `content` when the session is in hashline display mode.\n2. It calls `#resolveArchiveWritePath()` first. That uses `parseArchivePathCandidates()` from `packages/coding-agent/src/tools/archive-reader.ts`, checks candidate archive files on disk, and falls back to the longest matching archive suffix even when the archive file does not exist yet.\n3. Archive writes call `enforcePlanModeWrite(..., { op: exists ? \"update\" : \"create\" })`, then `#writeArchiveEntry()`.\n - The parent directory of the archive file is created with `fs.mkdir(..., { recursive: true })`.\n - `.zip` archives are read with `fflate.unzipSync()`, the target entry is replaced in an in-memory map, and the archive is rewritten with `fflate.zipSync()` + `Bun.write()`.\n - `.tar`, `.tar.gz`, and `.tgz` archives are read with `Bun.Archive`, existing entries are copied into an object map, the target entry is replaced, and `Bun.Archive.write()` rewrites the archive.\n - `invalidateFsScanAfterWrite()` runs on the archive file path.\n4. If the path is not treated as an archive, `execute()` calls `#resolveSqliteWritePath()`. That uses `parseSqlitePathCandidates()` and `isSqliteFile()` from `packages/coding-agent/src/tools/sqlite-reader.ts`. Existing non-SQLite files suppress the SQLite path interpretation.\n5. SQLite writes call `enforcePlanModeWrite(..., { op: \"update\" })`, then `#writeSqliteRow()`.\n - The database must already exist; missing DBs throw `SQLite database '<path>' not found`.\n - The tool opens `new Database(..., { create: false, strict: true })` and sets `PRAGMA busy_timeout = 3000`.\n - Whitespace-only `content` with a row key deletes a row.\n - Non-empty `content` is parsed with `Bun.JSON5.parse()`, must be a JSON object, and is routed to insert/update helpers from `packages/coding-agent/src/tools/sqlite-reader.ts`.\n - `invalidateFsScanAfterWrite()` runs on the DB path and the connection is closed in `finally`.\n6. Otherwise the tool treats `path` as a plain filesystem file.\n - `enforcePlanModeWrite(..., { op: \"create\" })` runs before path resolution.\n - Existing files are checked by `assertEditableFile()` to block overwriting detected generated files.\n - The session’s writethrough callback writes content. With LSP enabled and `lsp.formatOnWrite` / `lsp.diagnosticsOnWrite` settings on, `createLspWritethrough()` may format content, sync it through LSP servers, save it, and collect diagnostics. Otherwise `writethroughNoop()` writes directly with `Bun.write()` or `file.write()`.\n - `invalidateFsScanAfterWrite()` runs on the file path.\n7. The tool returns a text result and optional diagnostics metadata.\n\n## Modes / Variants\n### Plain file path\n- Target is any path that does not resolve as an archive selector and does not resolve as an existing-or-new SQLite selector.\n- Existing files are overwritten.\n- `write.ts` does not call `fs.mkdir()` on this path; parent-directory creation is only implemented in the archive branch.\n\nExample:\n\n```text\npath: \"tmp/output.txt\"\ncontent: \"hello\\n\"\n```\n\n### Archive entry write\n- Selector syntax: `archive.ext:inner/path`.\n- Supported archive suffixes come from `parseArchivePathCandidates()`: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- The inner path is normalized to `/`, strips empty and `.` segments, rejects `..`, and rejects directory targets ending in `/`.\n- Rewrites the whole archive file after replacing one entry.\n- Creates the parent directory for the archive file if needed.\n\nExample:\n\n```text\npath: \"build/assets.tar.gz:css/app.css\"\ncontent: \"body { color: black; }\\n\"\n```\n\n### SQLite table insert\n- Selector syntax: `db.sqlite:table`.\n- `content` must parse as a JSON5 object.\n- Empty object is allowed and becomes `INSERT INTO <table> DEFAULT VALUES`.\n- Query parameters are rejected for SQLite writes.\n\nExample:\n\n```text\npath: \"data/app.db:users\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n### SQLite row update / delete\n- Selector syntax: `db.sqlite:table:key`.\n- Non-empty `content` updates the row.\n- Empty or whitespace-only `content` deletes the row.\n- Row lookup uses the single-column primary key if present; otherwise it falls back to `rowid`. Composite primary keys and `WITHOUT ROWID` tables are rejected for key-based writes.\n\nExample update:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{email: 'ada@example.com'}\"\n```\n\nExample delete:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"\"\n```\n\n## Side Effects\n- Filesystem\n - Creates or overwrites plain files.\n - Rewrites entire archive files when writing an archive entry.\n - Creates parent directories for archive files only.\n - Mutates existing SQLite databases; never creates a new SQLite DB.\n- Subprocesses / native bindings\n - Uses Bun SQLite bindings via `bun:sqlite`.\n - Uses Bun archive APIs and lazily imports `fflate` for ZIP reads/writes.\n - May talk to configured LSP servers through `packages/coding-agent/src/lsp/index.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Invalidates shared filesystem scan cache entries through `invalidateFsScanAfterWrite()`.\n - Enforces plan-mode write restrictions before mutating the target.\n- Background work / cancellation\n - Marks the tool `nonAbortable = true` and `concurrency = \"exclusive\"` in `WriteTool`.\n - LSP writethrough can schedule deferred diagnostics fetches after a timeout, but plain `write.ts` only consumes the immediate return value.\n\n## Limits & Caps\n- `WriteTool` itself exposes no byte cap beyond storing `content` in memory and, for archives, rebuilding the archive in memory.\n- Generated-file detection reads at most `CHECK_BYTE_COUNT = 1024` bytes and `HEADER_LINE_LIMIT = 40` header lines from an existing file in `packages/coding-agent/src/tools/auto-generated-guard.ts`.\n- SQLite writes set `PRAGMA busy_timeout = 3000`.\n- LSP writethrough uses a `5_000` ms operation timeout in `runLspWritethrough()` and may schedule a deferred diagnostics fetch with `AbortSignal.timeout(25_000)` in `scheduleDeferredDiagnosticsFetch()`.\n\n## Errors\n- Invalid archive subpaths throw `ToolError` with messages such as:\n - `Archive write path must target a file inside the archive`\n - `Archive write path must target a file, not a directory`\n - `Archive path cannot contain '..'`\n- SQLite path parsing throws on unsupported forms:\n - `SQLite write paths do not support query parameters`\n - `SQLite write path must target a table`\n - `SQLite row writes require a non-empty row key`\n- Missing SQLite DBs surface as `SQLite database '<path>' not found`.\n- SQLite content errors are model-visible `ToolError`s, including invalid JSON5, non-object payloads, unknown columns, non-scalar values, empty update objects, composite primary keys, and `WITHOUT ROWID` tables.\n- Existing plain files may be rejected by `assertEditableFile()` when they look generated.\n- Archive read/write failures and unexpected SQLite exceptions are wrapped in `ToolError(error.message)`.\n- If no LSP server matches or LSP formatting/diagnostics times out, file writes still fall back to writing content; diagnostics may be omitted.\n\n## Notes\n- Archive path detection runs before SQLite detection. A path that matches an archive selector is never treated as SQLite.\n- SQLite detection declines when an existing file with a `.sqlite` / `.db` suffix is present but does not have SQLite magic bytes; then the path falls back to a plain file write.\n- ZIP entry content is encoded with `new TextEncoder().encode(content)` in `#writeArchiveEntry()`. Non-ZIP archive writes pass the string directly to `Bun.Archive.write()`.\n- The prompt forbids two common anti-patterns: using `write` for routine edits that should use `edit`, and creating `*.md` / `README` files unless explicitly requested. It also forbids emojis unless requested.\n- Plain file writes report byte count using `cleanContent.length`, which is UTF-16 code units in JS, not an on-disk byte measurement.\n- `stripWriteContent()` only removes hashline prefixes when the session’s file display mode has `hashLines` enabled; otherwise content is written unchanged.\n",
|