@gajae-code/coding-agent 0.6.0 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/dist/types/cli/update-cli.d.ts +3 -0
- package/dist/types/config/model-registry.d.ts +3 -0
- package/dist/types/config/models-config-schema.d.ts +5 -0
- package/dist/types/lsp/startup-events.d.ts +1 -0
- package/dist/types/skill-state/deep-interview-mutation-guard.d.ts +5 -0
- package/package.json +7 -7
- package/scripts/build-binary.ts +0 -7
- package/src/cli/update-cli.ts +53 -3
- package/src/config/models-config-schema.ts +1 -0
- package/src/defaults/gjc/skills/deep-interview/SKILL.md +3 -1
- package/src/defaults/gjc/skills/ralplan/SKILL.md +2 -0
- package/src/gjc-runtime/state-runtime.ts +22 -14
- package/src/gjc-runtime/state-writer.ts +21 -1
- package/src/internal-urls/docs-index.generated.ts +3 -4
- package/src/lsp/startup-events.ts +24 -0
- package/src/modes/interactive-mode.ts +5 -18
- package/src/session/agent-session.ts +28 -20
- package/src/skill-state/active-state.ts +53 -30
- package/src/skill-state/deep-interview-mutation-guard.ts +238 -30
- package/src/tools/ast-edit.ts +2 -2
- package/src/utils/edit-mode.ts +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
|
|
2
2
|
|
|
3
|
-
export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","REBRANDING_PLAN_260525.md","ai-schema-normalize.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","bot-integration.md","brand-assets.md","bridge.md","codebase-overview.md","compaction.md","composer-codex-parity.md","computer-use/README.md","environment-variables.md","external-control-readiness.md","fs-scan-cache-architecture.md","
|
|
3
|
+
export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","REBRANDING_PLAN_260525.md","ai-schema-normalize.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","bot-integration.md","brand-assets.md","bridge.md","codebase-overview.md","compaction.md","composer-codex-parity.md","computer-use/README.md","environment-variables.md","external-control-readiness.md","fs-scan-cache-architecture.md","geobench.md","gjc-dogfood-skill-template.md","grok-build-provider-design.md","handoff-generation-pipeline.md","hermes-mcp-bridge.md","hotspot-map-successor.md","keybindings.md","lsp-config.md","memory.md","models.md","multi-vendor-profiles.md","native-ffi-optimization-policy.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","onboarding-packet.md","onboarding-receipt.md","ooo-bridge-extension-contract.md","openclaw-hermes-rpc-integration.md","perf-profiling-corpus.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/calc.md","tools/checkpoint.md","tools/computer.md","tools/cron.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/monitor.md","tools/read.md","tools/recipe.md","tools/render_mermaid.md","tools/resolve.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md"];
|
|
4
4
|
|
|
5
5
|
export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
6
6
|
"ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. Observed statistics & failure modes\n\nSource: `~/.gjc/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n|------------------|-------------------:|--------:|------------:|\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-openai-code | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-openai-code | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n|---------------------|-----:|\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now *looks\nlike* a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n|-------------------|:-:|---------:|---:|\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n|--------------------------|------:|----------|\n| Patch-DSL (`§PATH`/anchor/`«»≔` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped *inside* JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n *different* tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\n MLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n ` code `). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is *worse* in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.",
|
|
@@ -19,7 +19,6 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
19
19
|
"environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@gajae-code/utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.gjc/agent/.env`, respecting `GJC_CONFIG_DIR` / `GJC_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.gjc/.env`, respecting `GJC_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `GJC_*` keys are mirrored to `GJC_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | |\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `AZURE_OPENAI_API_KEY` | Azure OpenAI auth | Using `azure-openai` / `azure-openai-responses` models | Pair with `AZURE_OPENAI_BASE_URL` or `AZURE_OPENAI_RESOURCE_NAME` |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| `DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `GJC_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `gjc auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `GJC_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `GJC_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.gjc/` (respecting `GJC_CONFIG_DIR`). |\n\nThe gateway has no dedicated env vars — it inherits `GJC_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `gjc auth-gateway token`.\n\n### Multi-account credential ranking\n\nWhen more than one OAuth credential is stored for the same provider (e.g. several Anthropic accounts), `AuthStorage` ranks them at session start to pick which one serves the session. This env var selects the ranking strategy; it is fully opt-in and does not change the default.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------------- | ------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `GJC_CREDENTIAL_RANKING_MODE` | Multi-account OAuth credential selection strategy | Never (opt-in) | `balanced` (default) prefers the least-drained account (spreads load, keeps burst headroom). `earliest-reset` prefers the soonest-to-reset non-blocked account (earliest-expiry-first) so perishable tumbling-window quota (e.g. Claude 5h/7d) is drained before reset. Unset/unknown → `balanced`. Only affects session-start ranking; blocked/exhausted accounts still sort last. |\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `ANTHROPIC_MODEL_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `ANTHROPIC_MODEL_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\nCredential fallback order is static env (`AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` plus optional `AWS_SESSION_TOKEN`), named profile / SSO / `credential_process`, then EC2 IMDSv2. `models.yml` Bedrock entries use `api: bedrock-converse-stream` and do not require `apiKey` or `apiKeyEnv` because the provider signs requests from this AWS chain.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Model provider base URL overrides\n\nBuilt-in model provider base URLs resolve with this precedence:\n\n1. `models.yml` / model config provider `baseUrl`\n2. provider-specific base URL environment variable\n3. bundled provider default\n\nSupported aliases:\n\n| Provider | Variables |\n| --- | --- |\n| OpenAI | `OPENAI_BASE_URL` |\n| Anthropic | `ANTHROPIC_BASE_URL` |\n| Google Gemini | `GOOGLE_BASE_URL`, `GEMINI_BASE_URL` |\n| Google Antigravity | `GOOGLE_ANTIGRAVITY_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Gemini CLI | `GOOGLE_GEMINI_CLI_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Vertex | `GOOGLE_VERTEX_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Any provider id | derived `<PROVIDER_ID>_BASE_URL`, uppercased with non-alphanumerics converted to `_` (for example `my-proxy` → `MY_PROXY_BASE_URL`) |\n\nOpenAI-compatible proxy note: the built-in `openai` provider keeps its bundled API transport (`openai-responses`). Setting `OPENAI_BASE_URL` changes the host but still calls `<baseUrl>/responses`. If your proxy only supports Chat Completions, configure a custom `models.yml` provider with `api: openai-completions` instead of using the built-in OpenAI provider override:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKey: OPENAI_API_KEY\n api: openai-completions\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n api: openai-completions\n```\n\nFor OpenRouter traffic, GJC explicitly sends `User-Agent: Gajae-Code/<package version>` plus OpenRouter attribution headers. For the built-in OpenAI Responses transport and generic OpenAI-compatible Chat Completions transport, GJC passes model/provider headers through the OpenAI JavaScript SDK and does not set a GJC user-agent unless the provider-specific code adds one.\n\n### OpenAI-compatible proxy provider config\n\nFor OpenAI-compatible proxies that only implement Chat Completions, prefer a custom `models.yml` provider over `OPENAI_BASE_URL`:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n reasoning: false\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n```\n\n`models.yml` is strict: unsupported provider/model keys fail validation before the provider request is dispatched.\n\n### GJC workflow bridge commands\n\n`gjc ralplan`, `gjc deep-interview`, and `gjc state` are private runtime bridge commands. They require `GJC_RUNTIME_BINARY` (or legacy `GJC_LEGACY_RUNTIME_BINARY`) to point at the private runtime executable; public bundled workflow use remains through `/skill:ralplan` and `/skill:deep-interview` inside a GJC session.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_RUNTIME_BINARY` | Private runtime bridge binary for `gjc ralplan`, `gjc deep-interview`, and `gjc state` |\n| `GJC_LEGACY_RUNTIME_BINARY` | Legacy fallback bridge binary name |\n\n### Interactive `--tmux` startup and scroll/mouse profile\n\n`gjc --tmux` launches the interactive TUI inside a fresh GJC-managed tmux session. When GJC creates that session it applies a profile that is **scoped to the GJC session only** (it never runs `set -g` / global tmux options), including:\n\n- `mouse on` — enables mouse-wheel scrolling into tmux copy-mode (history/scrollback).\n- `set-clipboard on` and a readable copy-mode `mode-style`.\n- GJC ownership/identity tags (`@gjc-profile`, branch/project markers).\n\nThis profile is applied on macOS, Linux, and WSL (Linux) alike; only native Windows (`win32`) skips the tmux launch. It is applied **only to sessions GJC itself creates**. If you start tmux yourself and then run `gjc` inside it, GJC leaves your tmux configuration untouched — add `set -g mouse on` to your own `~/.tmux.conf`, or relaunch with `gjc --tmux` to get the managed profile.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_LAUNCH_POLICY` | Launch policy for `--tmux` startup: `tmux` (default) or `direct` (skip the tmux session) |\n| `GJC_TMUX_SESSION` | Explicit tmux session name override for `--tmux` startup |\n| `GJC_TMUX_COMMAND` | tmux binary/command override for every GJC tmux flow (`GJC_TEAM_TMUX_COMMAND` is honored as a team-path alias) |\n| `GJC_TMUX_PROFILE` | Set `0`/`false`/`off` to apply only the required ownership tags and skip the scroll/mouse/clipboard profile |\n| `GJC_MOUSE` | Set `0`/`false`/`off` to skip `mouse on`, leaving wheel scrolling to the host terminal instead of tmux copy-mode |\n\n#### WSL / Windows Terminal scrolling\n\nOn WSL with Windows Terminal, scrolling behaves differently depending on whether tmux owns the mouse:\n\n- **With the GJC profile (default):** the mouse wheel enters tmux copy-mode and scrolls the pane's scrollback. Keyboard fallback: `Ctrl-b [` to enter copy-mode, then `PgUp`/arrows; `q` to exit.\n- **Without tmux mouse capture (`GJC_MOUSE=off`, or running outside `gjc --tmux`):** Windows Terminal handles the wheel and scrolls its own native scrollback.\n\nIf the wheel does not scroll inside `gjc --tmux` on WSL, confirm the session is GJC-managed (`gjc session list`) so the `mouse on` profile is actually applied; sessions you launched yourself do not receive it. Set `GJC_MOUSE=off` if you prefer Windows Terminal's native scrollback over tmux copy-mode.\n\n### Team tmux backend, dry-run, and state paths\n\n`gjc team ...` starts tmux worker panes from the current tmux-backed leader session. Start that leader with `gjc --tmux` first; `gjc team` intentionally does not create or attach the leader session itself.\n\n`gjc team ... --dry-run --json` creates the same machine-readable state tree as a team launch without starting tmux panes. By default that state is written under `<cwd>/.gjc/state/team/<team>/`; treat it as ephemeral smoke-test/review state. Do not commit generated `.gjc/state/team` contents. Remove the generated team directory after a dry-run when the harness no longer needs it.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_TEAM_STATE_ROOT` | Overrides the team state root (default `<cwd>/.gjc/state/team`) |\n| `GJC_TEAM_TMUX_COMMAND` | tmux binary/command override for team launch |\n| `GJC_TEAM_WORKER_COMMAND` | Worker GJC command override |\n| `GJC_TEAM_WORKER_CLI` | Team worker CLI selector; accepted values are `auto` or `gjc` |\n| `GJC_TEAM_WORKER_CLI_MAP` | Comma-separated worker CLI selector map; entries must be `auto` or `gjc` |\n\n### Hermes MCP bridge\n\n`gjc mcp-serve coordinator` exposes a GJC-native outward MCP bridge for Hermes-style coordinators. `gjc mcp-serve hermes` is a compatibility alias for the same bridge. The bridge is read-only by default and fails closed until roots and mutation classes are explicitly configured.\n\nCoordinator MCP currently exposes durable polling/await tools, not push subscriptions. Consume `gjc_coordinator_read_coordination_status`, `gjc_coordinator_read_turn`, or bounded `gjc_coordinator_await_turn` for state changes.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_COORDINATOR_MCP_WORKDIR_ROOTS` | Required allowlist for workdir and artifact paths. `gjc setup hermes` renders absolute normalized paths joined with the platform path delimiter (`:` on POSIX, `;` on Windows). The bridge parser also accepts commas, semicolons, and newlines for legacy manual configs. |\n| `GJC_COORDINATOR_MCP_MUTATIONS` | Enables mutating tool classes as a comma-separated list (`sessions`, `questions`, `reports`) or `all`. `sessions` covers session startup, prompt delivery, durable turn journal updates, queue, and force operations. Per-call `allow_mutation: true` is still required. |\n| `GJC_COORDINATOR_MCP_ARTIFACT_BYTE_CAP` | Max bytes returned by artifact reads (default `65536`, capped at `1048576`). |\n| `GJC_COORDINATOR_MCP_STATE_ROOT` | Bridge coordination state root (default `<cwd>/.gjc/state/coordinator-mcp`). |\n| `GJC_COORDINATOR_MCP_PROFILE` | Optional profile namespace for session/question/report state. Missing scope never widens to global session enumeration. |\n| `GJC_COORDINATOR_MCP_REPO` | Optional repo namespace for session/question/report state. Missing scope never widens to global session enumeration. |\n| `GJC_COORDINATOR_MCP_SESSION_COMMAND` | GJC-compatible command used by mutating session startup to launch a detached tmux session. `gjc setup hermes` renders this to `gjc --worktree` by default so Hermes-installed configs start real GJC work in a GJC-managed worktree while preserving GJC project/session resume identity. Explicit values are preserved as user intent. When manually omitted, mutating session startup fails closed unless a service adapter is injected. |\n| `GJC_COORDINATOR_MCP_SETUP_MANAGED_BY` | Marker written by `gjc setup hermes` for safe managed config updates. |\n| `GJC_COORDINATOR_MCP_SETUP_SCHEMA_VERSION` | Managed setup schema version written by `gjc setup hermes`. |\n| `GJC_COORDINATOR_MCP_SETUP_SIGNATURE` | Deterministic managed setup signature used to detect safe updates versus unmanaged conflicts. |\n\n### Google Vertex AI\n\n| Variable | Required? | Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `GJC_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI code provider responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `GJC_OPENAI_CODE_DEBUG` | `1`/`true` enables OpenAI code provider debug logging |\n| `GJC_OPENAI_CODE_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `GJC_OPENAI_CODE_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path |\n| `GJC_OPENAI_CODE_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `GJC_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `GJC_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution); any other value forces `short`. The Anthropic provider already defaults to `long` (1h) when unset, so this is mainly an opt-out (`short`) or a way to extend long retention to other providers. |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / OpenAI code OAuth in DB | OpenAI code search provider availability/auth |\n| `GJC_OPENAI_CODE_WEB_SEARCH_MODEL` | OpenAI code search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override |\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.gjc/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `anthropic-model-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-4 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `GJC_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `GJC_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `GJC_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `GJC_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `GJC_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `GJC_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `GJC_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `GJC_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `GJC_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `GJC_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `GJC_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `GJC_SUBPROCESS_CMD` | Overrides subagent spawn command (`gjc` / `gjc.cmd` resolution bypass) |\n| `GJC_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| `GJC_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `GJC_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `GJC_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `GJC_TIMING=full` lists every module-load entry instead of just the top N. |\n| `GJC_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `GJC_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `GJC_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `GJC_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `GJC_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `GJC_FORCE_IMAGE_PROTOCOL=sixel` |\n| `GJC_NO_PTY` | If `1`, disables interactive PTY path for bash tool |\n\n`GJC_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@gajae-code/utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `GJC_CONFIG_DIR` | Config root dirname under home (default `.gjc`) |\n| `GJC_CODING_AGENT_DIR` | Full override for agent directory (default `~/<GJC_CONFIG_DIR or .gjc>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `GJC_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `ANTHROPIC_MODEL_BASH_NO_CI` | Legacy alias fallback for `GJC_BASH_NO_CI` |\n| `GJC_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `ANTHROPIC_MODEL_BASH_NO_LOGIN` | Legacy alias fallback for `GJC_BASH_NO_LOGIN` |\n| `GJC_SHELL_PREFIX` | Optional command prefix wrapper |\n| `ANTHROPIC_MODEL_CODE_SHELL_PREFIX` | Legacy alias fallback for `GJC_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `GJC_BASH_NO_LOGIN`/`ANTHROPIC_MODEL_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `GJC_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `GJC_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `GJC_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `GJC_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `GJC_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `GJC_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n| `GJC_TUI_KEYBOARD_PROTOCOL` | Enhanced keyboard input (Kitty keyboard protocol + xterm modifyOtherKeys). Enabled by default; set `0` / `false` to leave the keyboard in its default mode. Use this when a terminal (e.g. Android Termius) breaks IME/Hangul composition while these enhanced modes are active. |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `GJC_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `GJC_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `GJC_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path |\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## 11) Bridge mode (`--mode bridge`)\n\nConsumed by `packages/coding-agent/src/modes/bridge/*`. The bridge is a\nnetwork-reachable control surface and is **secure-by-default**: it refuses to\nstart without TLS and a bearer token, and the 0.3.1 default endpoint matrix\nfail-closes session events, commands, controller ownership, UI responses, host\ntool results, and host URI results. See `docs/bridge.md` for protocol details.\n\n| Variable | Required | Default | Behavior |\n| --- | --- | --- | --- |\n| `GJC_BRIDGE_TOKEN` | Yes | — | Bearer token required on authenticated endpoints. **Secret — never commit.** |\n| `GJC_BRIDGE_TLS_CERT` | Yes | — | Path to the TLS certificate (PEM). Startup fails closed if cert/key are missing (TLS is mandatory, including loopback). |\n| `GJC_BRIDGE_TLS_KEY` | Yes | — | Path to the TLS private key (PEM). **Secret — never commit; `chmod 600`.** |\n| `GJC_BRIDGE_HOST` | No | `127.0.0.1` | Bind hostname. |\n| `GJC_BRIDGE_PORT` | No | `4077` | Bind port (1–65535). |\n| `GJC_BRIDGE_SCOPES` | No | `prompt` | Parsed for dormant command-surface compatibility. Valid scopes: `prompt`, `control`, `bash`, `export`, `session`, `model`, `message:read`, `host_tools`, `host_uri`, `admin`. The default endpoint matrix still advertises no accepted scopes and rejects commands before scope checks. |\n\nLocal development with a self-signed certificate must add the local CA to the\nclient trust store; there is no plaintext or certificate-verification-bypass mode.\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n- Bridge auth/TLS material (`GJC_BRIDGE_TOKEN` and the `GJC_BRIDGE_TLS_KEY` private key; never commit cert/key/token material)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
|
|
20
20
|
"external-control-readiness.md": "# External control surface readiness\n\nThis document classifies every public GJC surface that an external controller, bot, editor, or harness can use to drive `gjc`. It is intentionally narrower than the generic bot guide: it states what is ready today, what is only editor/client-oriented, and what remains experimental.\n\n## Readiness matrix\n\n| Surface | Current readiness | Primary command | Use when | Do not use when | Provider-independent smoke path |\n| --- | --- | --- | --- | --- | --- |\n| Coordinator MCP | Preferred multi-session bot/control-plane surface. | `gjc mcp-serve coordinator` | A controller needs to start/register GJC sessions, send bounded turns, answer questions, read artifacts, and write durable status reports across one or more repo/worktree lanes. | The controller only needs one embedded subprocess and can own stdio directly. | `gjc mcp-serve coordinator --check --json`; `packages/coding-agent/test/coordinator-mcp.test.ts`; `packages/coding-agent/test/setup-cli.test.ts`. |\n| RPC stdio | Stable subprocess worker surface. | `gjc --mode rpc` | A host embeds one GJC worker process, sends JSONL commands over stdin, consumes stdout frames, and optionally uses `python/gjc-rpc`. | The host needs remote HTTPS, multi-session orchestration, or MCP tool discovery. | `packages/coding-agent/test/rpc-unattended-stdio.test.ts`; `packages/coding-agent/test/rpc-client.start.test.ts`; `packages/coding-agent/test/rpc-host-tools.test.ts`; `packages/coding-agent/test/rpc-host-uris.test.ts`. |\n| ACP mode | Editor/ACP client surface with tested protocol initialization, session lifecycle, client-owned MCP, file/terminal client bridges, permission routing, and stdout hygiene. | `gjc --mode acp` or `gjc acp` | An editor or ACP-compatible client wants to drive GJC through the Agent Client Protocol over stdio. | A bot needs a generic multi-session control plane; use Coordinator MCP instead. | `packages/coding-agent/test/acp-initialize-conformance.test.ts`; `packages/coding-agent/test/acp-stdout-hygiene.test.ts`; `packages/coding-agent/test/acp-lazy-startup.test.ts`; `packages/coding-agent/test/acp-mcp-isolation.test.ts`; `packages/coding-agent/test/read-acp-fs.test.ts`; `packages/coding-agent/test/write-acp-fs.test.ts`; `packages/coding-agent/test/bash-acp-terminal.test.ts`. |\n| Bridge HTTPS | Experimental, fail-closed remote session-control surface. | `gjc --mode bridge` | A future remote client needs HTTPS protocol scaffolding, authenticated health/help/handshake behavior, or SDK compatibility tests. | Production bot lifecycle, default external-controller integration, or claims that remote session events/commands are enabled by default. | `packages/coding-agent/test/bridge/bridge-auth.test.ts`; `packages/coding-agent/test/bridge/bridge-mode-handler.test.ts`; `packages/coding-agent/test/bridge/bridge-conformance.test.ts`; `packages/bridge-client/test/bridge-client.test.ts`. |\n\n## Surface details\n\n### Coordinator MCP\n\nCoordinator MCP is the default answer for external bot and orchestration integrations. It exposes a transport-level MCP tool contract for session discovery, managed session start, visible tmux registration, prompt delivery, bounded turn waiting, structured question answering, artifact reads, and explicit completion/failure/cancellation reports.\n\nReadiness claim:\n\n- Ready as the preferred generic external-controller control plane.\n- Provider-independent contract checks exist for server metadata, tool discovery, read-only defaults, mutation gates, setup rendering, and dry-run lifecycle behavior.\n- It is not a provider/model contract. Live model execution remains the operator's environment-specific smoke.\n\nPrimary references:\n\n- `docs/bot-integration.md`\n- `docs/hermes-mcp-bridge.md`\n- `packages/coding-agent/src/coordinator/contract.ts`\n- `packages/coding-agent/src/coordinator-mcp/server.ts`\n\n### RPC stdio\n\nRPC mode is the stable embedded-worker surface. It is newline-delimited JSON over stdio and emits a `{ \"type\": \"ready\" }` frame before accepting commands. Hosts can drive prompts, state queries, host tools, host URI schemes, workflow gates, extension UI responses, cancellation, and unattended negotiation through the RPC command catalog.\n\nReadiness claim:\n\n- Ready for single-process host integration and subprocess workers.\n- The public Python client in `python/gjc-rpc` is the recommended typed client for Python hosts.\n- Multi-session orchestration and MCP tool discovery are out of scope for RPC; use Coordinator MCP for those.\n\nPrimary references:\n\n- `docs/rpc.md`\n- `python/gjc-rpc/README.md`\n- `packages/coding-agent/src/modes/rpc/rpc-mode.ts`\n- `packages/coding-agent/src/modes/rpc/rpc-types.ts`\n\n### ACP mode\n\nACP mode runs GJC as an Agent Client Protocol server over stdio. It is useful for editor-style clients that own the ACP transport and want session creation, session load/fork/resume/close metadata, prompt handling, client-provided MCP servers, permission prompts, editor file reads/writes, terminal-backed bash, and elicitation support.\n\nReadiness claim:\n\n- ACP is implemented and covered for current editor/client contracts: initialize conformance, agent capability advertisement, lazy startup, stdout JSON-RPC hygiene, client-owned MCP isolation, event mapping, file bridge routing, terminal routing, and permission routing.\n- ACP is not the preferred bot control-plane surface. It is not positioned as a multi-session external bot coordinator, and it does not replace Coordinator MCP reports/artifacts/turn state.\n- A real prompt still depends on the selected provider/model credentials, so required PR smokes should stay on provider-independent initialize, lifecycle, bridge, and mapper tests.\n\nCurrent entrypoints:\n\n```sh\ngjc --mode acp\n# equivalent ACP subcommand for ACP clients that prefer command-style launch\ngjc acp\n```\n\nPrimary references:\n\n- `packages/coding-agent/src/commands/acp.ts`\n- `packages/coding-agent/src/modes/acp/acp-mode.ts`\n- `packages/coding-agent/src/modes/acp/acp-agent.ts`\n- `packages/coding-agent/src/modes/acp/acp-client-bridge.ts`\n- `packages/coding-agent/src/modes/acp/acp-event-mapper.ts`\n\n### Bridge HTTPS\n\nBridge mode is an experimental network protocol surface over HTTPS. Its current public posture is deliberately fail-closed: unauthenticated health/help are available, authenticated handshake is available, and default session-control endpoints advertise no accepted capabilities/scopes and reject with `endpoint_disabled`.\n\nReadiness claim:\n\n- Ready as experimental protocol scaffolding with fail-closed behavior and SDK/client conformance tests.\n- Not ready as the default external-bot product surface.\n- Do not document events, commands, controller ownership, UI responses, host tool results, or host URI results as enabled by default. Those names remain in the protocol catalog for internal compatibility and future re-enable work.\n\nPrimary references:\n\n- `docs/bridge.md`\n- `packages/coding-agent/src/modes/bridge/bridge-mode.ts`\n- `packages/coding-agent/src/modes/bridge/auth.ts`\n- `packages/bridge-client/src/index.ts`\n\n## PR smoke checklist\n\nFor external-control PRs, use this provider-independent checklist before any optional live provider smoke:\n\n1. **Docs-to-code alignment:** the readiness matrix still matches CLI mode parsing, MCP command registration, ACP command registration, bridge endpoint defaults, and RPC/ACP/Bridge tests.\n2. **Coordinator MCP:** `gjc mcp-serve coordinator --check --json` still reports the coordinator server and tool list, and focused MCP tests pass without provider credentials.\n3. **RPC stdio:** at least one stdio or client contract test proves JSONL startup/command routing without a real provider key.\n4. **ACP mode:** initialize/stdout or conformance tests prove the ACP JSON-RPC entrypoint and capability advertisement without a real provider key.\n5. **Bridge HTTPS:** bridge auth/handler tests prove TLS requirement, authenticated handshake, help/health behavior, and default `endpoint_disabled` session-control posture.\n6. **Local leak audit:** deliverable docs/tests must not contain private profile names, user-home paths, callback artifact paths, local proxy names, terminal app names, or private launch wrappers.\n\nOptional live smokes are useful diagnostics for one operator's model/profile/network setup, but they must not be required for PR readiness unless the PR explicitly changes live provider behavior.\n",
|
|
21
21
|
"fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope and traversal policy, then lets higher-level operations (glob filtering, fuzzy scoring, grep file selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across `glob`, `fuzzyFind`, and `grep` when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs`\n- JS binding/export:\n - `packages/natives/src/glob/index.ts` (`invalidateFsScanCache`)\n - `packages/natives/src/glob/types.ts`\n - `packages/natives/src/grep/types.ts`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- Consumers must pass stable semantics for hidden/gitignore/node_modules behavior; changing any flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache population uses a deterministic walker (`ignore::WalkBuilder`) configured by `include_hidden`, `use_gitignore`, and `skip_node_modules`:\n\n- `follow_links(false)`\n- sorted by file path\n- `.git` is always skipped\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- entry file type + `mtime` are captured via `symlink_metadata`\n\nSearch roots are resolved by `resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- max entry enforcement is oldest-first eviction by `created_at`\n\n## Empty-result fast recheck (separate from normal hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(...)` and retries\n- intended to reduce stale-negative results when files were recently added but cache is still within TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age exceeds threshold\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when selected candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on all exposed APIs (`cache?: boolean`, default `false`).\n\nCurrent defaults in native APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` included only when the pattern mentions `node_modules`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` is skipped\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`, and `node_modules` included only when the glob mentions `node_modules`\n\nCoding-agent callers today:\n\n- High-volume mention candidate discovery enables cache:\n - `packages/coding-agent/src/utils/file-mentions.ts`\n - profile: `hidden=true`, `gitignore=true`, `includeNodeModules=true`, `cache=true`\n- Tool-level `grep` integration currently disables scan cache (`cache: false`):\n - `packages/coding-agent/src/tools/grep.ts`\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (e.g., delete), fallback canonicalizes parent and reattaches filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation tool callsites:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/patch/index.ts` (hashline/patch/replace flows)\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. **Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. **Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)`\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/`grep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
|
|
22
|
-
"gajae-remote.md": "# Gajae Remote — v0 Design (thin phone steering wheel)\n\nStatus: **design / pending approval** · Tracks issue #565 · Scope: **v0 only**\n\nGajae Remote is a thin remote *steering wheel* for an already-running PC-side\n`gjc` session. It is intentionally **not** a universal phone shell, a remote\nfilesystem editor, or a remote-desktop replacement. The PC stays the authority\nboundary for file edits, shell execution, approval gates, and sensitive output.\n\nThis document fixes the two things the issue calls out as blockers/gates before\nany code lands: the **authority boundary contract** and the **transmitted-data\ncontract**. It then maps v0 onto existing surfaces and splits the work into\nPR-sized steps.\n\n## TL;DR architecture decision\n\nv0 is a thin **read + one-line-submit** surface layered on subsystems that\nalready exist. It introduces **no new remote-control protocol** — that would\nrequire ADR-level rationale per [`docs/bridge.md`](bridge.md).\n\n| Concern | Reused existing surface |\n| --- | --- |\n| Network transport, TLS, bearer auth, fail-closed posture | Bridge mode (`gjc --mode bridge`), [`docs/bridge.md`](bridge.md) |\n| Client SDK / framing | `@gajae-code/bridge-client` (`BridgeClient`, `events()`) |\n| Session state storage, liveness, bounded status, submit gating | Harness control plane (`gjc harness`), `packages/coding-agent/src/harness-control-plane/` |\n| Bounded observation (never a raw transcript dump) | `Observation` / `SessionStateView` in `harness-control-plane/types.ts` |\n| Web client precedent (local server + SPA) | `packages/stats` (`server.ts` + `src/client/`) |\n\nThe only genuinely new piece is a thin **Gajae Remote gateway**: one PC-side\nprocess that enumerates harness control-plane sessions and proxies a strict,\nbounded subset of read/submit operations to each session's owner. The phone\ntalks to exactly one gateway endpoint.\n\n## Why a gateway (and not one bridge per session)\n\nBridge mode serves **exactly one live `AgentSession` per process** and is\nfail-closed by default (see the endpoint matrix in\n`packages/coding-agent/src/modes/bridge/bridge-mode.ts`). The v0 requirement\n\"list active PC-side sessions\" needs cross-session enumeration, which a single\nbridge process does not provide.\n\nThe harness control plane already centralizes exactly this: per-session\n`state.json`, lease + heartbeat liveness, a single-writer severity event log,\nand a **bounded** observation vocabulary. So the gateway is a thin read/submit\nproxy over the control plane, wearing the bridge security model (TLS + bearer +\nfail-closed). This keeps bridge single-session semantics untouched and avoids a\nproliferation of per-session ports and pairing surfaces.\n\n```\n phone (mobile web)\n │ HTTPS + bearer (scoped: remote:view + remote:submit)\n ▼\n Gajae Remote gateway ── enumerates ─▶ harness control-plane session-state dir\n (one PC process) ── observe ───▶ owner process (RuntimeOwner, lease holder)\n ── submit ────▶ owner submit path (readyForSubmit gating)\n │\n └─ never: file edits · shell · gate answers · raw transcript · secrets\n```\n\n## Authority boundary contract\n\nThe PC-side runtime is the sole authority. The gateway and the phone are\n**observers + one-line submitters**, nothing more.\n\nThe gateway MUST NOT, in v0:\n\n- edit files, run shell, or invoke any mutating tool;\n- answer workflow-gate / permission / approval prompts (those stay on the PC);\n- expose bridge/RPC command scopes (`message:read`, `session`, `model`, `bash`,\n `host_tools`, `host_uri`, `export`, `admin`, or `control`) to the phone;\n- stream raw pane output, transcripts, tool arguments/results, diffs, file\n contents, environment, or secrets;\n- bypass the owner's `readyForSubmit` gating or submit while a session is busy.\n\nThe phone MAY, in v0:\n\n- list active sessions (bounded metadata);\n- open one session and read its **bounded** status/observation;\n- submit a single one-line instruction through the owner's normal submit path;\n- see idle / working / blocked status and a human-readable reason when blocked.\n\nA one-line submit travels the same path as a local `gjc harness submit`, so it\ninherits the owner's submission gating (`Observation.readyForSubmit` /\n`submitUnavailableReason`). The phone cannot force a submit the local runtime\nwould itself refuse.\n\n## Transmitted-data contract (allowlist)\n\nThe contract is an **allowlist**: only the fields below leave the PC. Anything\nnot listed is withheld by default. This is enforced in code as a typed\nprojection from the control plane's already-bounded `Observation` /\n`SessionStateView` — never a passthrough of internal state.\n\n### Session list entry → phone (`RemoteSessionSummary`)\n\n| Field | Source | Notes |\n| --- | --- | --- |\n| `sessionId` | `SessionState.sessionId` | opaque id |\n| `name` | derived from handle metadata (`issueOrPr`, repo, branch, or session id fallback) | sanitized, length-capped |\n| `harness` | `SessionState.harness` | `gajae-code` in v1 |\n| `status` | derived (see state mapping) | `idle` \\| `working` \\| `blocked` \\| `offline` |\n| `lastActivityAt` | `Observation.lastActivityAt` | ISO timestamp |\n| `branch` | `Observation.branch` | branch name only |\n\n### Open-session view → phone (`RemoteSessionView`)\n\n| Field | Source | Notes |\n| --- | --- | --- |\n| `sessionId`, `name`, `harness`, `status` | as above | |\n| `lifecycle` | `SessionStateView.lifecycle` | bounded enum |\n| `ownerLive` | `SessionStateView.ownerLive` | liveness |\n| `blockers` | `SessionStateView.blockers` | reason strings, sanitized |\n| `observedSignals` | `Observation.observedSignals` | bounded vocab only (`tool-call`, `test-running`, `streaming`, `idle`, …) |\n| `gitDelta` | `Observation.gitDelta` | enum: `clean`/`dirty`/`zero-delta`/`unknown` |\n| `risk` | `Observation.risk` | enum |\n| `readyForSubmit` | `Observation.readyForSubmit` | submit affordance |\n| `submitUnavailableReason` | `Observation.submitUnavailableReason` | when not ready |\n| `lastActivityAt`, `branch` | as above | |\n\n### Phone → PC (`RemoteSubmitRequest`)\n\n| Field | Notes |\n| --- | --- |\n| `sessionId` | target session |\n| `text` | single one-line instruction; length-capped, control-chars stripped |\n| `idempotencyKey` | optional; dedupes retries (mirrors bridge idempotency) |\n\n### Never transmitted by default\n\nRaw pane/terminal output, full transcript / message bodies, tool call arguments\nor results, file contents, diffs, system prompt, environment variables, tokens\nor secrets, and absolute paths beyond the session `cwd`/`branch` metadata. When\ncontent is intentionally held back, the phone shows a neutral *\"withheld on PC\"*\nmarker rather than a redacted blob.\n\n## Session-state model (idle / working / blocked)\n\n`status` is derived from harness lifecycle + liveness + bounded signals:\n\n- **offline** — `ownerLive == false`, lease dead, or gateway cannot reach the\n owner. (Distinct from blocked; the PC is gone, not waiting.)\n- **blocked** — `lifecycle == \"blocked\"`, OR a workflow-gate / permission prompt\n is pending on the PC, OR `readyForSubmit == false` with a\n `submitUnavailableReason`. Phone shows the reason; it does **not** resolve it.\n- **working** — owner live and `lifecycle` in\n `{started, submitted, observing, recovering, validating, finalizing}` with\n recent activity signals (`streaming` / `tool-call` / `test-running`).\n- **idle** — owner live, stable lifecycle, last signal `idle`/`completed`, and\n `readyForSubmit == true`.\n\n## Failure states (must be understandable)\n\n| Condition | Detection | Phone UX |\n| --- | --- | --- |\n| Disconnected PC | `ownerLive`/lease dead or gateway unreachable | `offline`; submit disabled; \"PC is offline\" |\n| Expired pairing | bearer/pairing token expired or revoked | \"Pairing expired — re-pair on PC\" |\n| Session busy | `readyForSubmit == false` (+ reason) | submit disabled with reason; optionally queue |\n| Submit rejected | typed object error (e.g. `{ code: \"scope_denied\" }`, see [`docs/rpc.md`](rpc.md)) | inline rejection reason |\n| Sensitive output withheld | bounded observation by design | neutral \"withheld on PC\" marker |\n\n## Pairing and auth (minimum that is not security soup)\n\nv0 = **local pairing only**. Reuse the bridge security model for transport and authentication posture, but expose a gateway-specific authorization surface:\n\n- **TLS mandatory for every bind, including loopback** (no plaintext fallback;\n matches `docs/bridge.md`).\n- **Bearer token mandatory** for every endpoint except health/help.\n- Pairing flow: the PC prints/serves a short-lived **pairing code**; the phone\n submits host + code and receives a **scoped bearer** capped to\n gateway-only scopes: `remote:view` + `remote:submit` only. These scopes are\n not aliases for bridge/RPC `message:read` or `prompt`. Phone bearers MUST NOT\n authorize bridge command-catalog calls such as `get_messages`,\n `get_last_assistant_text`, `get_state` with `include: [\"systemPrompt\",\n \"tools\"]`, `new_session`, `switch_session`, `branch`, `set_model`,\n `bash`, `host_*`, `control`, or `admin`.\n- Tokens expire; re-pairing is the recovery path. The gateway is fail-closed:\n unknown/expired tokens, non-gateway scopes, bridge command-catalog methods,\n and out-of-scope commands are rejected before dispatch.\n\nHosted relay is **deferred to v1** and gated behind a separate ADR (it changes\nthe trust model and is where \"security soup\" risk concentrates).\n\n## Open questions from the issue — v0 decisions\n\n| Question | v0 decision | Deferred |\n| --- | --- | --- |\n| Hosted relay vs local pairing vs both | Local pairing only | Hosted relay → v1 (ADR) |\n| Minimum pairing/auth | Pairing code → gateway-scoped bearer, TLS mandatory | Identity/relay accounts → v1 |\n| Which session states are public | Bounded: `idle`/`working`/`blocked`/`offline` + bounded observation vocab | Richer telemetry → v1 |\n| Web vs native first | Mobile web first (reuse stats SPA build pattern) | Native app / PWA polish → v1 |\n| Notifications / pause / resume | Out of scope for v0 | Staged in v1 |\n\n## Implementation plan (PR-sized steps)\n\nEach step is independently shippable; later steps stay fail-closed until wired.\n\n1. **PR 1 — this doc.** `docs/gajae-remote.md` + README cross-link. Resolves the\n authority + transmitted-data gates. No code.\n2. **PR 2 — typed contract + schema.** `RemoteSessionSummary`,\n `RemoteSessionView`, `RemoteSubmitRequest`, `RemoteSubmitResult`,\n `RemoteErrorCode`, plus a projection `Observation`/`SessionStateView →\n RemoteSessionView` and a JSON schema. Tests assert the allowlist (no\n forbidden field can leak). Types only; no runtime wiring.\n3. **PR 3 — gateway read path.** Enumerate harness sessions (list) and serve the\n per-session bounded view, behind an explicit opt-in flag, fail-closed\n otherwise, reusing bridge TLS + bearer. Tests for liveness derivation and the\n redaction projection.\n4. **PR 4 — gateway submit path.** One-line submit through the owner's submit\n gating; typed rejections for busy/denied; idempotency. Tests for\n busy/rejected paths.\n5. **PR 5 — pairing/auth.** Pairing code → gateway-scoped bearer\n (`remote:view` + `remote:submit`) with expiry. Tests prove phone bearers\n cannot call bridge/RPC `message:read`, `prompt`, session/model, shell, host,\n control, or admin surfaces.\n6. **PR 6 — mobile web client.** Minimal SPA (list → open → status → submit)\n using `@gajae-code/bridge-client` and the `packages/stats` build pattern.\n7. **PR 7 — failure-state UX + hardening.** Failure-state surfaces, redaction\n hardening, CHANGELOG, docs finalize.\n\n## Non-goals (v0)\n\n- No arbitrary phone-side shell.\n- No raw secret/log dumping to mobile by default.\n- No direct filesystem editor from the phone.\n- No bypass around PC-side approval/confirmation gates.\n- No remote-desktop replacement.\n- No second authenticated remote-control protocol (reuse bridge; relay needs an\n ADR).\n\n## Key source references\n\n- Bridge transport / security: `packages/coding-agent/src/modes/bridge/`, [`docs/bridge.md`](bridge.md)\n- RPC command/response contract: `packages/coding-agent/src/modes/rpc/`, [`docs/rpc.md`](rpc.md)\n- Client SDK: `packages/bridge-client/src/`\n- Control plane (sessions, leases, bounded observation, submit): `packages/coding-agent/src/harness-control-plane/`, `packages/coding-agent/src/commands/harness.ts`\n- Web client precedent: `packages/stats/src/server.ts`, `packages/stats/src/client/`\n\n—\n*[repo owner's gaebal-gajae (clawdbot) 🦞]*\n",
|
|
23
22
|
"geobench.md": "# GEO benchmark for Gajae-Code\n\nThis repository includes a [`geobench`](https://github.com/NomaDamas/geobench) product spec for measuring LLM answer visibility: hit rate, MRR, share of voice, citation rate/share, and confidence intervals.\n\n```bash\n/path/to/geobench/dist/geobench estimate --product geobench/gajae-code.yaml --providers openai --tier cheap\n/path/to/geobench/dist/geobench profile geobench/gajae-code.yaml\n/path/to/geobench/dist/geobench bench --product geobench/gajae-code.yaml --providers openai --tier cheap --mode benchmark\n```\n\nPublish aggregate metrics only; do not publish raw provider answers, secrets, or private run logs.\n",
|
|
24
23
|
"gjc-dogfood-skill-template.md": "# GJC dogfood local skill template\n\nIssue #93 requested a gaebal-gajae/operator dogfood skill. The live issue has no comment approving a fifth bundled default workflow skill, so this stays a local template instead of changing the default workflow surface. Operators can copy it into a user or project override when they want GJC-first session guidance:\n\n```sh\nmkdir -p ~/.gjc/skills/gjc-dogfood\ncp docs/gjc-dogfood-skill-template.md ~/.gjc/skills/gjc-dogfood/SKILL.md\n```\n\nFor a single project, copy it to `<project>/.gjc/skills/gjc-dogfood/SKILL.md` instead. Do not commit that project `.gjc` copy unless the project explicitly wants a local override.\n\n---\nname: gjc-dogfood\ndescription: Use when running or reviewing work through GJC sessions, dogfooding Gajae-Code, or migrating an operator workflow from OMX to GJC.\n---\n\n# GJC Dogfood Operator Workflow\n\nUse GJC first for coding, review, planning, and follow-up sessions. Treat OMX as a fallback only when GJC is unavailable, broken, or missing a required capability.\n\n## Locate and launch GJC\n\n- Installed CLI: run `command -v gjc` and then launch with `gjc --tmux`.\n- Repository checkout: from the gajae-code repo, prefer `bun packages/coding-agent/src/cli.ts --tmux` when testing source changes before install.\n- Worktree isolation: for branch-specific work, either let GJC create a managed sibling worktree with `gjc --tmux --worktree <branch-like-name>` or `cd <existing-worktree-path>` and run `gjc --tmux` there. Do not pass filesystem paths to `--worktree`.\n- Name sessions explicitly with the project and issue, for example `gajae-code-93-dogfood-skill`, so tmux panes, logs, and exports remain traceable.\n\n## Start the session\n\n- Put git operations inside the GJC session: fetch, branch/worktree setup, focused commits, pushes, and PR creation should be visible in-session.\n- Submit the initial prompt with the issue URL, target branch, acceptance criteria, verification limits, and any existing plan/spec link.\n- Verify the prompt was accepted: the TUI should show the user prompt, an active assistant turn, or a tool/action request. If the session silently idles, resend once with a shorter prompt and capture the failure.\n- Verify working state before leaving the session unattended: confirm the target cwd/worktree, branch, and issue scope are visible in the transcript or command output.\n\n## During work\n\n- Keep session names and branch names issue-scoped.\n- Prefer GJC workflow skills only when they fit: `deep-interview` for unclear requirements, `ralplan` for planning, `ultragoal` for durable ledgers, and `team` for coordinated tmux execution.\n- Keep evidence in the session: issue reads, focused tests/checks, screenshots only when visual behavior matters, and PR URLs.\n- When GJC is weaker than OMX, finish the urgent work with the smallest safe fallback and file a gajae-code follow-up issue with the missing capability, exact command/session context, expected behavior, and evidence.\n\n## Fallback policy\n\nUse OMX or another operator path only when:\n\n- `gjc` cannot be located or launched after checking installed and repo-local commands;\n- authentication, model routing, tmux, or prompt submission is broken;\n- GJC lacks a required capability that OMX already has;\n- an urgent production/review deadline would be missed by debugging GJC first.\n\nRecord the fallback reason and create or link the gajae-code issue that would make GJC sufficient next time.\n\n## Evidence checklist\n\nReport:\n\n- project, issue, branch/worktree, and session name;\n- whether GJC was installed or repo-local;\n- prompt acceptance and working-state evidence;\n- git operations performed in-session;\n- focused verification commands and results;\n- PR/issue URLs;\n- follow-up gajae-code issues for any GJC gap or fallback.\n",
|
|
25
24
|
"grok-build-provider-design.md": "# Grok Build provider design\n\n## Status\n\nProposal for maintainer design review. This document intentionally does not add a bundled provider implementation. It records the product/API decisions that must be accepted before any Grok Build implementation PR should land.\n\nThis is not an authorization claim for xAI endpoints, not a final naming decision, not approval for a bundled-loading exception, and not trademark/display-name approval. Those items require explicit owner sign-off before implementation.\n\n## Required owner sign-off gates\n\nImplementation should remain blocked until the owner signs off on these gates:\n\n1. **Authorized use / ToS** — confirm that GJC may use `cli-chat-proxy.grok.com` and the xAI CLI OAuth public client from a third-party tool. A public OAuth client id is not proof that this use is authorized.\n2. **Bundled-loading trust boundary** — confirm whether a source-controlled bundled provider may load even when ordinary user extension discovery is disabled.\n3. **Public selector naming** — choose the stable provider selector prefix: `grok-cli`, `grok-build`, or another owner-selected id.\n4. **Trademark/display-name** — confirm whether GJC may present the provider/profile using `Grok Build` or should use a more neutral owner-approved label.\n\nIf gate 1 is not accepted, the Grok Build provider implementation should not ship against `cli-chat-proxy.grok.com`. The fallback direction would be a documented user-supplied xAI/API-key provider or a different officially authorized integration path.\n\n## Problem\n\nGJC can load third-party extensions, but the first-run interactive path needs a maintainer-owned decision before a bundled Grok Build provider can be accepted. The desired product flow is:\n\n```text\ngjc -> /login -> OAuth -> Grok Build -> browser xAI login -> /model -> <provider-id>/grok-composer-2.5-fast\n```\n\nThe previously proposed implementation touched bundled extension loading, OAuth registration, model profiles, vendor code, usage reporting, and tests in one PR. That is too much surface for review without first agreeing on the provider contract and the owner sign-off gates above.\n\n## Goals\n\n- Keep Grok Build, if accepted, as a bundled provider extension rather than a workflow skill.\n- Preserve the existing four bundled workflow skills and four role agents.\n- Define the `/login` OAuth contract for an owner-approved display name, with `Grok Build` only as a candidate label.\n- Define the `/model` contract for `grok-composer-2.5-fast` without committing to the final selector prefix before owner sign-off.\n- Define the guardrails for any bundled provider that loads while ordinary extension discovery is disabled.\n- Keep credentials in the existing auth storage path; no tokens or user env values are checked into the repo.\n- Keep implementation PRs small enough for independent review, rejection, or rollback.\n\n## Non-goals\n\n- No new workflow command or `/skill` surface.\n- No automatic installation from npm or remote code at runtime.\n- No direct `packages/ai/src/models.json` edits.\n- No broad model-profile reshuffle.\n- No provider-specific secrets in source.\n- No claim that xAI has authorized this endpoint/client usage without owner review.\n\n## Candidate provider contract\n\nThese are candidate values for owner review, not final commitments:\n\n| Field | Candidate value | Decision status | Notes |\n| --- | --- | --- | --- |\n| Public provider id | `grok-cli` or `grok-build` | **Owner decision required** | See naming section below. |\n| Display name | `Grok Build` or owner-selected label | **Owner decision required** | Name shown in `/login` and UI surfaces; see trademark/display-name section below. |\n| Default model id | `grok-composer-2.5-fast` | Proposed | Full selector depends on final provider id. |\n| Secondary model id | `grok-build` | Proposed | Candidate for executor/architect roles if a profile is accepted. |\n| Base URL | `https://cli-chat-proxy.grok.com/v1` | **Authorized-use sign-off required** | Undocumented/private-looking endpoint; do not ship without owner approval. |\n| OAuth issuer | `https://auth.x.ai` | **Authorized-use sign-off required** | OIDC discovery must validate xAI-owned HTTPS endpoints. |\n| OAuth callback | loopback `127.0.0.1` | Proposed | Uses PKCE + state validation. |\n| API adapter | `grok-cli-responses` | Proposed internal name | Provider-specific stream adapter; not a new generic API shape. |\n| Env bypass | `GROK_CLI_OAUTH_TOKEN` | Optional follow-up | Local bypass only; no refresh or discovery guarantees. |\n\n## Authorized-use and ToS caveat\n\n`cli-chat-proxy.grok.com` and the xAI CLI OAuth public client appear to be designed for xAI/Grok CLI traffic. Reusing them from GJC may be technically possible but still unauthorized or contrary to xAI terms.\n\nBefore implementation, the owner should explicitly decide one of:\n\n- **Accept** — proceed with this integration after reviewing the legal/product risk.\n- **Defer** — keep this design document only; no code ships until authorization is clarified.\n- **Reject** — do not integrate against `cli-chat-proxy.grok.com`; use only an official public API path.\n\nImplementation PRs must not describe the public client id as a secret, but they also must not present it as authorization. Tests should avoid real tokens and should not require an xAI account.\n\n## Trademark/display-name caveat\n\n`Grok` and `xAI` are third-party marks. `Grok Build` may also imply an official xAI/Grok product relationship even when the integration is third-party. Before implementation, the owner should explicitly choose one of:\n\n- **Use `Grok Build`** — acceptable as the user-facing provider/profile label after trademark/product-risk review.\n- **Use a neutral label** — for example `xAI Grok`, `Grok OAuth`, or another owner-selected name that avoids implying official endorsement.\n- **Avoid built-in branding** — keep any Grok-specific naming only in user-provided configuration until authorization/branding is clarified.\n\nImplementation PRs should avoid lock-in language such as \"official\" unless there is explicit authorization. UI labels, profile names, docs, tests, and screenshots must all use the owner-approved label consistently.\n\n## OAuth behavior\n\nIf authorized-use is accepted, the OAuth implementation should use the existing custom OAuth provider path:\n\n1. The chosen provider id registers an OAuth provider using the owner-approved display name.\n2. `/login` calls the existing auth storage login path for that provider.\n3. The provider opens an xAI authorization URL using OIDC discovery, PKCE, `state`, and a loopback callback.\n4. The callback exchanges the authorization code for access and refresh tokens.\n5. Credentials are stored by the existing auth storage code path.\n6. Refresh uses the stored refresh token and validates the token endpoint origin.\n\nSecurity constraints:\n\n- OIDC `authorization_endpoint` and `token_endpoint` must be HTTPS and under owner-approved xAI hosts.\n- The callback server binds to loopback by default.\n- The callback must reject state mismatches.\n- Access and refresh tokens must not be logged, rendered, committed, or included in tests.\n- Error messages may include status and provider error text, but not credential values.\n- Env overrides for base URL, scope, callback host, or client id must be treated as local developer/debug escape hatches, not default product behavior.\n\n## Bundled-loading trust boundary\n\nA bundled provider is different from ordinary user extension discovery, but loading it while `disableExtensionDiscovery: true` still expands the bootstrap trust boundary. Owner sign-off is required before implementation.\n\nMinimum guardrails if accepted:\n\n- Load only source-controlled, maintainer-reviewed bundled provider paths.\n- Use a static allowlist or exported enumerator; never scan arbitrary user directories for this path.\n- Do not install, fetch, or resolve remote package code at runtime.\n- Keep ordinary user extension discovery disabled when `disableExtensionDiscovery: true`; the exception is only for bundled provider defaults.\n- Add tests proving bundled providers load before model selection and caller-supplied `additionalExtensionPaths` still coexist.\n- Keep this bootstrap change separate from the Grok vendor implementation so it can be reviewed independently.\n\nAlternatives the owner may choose:\n\n- Do not load bundled providers when extension discovery is disabled; require explicit setup/defaults install.\n- Gate bundled provider loading behind a setting or compile-time default.\n- Allow bundled loading only in packaged builds, not arbitrary source checkouts.\n\n## Provider selector naming\n\nThe selector prefix is a stable user-facing contract and must be chosen before implementation.\n\n| Option | Example selector | Pros | Cons |\n| --- | --- | --- | --- |\n| `grok-cli` | `grok-cli/grok-composer-2.5-fast` | Matches the upstream CLI/proxy lineage and existing prototype. | User-facing name is less aligned with `Grok Build`; may expose implementation detail. |\n| `grok-build` | `grok-build/grok-composer-2.5-fast` | Matches UI label and requested product wording. | Diverges from existing prototype and env names; migration needed if prototypes used `grok-cli`. |\n| Owner-selected third id | `<id>/grok-composer-2.5-fast` | Lets maintainers align with broader provider taxonomy. | Requires updating all docs/tests before implementation. |\n\nUntil this is decided, implementation docs and PRs should use `<provider-id>` when describing the public selector. Internal adapter names may still use `grok-cli-responses` if maintainers accept that as an implementation detail.\n\n## Model/profile behavior\n\nModel registration should be provider-owned. If accepted, the provider should register at least:\n\n- `grok-composer-2.5-fast`\n- `grok-build`\n\nA built-in profile is optional and should be reviewed separately. If accepted, a candidate profile is:\n\n```text\ngrok-pro.default -> <provider-id>/grok-composer-2.5-fast\ngrok-pro.planner -> <provider-id>/grok-composer-2.5-fast\ngrok-pro.critic -> <provider-id>/grok-composer-2.5-fast\ngrok-pro.executor -> <provider-id>/grok-build\ngrok-pro.architect -> <provider-id>/grok-build\n```\n\nIf maintainers prefer not to add a built-in profile, the provider can still satisfy the core `/login` and `/model` flow through direct model selection.\n\n## Usage reporting behavior\n\nUsage reporting should be an optional follow-up after login/model support lands:\n\n- Provider id: the owner-selected `<provider-id>`.\n- Fetches usage with the effective OAuth access token.\n- Returns `null` when no token is available.\n- Does not require the usage provider for chat/model selection to work.\n- Should be skipped entirely if the authorized-use gate is not accepted.\n\n## Staged PR plan\n\n### PR 1: this design document\n\nPurpose: agree on caveats, owner sign-off gates, provider id, OAuth contract, bundled-loading trust boundary, model selector, security boundaries, and implementation split.\n\n### PR 2: bundled provider bootstrap contract\n\nSmall core change only, after owner sign-off on the bundled-loading gate:\n\n- Add a maintainer-owned way to enumerate bundled provider extension paths.\n- Load those paths during session/bootstrap only under the accepted guardrails.\n- Add tests proving bundled providers and caller-supplied extension paths coexist.\n\nNo Grok vendor implementation in this PR.\n\n### PR 3: Grok Build provider extension\n\nProvider implementation only, after owner sign-off on authorized use, public selector naming, and trademark/display-name:\n\n- Add bundled Grok Build provider source.\n- Register the chosen provider id, OAuth provider, and models.\n- Include sanitize and provider-specific stream handling.\n- Test `/login` provider registration and `grok-composer-2.5-fast` model availability.\n\n### PR 4: profile and model defaults\n\nOptional product-surface PR:\n\n- Add `grok-pro` only if maintainers accept a built-in profile.\n- Add model profile catalog tests.\n\n### PR 5: usage reporting\n\nOptional observability PR:\n\n- Add usage provider for the owner-selected provider id.\n- Add focused usage tests.\n\n## Acceptance criteria for the implementation series\n\n- Owner sign-off is recorded for authorized use, bundled loading, selector naming, and trademark/display-name before implementation lands.\n- Fresh checkout test proves `createAgentSession` registers the bundled provider under the accepted bootstrap rules.\n- `/login` includes the owner-approved display name for the owner-selected provider id.\n- `/model` includes `<provider-id>/grok-composer-2.5-fast`.\n- A real OAuth URL redirects to the owner-approved xAI account login page.\n- Third-party extension paths still load alongside bundled providers when configured.\n- Token values never appear in tests, logs, checked-in docs, or git history.\n\n## Open maintainer decisions\n\n- Is using `cli-chat-proxy.grok.com` plus the xAI CLI OAuth client from GJC authorized and acceptable for this project?\n- Should bundled provider defaults load while `disableExtensionDiscovery: true`, and under which guardrails?\n- Should the final public provider id be `grok-cli`, `grok-build`, or another id?\n- May GJC use `Grok Build` as the display/profile name, or should the integration use a neutral owner-selected label?\n- Should `grok-pro` be a built-in profile or documented as a user profile?\n- Should usage reporting be included in the initial provider PR or kept as a separate follow-up?",
|
|
@@ -29,7 +28,8 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
29
28
|
"keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `gjc` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.gjc/agent/keybindings.json`. The file is a JSON object whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.gjc/agent/config.yml`, and there is no nested `keybindings` object.\n\n```json\n{\n \"app.model.cycleForward\": \"Ctrl+P\",\n \"app.model.selectTemporary\": \"Alt+P\",\n \"app.plan.toggle\": \"Alt+Shift+P\"\n}\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```json\n{\n \"app.stt.toggle\": []\n}\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --- | --- | --- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models backward |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Ctrl+L` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Enter` | Send a follow-up message |\n| `app.message.queue` | `Alt+Enter` | Explicitly queue a message for the next turn |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.stt.toggle` | `Alt+H` | Toggle speech-to-text recording |\n\nOlder unqualified action names are migrated when `keybindings.json` is loaded, but new docs and new configs should use the namespaced action IDs above.\n",
|
|
30
29
|
"lsp-config.md": "# LSP configuration in GJC\n\nThis guide explains how to configure language servers for the GJC coding agent.\n\nSource of truth in code:\n\n- Server config type: `packages/coding-agent/src/lsp/types.ts` (`ServerConfig`)\n- Config loader: `packages/coding-agent/src/lsp/config.ts`\n- Built-in server definitions: `packages/coding-agent/src/lsp/defaults.json`\n\n## Auto-detection\n\nWhen no LSP config file is present, GJC auto-detects servers by intersecting two conditions:\n\n1. The project directory contains at least one of the server's `rootMarkers`.\n2. The server binary is available — checked in project-local bin directories first (e.g., `node_modules/.bin/`, `.venv/bin/`), then `$PATH`.\n\nNo configuration is required for common setups. The built-in server list covers most popular languages; see [`defaults.json`](../packages/coding-agent/src/lsp/defaults.json) for the full set.\n\n## Config file locations\n\nGJC merges LSP config from multiple files, lowest to highest priority:\n\n| Priority | Location |\n|----------|----------|\n| 5 (lowest) | `~/lsp.json`, `~/.lsp.json`, `~/lsp.yaml`, `~/.lsp.yaml` |\n| 3 | `~/.gjc/agent/lsp.json`, `~/.gjc/agent/lsp.yaml`, `~/.gemini/lsp.*` |\n| 2 | `<project>/.gjc/lsp.json`, `<project>/.gjc/lsp.yaml`, `<project>/.gemini/lsp.*` |\n| 1 (highest) | `<project>/lsp.json`, `<project>/.lsp.json`, `<project>/lsp.yaml` |\n\nEach location accepts both `.json` and `.yaml` / `.yml` variants, as well as hidden-file versions (`.lsp.json`, `.lsp.yaml`). Files are merged in order: higher-priority files override lower-priority fields for the same server. Servers not mentioned in any override file remain at their built-in defaults.\n\n**Recommended locations:**\n\n- User-wide preferences → `~/.gjc/agent/lsp.json`\n- Project-specific overrides → `<project>/.gjc/lsp.json`\n\n> **Note:** The presence of any LSP config file disables auto-detection. When at least one file is found, GJC skips the binary-scan phase and loads all servers that have matching `rootMarkers`, an available binary, and are not explicitly `disabled`.\n\n## File shape\n\nBoth JSON and YAML are accepted. The top-level object can use either a `servers` wrapper key or a flat map directly:\n\n```json\n{\n \"servers\": {\n \"server-name\": { ... }\n },\n \"idleTimeoutMs\": 300000\n}\n```\n\nor (flat, without the `servers` wrapper):\n\n```json\n{\n \"server-name\": { ... },\n \"idleTimeoutMs\": 300000\n}\n```\n\nTop-level keys:\n\n- `servers` — map of server name to `ServerConfig` (optional wrapper; flat form is equivalent)\n- `idleTimeoutMs` — shut down idle language servers after this many milliseconds; disabled by default\n\n## ServerConfig fields\n\n| Field | Type | Required | Description |\n|-------|------|----------|-------------|\n| `command` | `string` | yes | Binary name (resolved via PATH/local bins) or absolute path |\n| `args` | `string[]` | no | Arguments passed to the binary |\n| `fileTypes` | `string[]` | yes | File extensions this server handles, e.g. `[\".ts\", \".tsx\"]` |\n| `rootMarkers` | `string[]` | yes | Files/dirs that indicate a project root; glob patterns (e.g. `*.cabal`) are supported |\n| `initOptions` | `object` | no | Sent as `initializationOptions` during LSP handshake |\n| `settings` | `object` | no | Workspace settings pushed via `workspace/didChangeConfiguration` |\n| `disabled` | `boolean` | no | Set to `true` to disable this server entirely |\n| `warmupTimeoutMs` | `number` | no | Startup timeout in ms for this server (overrides the global default) |\n| `isLinter` | `boolean` | no | Mark server as linter/formatter only; excluded from type-intelligence operations (hover, go-to-definition, etc.) |\n| `capabilities` | `object` | no | Opt-in server-specific features; see [Capabilities](#capabilities) |\n\n`resolvedCommand` is populated automatically at runtime — do not set it manually.\n\n### Capabilities\n\nThe `capabilities` object enables optional server-specific features that GJC supports on a per-server basis:\n\n```json\n{\n \"capabilities\": {\n \"flycheck\": true,\n \"ssr\": true,\n \"expandMacro\": true,\n \"runnables\": true,\n \"relatedTests\": true\n }\n}\n```\n\nAll fields are boolean and optional. They are currently used by `rust-analyzer`.\n\n## Common recipes\n\n### Override a built-in server's settings\n\nPartial overrides are merged onto the built-in defaults. You only need to specify the fields you want to change.\n\n```json\n{\n \"servers\": {\n \"typescript-language-server\": {\n \"args\": [\"--stdio\", \"--log-level\", \"4\"]\n }\n }\n}\n```\n\n```yaml\nservers:\n gopls:\n settings:\n gopls:\n gofumpt: false\n staticcheck: false\n```\n\n### Disable a built-in server\n\n```json\n{\n \"servers\": {\n \"eslint\": {\n \"disabled\": true\n }\n }\n}\n```\n\n### Register a custom server\n\nNew servers require `command`, `fileTypes`, and `rootMarkers`. All other fields are optional.\n\n```json\n{\n \"servers\": {\n \"my-lsp\": {\n \"command\": \"my-lsp-server\",\n \"args\": [\"--stdio\"],\n \"fileTypes\": [\".xyz\"],\n \"rootMarkers\": [\".xyz-project\", \".git\"]\n }\n }\n}\n```\n\n### Set a global idle timeout\n\nShut down language servers that have been inactive for more than five minutes:\n\n```json\n{\n \"idleTimeoutMs\": 300000\n}\n```\n\n### Disable a server for one project, keep it globally\n\nPlace the override in `<project>/.gjc/lsp.json`:\n\n```json\n{\n \"servers\": {\n \"pylsp\": {\n \"disabled\": true\n }\n }\n}\n```\n\nThe user-level config in `~/.gjc/agent/lsp.json` is unaffected; pylsp is only suppressed in this project.\n\n## Built-in server list\n\nThe following servers ship in `defaults.json` and are eligible for auto-detection:\n\n| Server key | Language(s) | Binary |\n|---|---|---|\n| `rust-analyzer` | Rust | `rust-analyzer` |\n| `clangd` | C, C++, ObjC | `clangd` |\n| `zls` | Zig | `zls` |\n| `gopls` | Go | `gopls` |\n| `typescript-language-server` | TypeScript, JavaScript | `typescript-language-server` |\n| `denols` | TypeScript, JavaScript (Deno) | `deno` |\n| `biome` | TS/JS/JSON (linter) | `biome` |\n| `eslint` | TS/JS/Vue/Svelte (linter) | `vscode-eslint-language-server` |\n| `vscode-html-language-server` | HTML | `vscode-html-language-server` |\n| `vscode-css-language-server` | CSS, SCSS, Less | `vscode-css-language-server` |\n| `vscode-json-language-server` | JSON | `vscode-json-language-server` |\n| `tailwindcss` | HTML, CSS, TS/JS | `tailwindcss-language-server` |\n| `svelte` | Svelte | `svelteserver` |\n| `vue-language-server` | Vue | `vue-language-server` |\n| `astro` | Astro | `astro-ls` |\n| `pyright` | Python | `pyright-langserver` |\n| `basedpyright` | Python | `basedpyright-langserver` |\n| `pylsp` | Python | `pylsp` |\n| `ruff` | Python (linter) | `ruff` |\n| `jdtls` | Java | `jdtls` |\n| `kotlin-lsp` | Kotlin | `kotlin-lsp` |\n| `metals` | Scala | `metals` |\n| `hls` | Haskell | `haskell-language-server-wrapper` |\n| `ocamllsp` | OCaml | `ocamllsp` |\n| `elixirls` | Elixir | `elixir-ls` |\n| `erlangls` | Erlang | `erlang_ls` |\n| `gleam` | Gleam | `gleam` |\n| `solargraph` | Ruby | `solargraph` |\n| `ruby-lsp` | Ruby | `ruby-lsp` |\n| `rubocop` | Ruby (linter) | `rubocop` |\n| `bashls` | Bash, Zsh | `bash-language-server` |\n| `lua-language-server` | Lua | `lua-language-server` |\n| `intelephense` | PHP | `intelephense` |\n| `phpactor` | PHP | `phpactor` |\n| `omnisharp` | C# | `omnisharp` |\n| `yamlls` | YAML | `yaml-language-server` |\n| `terraformls` | Terraform | `terraform-ls` |\n| `dockerls` | Dockerfile | `docker-langserver` |\n| `helm-ls` | Helm | `helm_ls` |\n| `nixd` | Nix | `nixd` |\n| `nil` | Nix | `nil` |\n| `ols` | Odin | `ols` |\n| `dartls` | Dart | `dart` |\n| `marksman` | Markdown | `marksman` |\n| `texlab` | LaTeX | `texlab` |\n| `graphql` | GraphQL | `graphql-lsp` |\n| `prismals` | Prisma | `prisma-language-server` |\n| `vimls` | Vim script | `vim-language-server` |\n| `emmet-language-server` | HTML, CSS, JSX | `emmet-language-server` |\n| `sourcekit-lsp` | Swift | `sourcekit-lsp` |\n| `swiftlint` | Swift (linter) | `swiftlint` |\n| `tlaplus` | TLA+ | `tlapm_lsp` |\n",
|
|
31
30
|
"memory.md": "# Autonomous Memory\n\nWhen enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into each new session. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable via `/settings` or `config.yml`:\n\n```yaml\nmemories:\n enabled: true\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Pair memory-influenced decisions with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Memory artifacts\n\nGenerated local-memory artifacts are private runtime state, not a public tool or URI surface. They may be summarized into the system prompt when local memory is enabled, but users and model-facing tool docs should not rely on direct `memory://` reads. The legacy internal `memory://` resolver remains only for compatibility with existing persisted guidance and is not part of the public coding harness contract; remove it after legacy local-memory prompts no longer reference it.\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | ---------------------------------------------- |\n| `view` | Show the current memory injection payload |\n| `clear` / `reset` | Delete all memory data and generated artifacts |\n| `enqueue` / `rebuild` | Force consolidation to run at next startup |\n\n## How it works\n\nMemories are built by a background pipeline that runs at startup or when manually triggered via slash command.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, or currently active are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nAll output is scanned for secrets before being written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| --------------------- | ------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | --------------------------------------------------------- |\n| `memories.enabled` | `false` | Master switch |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — legacy non-public `memory://` compatibility handler\n",
|
|
32
|
-
"models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects models for the default and agent roles\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.gjc/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n cacheRetention: short # none | short | long; model entries and modelOverrides can override this\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n cacheRetention: long\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n cacheRetention: none\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\nmodelBindings:\n modelRoles:\n default: my-provider/some-model-id:high\n agentModelOverrides:\n executor: my-provider/some-model-id\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `bedrock-converse-stream`\n- `anthropic-messages`\n- `bedrock-converse-stream`\n- `google-generative-ai`\n- `google-vertex`\n- `google-gemini-cli`\n- `ollama-chat`\n- `cursor-agent`\n\n\n### First-class Azure OpenAI and Amazon Bedrock examples\n\nAzure OpenAI uses canonical OpenAI model IDs in GJC and resolves those IDs to Azure deployment names at request time. Set `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` to avoid assuming model id equals deployment name:\n\n```yaml\nproviders:\n azure-openai:\n baseUrl: https://my-resource.openai.azure.com/openai/v1\n apiKeyEnv: AZURE_OPENAI_API_KEY\n api: azure-openai-responses\n models:\n - id: gpt-4.1\n - id: o3\n```\n\n```sh\nexport AZURE_OPENAI_DEPLOYMENT_NAME_MAP='gpt-4.1=gpt-41-prod,o3=o3-reasoning-prod'\n```\n\nAmazon Bedrock uses the native `bedrock-converse-stream` transport and AWS credential chain auth. Do not put AWS access keys in `models.yml`; configure `AWS_REGION` / `AWS_PROFILE` or standard static AWS credential environment variables instead:\n\n```yaml\nproviders:\n amazon-bedrock:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com\n api: bedrock-converse-stream\n models:\n - id: us.anthropic.claude-opus-4-6-v1\n - id: anthropic.claude-3-5-sonnet-20241022-v2:0\n```\n\n### MiniMax and GLM custom provider examples\n\nFor common MiniMax and GLM/zAI setup, prefer the provider presets so the OpenAI-compatible API, base URL, env var, model id, and compatibility flags are written together:\n\n```sh\ngjc setup provider --preset minimax\ngjc setup provider --preset minimax-cn\ngjc setup provider --preset glm\n```\n\nThe same presets are available inside the TUI:\n\n```text\n/provider add --preset minimax\n/provider add --preset glm\n/provider add zai\n```\n\nPresets only write `models.yml` entries that reference documented environment variable names (`MINIMAX_CODE_API_KEY`, `MINIMAX_CODE_CN_API_KEY`, or `ZAI_API_KEY`); they do not store or validate real credentials. The GLM preset aliases (`glm`, `zai`, `z-ai`) write an OpenAI-compatible custom provider named `glm-proxy` and do not replace the first-class `zai` provider.\n\n## Model profiles (`--mpreset`)\n\nModel profiles are optional top-level `profiles:` entries in `~/.gjc/agent/models.yml`. A profile can require provider credentials before activation and can map one or more model roles; omitted roles inherit from the active defaults.\n\n```yaml\nprofiles:\n team-standard:\n required_providers: [openai, anthropic]\n model_mapping:\n default: openai/gpt-5.2\n executor: anthropic/claude-sonnet-4-6:medium\n architect: openai/o3:high\n planner: openai/o3:high\n critic: openai/o3:high\n```\n\n`model_mapping` keys are role names (`default`, `executor`, `architect`, `planner`, `critic`). Each role maps to exactly one model selector in the form `provider/modelId[:effort]`; comma-separated fallback chains are not supported in a single role value.\n`required_providers` is the aggregate set of providers required across the profile's mapped roles, not a per-role fallback chain.\n\nBuilt-in profiles are grouped by provider mix and tier:\n\n- `codex-{eco,medium,pro}` — all roles on `openai-codex/gpt-5.5`, differing only by per-role reasoning effort\n- `opencodego` — single OpenCode Go preset (Kimi default, DeepSeek executor/architect, Qwen planner, MiMo critic)\n- `claude-opus` — Anthropic OAuth preset centered on `claude-opus-4-8`\n- Single-provider tiers: `glm-{eco,medium,pro}`, `kimi-coding-plan-{eco,medium,pro}`, `mimo-{eco,medium,pro}`, `grok-{eco,medium,pro}`, `cursor-{eco,medium,pro}`, `minimax-{eco,medium,pro}`\n- Combos: `opus-codex` (Claude main agent with Codex support roles), `codex-opencodego` (Codex orchestrator/architect with OpenCode Go workers)\n\nThe `eco` tier favors cheaper/faster defaults, `medium` matches normal production defaults, and `pro` raises reasoning for architect, critic, and planner roles. Effort suffixes are clamped to each model's supported thinking range at preview and activation time (for example `codex-eco`'s executor `:minimal` resolves to effective `low` on `gpt-5.5`). Single-provider tiers pin each provider's current flagship (`zai/glm-5.2`, `kimi-code/kimi-k2.7-code`, `xiaomi/mimo-v2.5-pro`, `xai/grok-4.3`, `cursor/composer-1.5`, `minimax-code/minimax-m3`). User-defined profiles override built-ins by exact profile name.\n\n\nUse `gjc --mpreset <name>` to activate a profile for the current session only. Activation hard-blocks when any provider listed in `required_providers` lacks credentials. Add `--default` to persist the selected profile as `modelProfile.default` in `config.yml`, so it applies at startup:\n\n```sh\ngjc --mpreset codex-medium\ngjc --mpreset opencodego --default\n```\n\nThe `/model` command opens to a preset landing view: presets are grouped by provider with live auth marks (✓/✗), highlighting a group expands its tiers, and selecting a tier shows the full role→model preview before applying for the session or as default. Typing jumps straight to model search, and `Browse all models` opens the classic tabbed model selector. In `/login`, `Add custom provider` is the first option for configuring credentials needed by custom or profile-required providers; after a successful provider login, the matching preset is recommended automatically.\n\nMiniMax's OpenAI-compatible endpoint rejects multiple system messages and emits thinking in `reasoning_content`, so pin the public-safe compatibility fields when hand-authoring a custom provider:\n\n```yaml\nproviders:\n minimax-custom:\n baseUrl: https://api.minimax.io/v1\n apiKeyEnv: MINIMAX_API_KEY\n api: openai-completions\n compat:\n supportsStore: false\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n reasoningContentField: reasoning_content\n models:\n - id: MiniMax-M2.5\n```\n\nGLM via z.ai is available as the first-class `zai` provider. For a private GLM-compatible proxy, keep secrets in an env var and disable OpenAI-only request fields as needed:\n\n```yaml\nproviders:\n glm-proxy:\n baseUrl: https://api.z.ai/api/paas/v4\n apiKeyEnv: ZAI_API_KEY\n api: openai-completions\n compat:\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n models:\n - id: glm-4.6\n```\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `models.yml` is strict: unknown provider/model keys fail validation before provider dispatch, so stale keys such as `requestTransform` or `wireModelId` only work where this document lists them.\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n- `cacheRetention`: `none`, `short`, or `long`; request-time options win over model/modelOverride values, then provider values, then `GJC_CACHE_RETENTION`, then the runtime default. The runtime default is `short` for most providers, but the Anthropic provider defaults to `long` (`ttl: \"1h\"`) because the ~5m default is too fragile for long-running subagent workflows. The 1h marker is only emitted on the canonical Anthropic API (`api.anthropic.com`) for models advertising `supportsLongCacheRetention`; proxies, gateways, and incapable models fall back to the default ephemeral (~5m) breakpoint. For OpenAI Responses, this controls `prompt_cache_retention` only; it does not disable `prompt_cache_key` when a stable session id exists.\n\n## OpenAI-compatible proxy configuration\n\nOpenAI-compatible proxy providers should use schema-supported provider keys first:\n\n```yaml\nproviders:\n proxy-provider:\n baseUrl: https://api.proxy.example/v1\n apiKeyEnv: PROXY_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: local-gpt\n name: Local GPT\n reasoning: true\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n```\n\nUse provider-level `headers` for proxy-required headers. Keep the provider `api` set to `openai-completions` when the proxy exposes Chat Completions-compatible `/v1/chat/completions` semantics. `auth: apiKey` sends the resolved token as bearer auth; use `auth: none` only for trusted local/no-auth endpoints.\n\n`requestTransform` and `wireModelId` remain supported for request-body shaping, but they are not needed for ordinary OpenAI-compatible proxies whose local model id is already the upstream wire id. Unknown config keys fail validation before a provider request is sent.\n\nWhen request shaping is needed:\n\n- `requestTransform.profile: openai-proxy` strips OpenAI SDK/Stainless telemetry and beta headers at final fetch time and sets a generic GJC user agent.\n- `stripHeaders` replaces the preset strip list when provided.\n- `setHeaders` is applied after stripping; use `null` to remove a header.\n- `extraBody` is shallow-merged into the JSON request body after provider compatibility fields; core transport keys such as `model`, `messages`/`input`, `stream`, `tools`, and `tool_choice` are protected and ignored.\n- Model-level `requestTransform` overrides provider-level fields and shallow-merges `setHeaders`/`extraBody`.\n- `wireModelId` changes only the upstream request body model id; local selection still uses `provider/id`.\n\n### Layofflabs-style proxy example\n\n```yaml\nproviders:\n layofflabs:\n baseUrl: https://api.layofflabs.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-5.5\n name: GPT 5.5 via Layofflabs\n reasoning: true\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n\nmodelBindings:\n modelRoles:\n default: layofflabs/gpt-5.5:high\n agentModelOverrides:\n executor: layofflabs/gpt-5.5:high\n```\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `requestTransform`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n- unknown provider, model, override, and request-transform keys fail schema validation; remove stale keys instead of relying on them being ignored.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@gajae-code/ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `requestTransform`, `disableStrictTools`, `cacheRetention`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `anthropic-model-opus-4-6`\n- `anthropic-model-haiku-4-5`\n- `gpt-5.3-openai-code`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: openai-code\n name: Zenmux OpenAI code\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/openai-code: gpt-5.3-openai-code\n p-openai-code/openai-code: gpt-5.3-openai-code\n exclude:\n - demo/openai-code-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `GJC_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`GJC_AUTH_BROKER_URL`, `GJC_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default` plus the agent assignment targets `executor`, `architect`, `planner`, `critic`\n\nRole aliases like `pi/default` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI code provider transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-openai-code` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - anthropic-model-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/anthropic-model-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error. It is **off by default** (`contextPromotion.enabled` is `false`); opt in to enable it.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI code provider websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-code:\n modelOverrides:\n gpt-5.3-openai-code-spark:\n contextPromotionTarget: openai-code/gpt-5.3-openai-code\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/anthropic-model-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.gjc/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
|
|
31
|
+
"models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects models for the default and agent roles\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.gjc/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n cacheRetention: short # none | short | long; model entries and modelOverrides can override this\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n cacheRetention: long\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n cacheRetention: none\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\nmodelBindings:\n modelRoles:\n default: my-provider/some-model-id:high\n agentModelOverrides:\n executor: my-provider/some-model-id\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `bedrock-converse-stream`\n- `anthropic-messages`\n- `bedrock-converse-stream`\n- `google-generative-ai`\n- `google-vertex`\n- `google-gemini-cli`\n- `ollama-chat`\n- `cursor-agent`\n\n\n### First-class Azure OpenAI and Amazon Bedrock examples\n\nAzure OpenAI uses canonical OpenAI model IDs in GJC and resolves those IDs to Azure deployment names at request time. Set `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` to avoid assuming model id equals deployment name:\n\n```yaml\nproviders:\n azure-openai:\n baseUrl: https://my-resource.openai.azure.com/openai/v1\n apiKeyEnv: AZURE_OPENAI_API_KEY\n api: azure-openai-responses\n models:\n - id: gpt-4.1\n - id: o3\n```\n\n```sh\nexport AZURE_OPENAI_DEPLOYMENT_NAME_MAP='gpt-4.1=gpt-41-prod,o3=o3-reasoning-prod'\n```\n\nAmazon Bedrock uses the native `bedrock-converse-stream` transport and AWS credential chain auth. Do not put AWS access keys in `models.yml`; configure `AWS_REGION` / `AWS_PROFILE` or standard static AWS credential environment variables instead:\n\n```yaml\nproviders:\n amazon-bedrock:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com\n api: bedrock-converse-stream\n models:\n - id: us.anthropic.claude-opus-4-6-v1\n - id: anthropic.claude-3-5-sonnet-20241022-v2:0\n```\n\n### MiniMax and GLM custom provider examples\n\nFor common MiniMax and GLM/zAI setup, prefer the provider presets so the OpenAI-compatible API, base URL, env var, model id, and compatibility flags are written together:\n\n```sh\ngjc setup provider --preset minimax\ngjc setup provider --preset minimax-cn\ngjc setup provider --preset glm\n```\n\nThe same presets are available inside the TUI:\n\n```text\n/provider add --preset minimax\n/provider add --preset glm\n/provider add zai\n```\n\nPresets only write `models.yml` entries that reference documented environment variable names (`MINIMAX_CODE_API_KEY`, `MINIMAX_CODE_CN_API_KEY`, or `ZAI_API_KEY`); they do not store or validate real credentials. The GLM preset aliases (`glm`, `zai`, `z-ai`) write an OpenAI-compatible custom provider named `glm-proxy` and do not replace the first-class `zai` provider.\n\n## Model profiles (`--mpreset`)\n\nModel profiles are optional top-level `profiles:` entries in `~/.gjc/agent/models.yml`. A profile can require provider credentials before activation and can map one or more model roles; omitted roles inherit from the active defaults.\n\n> See also: [Cross-vendor role-based profiles](./multi-vendor-profiles.md) — a curated multi-vendor `profiles:` recipe and verified selector notes that build on the mechanism described here.\n\n```yaml\nprofiles:\n team-standard:\n required_providers: [openai, anthropic]\n model_mapping:\n default: openai/gpt-5.2\n executor: anthropic/claude-sonnet-4-6:medium\n architect: openai/o3:high\n planner: openai/o3:high\n critic: openai/o3:high\n```\n\n`model_mapping` keys are role names (`default`, `executor`, `architect`, `planner`, `critic`). Each role maps to exactly one model selector in the form `provider/modelId[:effort]`; comma-separated fallback chains are not supported in a single role value.\n`required_providers` is the aggregate set of providers required across the profile's mapped roles, not a per-role fallback chain.\n\nBuilt-in profiles are grouped by provider mix and tier:\n\n- `codex-{eco,medium,pro}` — all roles on `openai-codex/gpt-5.5`, differing only by per-role reasoning effort\n- `opencodego` — single OpenCode Go preset (Kimi default, DeepSeek executor/architect, Qwen planner, MiMo critic)\n- `claude-opus` — Anthropic OAuth preset centered on `claude-opus-4-8`\n- Single-provider tiers: `glm-{eco,medium,pro}`, `kimi-coding-plan-{eco,medium,pro}`, `mimo-{eco,medium,pro}`, `grok-{eco,medium,pro}`, `cursor-{eco,medium,pro}`, `minimax-{eco,medium,pro}`\n- Combos: `opus-codex` (Claude main agent with Codex support roles), `codex-opencodego` (Codex orchestrator/architect with OpenCode Go workers)\n\nThe `eco` tier favors cheaper/faster defaults, `medium` matches normal production defaults, and `pro` raises reasoning for architect, critic, and planner roles. Effort suffixes are clamped to each model's supported thinking range at preview and activation time (for example `codex-eco`'s executor `:minimal` resolves to effective `low` on `gpt-5.5`). Single-provider tiers pin each provider's current flagship (`zai/glm-5.2`, `kimi-code/kimi-k2.7-code`, `xiaomi/mimo-v2.5-pro`, `xai/grok-4.3`, `cursor/composer-1.5`, `minimax-code/minimax-m3`). User-defined profiles override built-ins by exact profile name.\n\n\nUse `gjc --mpreset <name>` to activate a profile for the current session only. Activation hard-blocks when any provider listed in `required_providers` lacks credentials. Add `--default` to persist the selected profile as `modelProfile.default` in `config.yml`, so it applies at startup:\n\n```sh\ngjc --mpreset codex-medium\ngjc --mpreset opencodego --default\n```\n\nThe `/model` command opens to a preset landing view: presets are grouped by provider with live auth marks (✓/✗), highlighting a group expands its tiers, and selecting a tier shows the full role→model preview before applying for the session or as default. Typing jumps straight to model search, and `Browse all models` opens the classic tabbed model selector. In `/login`, `Add custom provider` is the first option for configuring credentials needed by custom or profile-required providers; after a successful provider login, the matching preset is recommended automatically.\n\nMiniMax's OpenAI-compatible endpoint rejects multiple system messages and emits thinking in `reasoning_content`, so pin the public-safe compatibility fields when hand-authoring a custom provider:\n\n```yaml\nproviders:\n minimax-custom:\n baseUrl: https://api.minimax.io/v1\n apiKeyEnv: MINIMAX_API_KEY\n api: openai-completions\n compat:\n supportsStore: false\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n reasoningContentField: reasoning_content\n models:\n - id: MiniMax-M2.5\n```\n\nGLM via z.ai is available as the first-class `zai` provider. For a private GLM-compatible proxy, keep secrets in an env var and disable OpenAI-only request fields as needed:\n\n```yaml\nproviders:\n glm-proxy:\n baseUrl: https://api.z.ai/api/paas/v4\n apiKeyEnv: ZAI_API_KEY\n api: openai-completions\n compat:\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n models:\n - id: glm-4.6\n```\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `models.yml` is strict: unknown provider/model keys fail validation before provider dispatch, so stale keys such as `requestTransform` or `wireModelId` only work where this document lists them.\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n- `cacheRetention`: `none`, `short`, or `long`; request-time options win over model/modelOverride values, then provider values, then `GJC_CACHE_RETENTION`, then the runtime default. The runtime default is `short` for most providers, but the Anthropic provider defaults to `long` (`ttl: \"1h\"`) because the ~5m default is too fragile for long-running subagent workflows. The 1h marker is only emitted on the canonical Anthropic API (`api.anthropic.com`) for models advertising `supportsLongCacheRetention`; proxies, gateways, and incapable models fall back to the default ephemeral (~5m) breakpoint. For OpenAI Responses, this controls `prompt_cache_retention` only; it does not disable `prompt_cache_key` when a stable session id exists.\n\n## OpenAI-compatible proxy configuration\n\nOpenAI-compatible proxy providers should use schema-supported provider keys first:\n\n```yaml\nproviders:\n proxy-provider:\n baseUrl: https://api.proxy.example/v1\n apiKeyEnv: PROXY_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: local-gpt\n name: Local GPT\n reasoning: true\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n```\n\nUse provider-level `headers` for proxy-required headers. Keep the provider `api` set to `openai-completions` when the proxy exposes Chat Completions-compatible `/v1/chat/completions` semantics. `auth: apiKey` sends the resolved token as bearer auth; use `auth: none` only for trusted local/no-auth endpoints.\n\n`requestTransform` and `wireModelId` remain supported for request-body shaping, but they are not needed for ordinary OpenAI-compatible proxies whose local model id is already the upstream wire id. Unknown config keys fail validation before a provider request is sent.\n\nWhen request shaping is needed:\n\n- `requestTransform.profile: openai-proxy` strips OpenAI SDK/Stainless telemetry and beta headers at final fetch time and sets a generic GJC user agent.\n- `stripHeaders` replaces the preset strip list when provided.\n- `setHeaders` is applied after stripping; use `null` to remove a header.\n- `extraBody` is shallow-merged into the JSON request body after provider compatibility fields; core transport keys such as `model`, `messages`/`input`, `stream`, `tools`, and `tool_choice` are protected and ignored.\n- Model-level `requestTransform` overrides provider-level fields and shallow-merges `setHeaders`/`extraBody`.\n- `wireModelId` changes only the upstream request body model id; local selection still uses `provider/id`.\n\n### Layofflabs-style proxy example\n\n```yaml\nproviders:\n layofflabs:\n baseUrl: https://api.layofflabs.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-5.5\n name: GPT 5.5 via Layofflabs\n reasoning: true\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n\nmodelBindings:\n modelRoles:\n default: layofflabs/gpt-5.5:high\n agentModelOverrides:\n executor: layofflabs/gpt-5.5:high\n```\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `requestTransform`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n- unknown provider, model, override, and request-transform keys fail schema validation; remove stale keys instead of relying on them being ignored.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@gajae-code/ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `requestTransform`, `disableStrictTools`, `cacheRetention`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `anthropic-model-opus-4-6`\n- `anthropic-model-haiku-4-5`\n- `gpt-5.3-openai-code`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: openai-code\n name: Zenmux OpenAI code\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/openai-code: gpt-5.3-openai-code\n p-openai-code/openai-code: gpt-5.3-openai-code\n exclude:\n - demo/openai-code-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `GJC_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`GJC_AUTH_BROKER_URL`, `GJC_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default` plus the agent assignment targets `executor`, `architect`, `planner`, `critic`\n\nRole aliases like `pi/default` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI code provider transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-openai-code` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - anthropic-model-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/anthropic-model-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error. It is **off by default** (`contextPromotion.enabled` is `false`); opt in to enable it.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI code provider websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-code:\n modelOverrides:\n gpt-5.3-openai-code-spark:\n contextPromotionTarget: openai-code/gpt-5.3-openai-code\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `sendSessionHeaders` — forward the agent session id as `session_id` and `x-session-id` request headers so OpenAI-compatible relays/proxies can do session-affinity routing and reuse a server-side prompt cache. Default: `false`. Caller-set `headers`/`requestTransform` values are never overwritten.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/anthropic-model-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.gjc/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
|
|
32
|
+
"multi-vendor-profiles.md": "# Choosing models in GJC: role-based profiles\n\nA practical guide to picking models for GJC's roles, for every subscription situation — one vendor, two vendors, or the full multi-vendor set. It adds curated cross-vendor `profiles:` for `~/.gjc/agent/models.yml` and verified selector notes on top of the mechanism in [Model profiles](./models.md#model-profiles---mpreset). Everything here is **user config**; it complements the built-in `--mpreset` presets and overrides a built-in only when it shares its exact name.\n\n> Selectors, prices, and \"axis leaders\" are catalog- and time-sensitive (observed 2026-06 on the current bundled catalog). Re-verify any selector with `gjc -p --no-session --no-tools --model <selector> \"Reply OK\"`.\n\n## The five roles\n\n`default` runs the main loop and most turns; `executor` / `architect` / `planner` / `critic` are the four bundled task agents, delegated only when the work calls for it.\n\n| Role | What it optimizes for |\n| --- | --- |\n| `default` | tool-calling reliability + honesty (it routes — its quality bounds the whole system) |\n| `executor` | real coding (SWE-bench Verified) |\n| `planner` | reasoning + sequencing (GPQA / ARC-AGI-2) |\n| `architect` | large-context + multimodal review |\n| `critic` | independent adversarial review (different family from what it reviews) |\n\n## Pick by what you subscribe to\n\n| You have | Use |\n| --- | --- |\n| **One vendor** | the built-in preset for that vendor — `claude-opus` (Anthropic), `codex-{eco,medium,pro}` (OpenAI/Codex), `opencodego` (OpenCode Go), or a single-vendor flagship tier (`zai/glm-5.2`, `kimi-code/...`, `xiaomi/...`, `xai/grok-4.3`, `minimax-code/...`). These already map all five roles inside one vendor. |\n| **Claude + Codex** | the built-in `opus-codex` (Claude main loop + Codex support roles). |\n| **Three or more / all five** | the cross-vendor profiles below — each role on its axis leader, `critic` kept cross-family. |\n\nThe single guiding rule across all of these: **keep `default` on the strongest router you have** (Anthropic Opus when available). A weak `default` caps quality regardless of the delegated models.\n\n## Cross-vendor profiles (3+ vendors)\n\nNo single vendor leads every axis, so these put each role on its axis leader and keep `critic` on a different family from the `executor` it reviews.\n\n```yaml\nprofiles:\n\n daily: # everyday balance\n required_providers: [anthropic, openai-codex, google-antigravity, xai]\n model_mapping:\n default: anthropic/claude-opus-4-8:medium\n executor: openai-codex/gpt-5.4:high\n planner: google-antigravity/gemini-3.1-pro-low:high\n architect: google-antigravity/gemini-3.1-pro-low:high\n critic: xai/grok-4.3:medium\n\n ultimate: # cost-no-object, best per role\n required_providers: [anthropic, openai-codex, google-antigravity, xai]\n model_mapping:\n default: anthropic/claude-opus-4-8:high\n executor: anthropic/claude-opus-4-8:max\n planner: openai-codex/gpt-5.5:xhigh\n architect: google-antigravity/gemini-3.1-pro-low:high\n critic: xai/grok-4.3:high\n\n eco: # cheapest delegated work; main loop stays on Opus\n required_providers: [anthropic, opencode-go, google-antigravity, xai]\n model_mapping:\n default: anthropic/claude-opus-4-8:low\n executor: opencode-go/deepseek-v4-flash\n planner: xai/grok-4-1-fast:high\n architect: google-antigravity/gemini-3.1-pro-low\n critic: google-antigravity/gemini-3.5-flash\n\n monorepo: # huge codebases (openai-codex excluded: 272k context cap)\n required_providers: [anthropic, google-antigravity, opencode-go]\n model_mapping:\n default: anthropic/claude-opus-4-8:medium\n executor: anthropic/claude-opus-4-8:high\n planner: google-antigravity/gemini-3.1-pro-low:high\n architect: anthropic/claude-opus-4-8:high\n critic: opencode-go/glm-5.2\n```\n\n## Model cheatsheet (by need)\n\nCurrent axis leaders and the cheaper second option, with metered price ($/1M in/out; Gemini via Antigravity runs on the Google AI subscription):\n\n| Need | First pick | Cheaper option |\n| --- | --- | --- |\n| Router / tool-calling (`default`) | `anthropic/claude-opus-4-8` (5/25) | `anthropic/claude-sonnet-4-6` (3/15) |\n| Coding (`executor`) | `anthropic/claude-opus-4-8` — SWE-bench Verified ~88.6 (5/25) | `openai-codex/gpt-5.4` (2.5/15) · `opencode-go/deepseek-v4-flash` (0.14/0.28) |\n| Reasoning (`planner`) | `openai-codex/gpt-5.5` (ARC-AGI-2) / `google-antigravity/gemini-3.1-pro-low:high` (GPQA) | `xai/grok-4-1-fast` (0.2/0.5) |\n| Large context (`architect`) | `anthropic/claude-opus-4-8` (effective long-context) | `xai/grok-4-fast` (2M nominal, 0.2/0.5) |\n| Multimodal review (`architect`) | `google-antigravity/gemini-3.1-pro-low:high` | `google-antigravity/gemini-3.5-flash` |\n| Independent critic | `xai/grok-4.3` (1.25/2.5) | `opencode-go/glm-5.2` · `google-antigravity/gemini-3.5-flash` |\n\nOn standard tasks, all current frontier models in the catalog are accurate; **pick by cost, latency, and role fit, not by raw accuracy on easy prompts.** As an indicative GJC-routed latency reference (`gjc -p`, identical coding + reasoning prompts, all correct): `grok-4.3` and `glm-5.2` ≈ 2–3s, `deepseek-v4-pro` ≈ 3–4s, `claude-opus-4-8` / `gpt-5.5` ≈ 4–7s, `gemini-3.1-pro-low:high` ≈ 7s.\n\n## Verified selector notes (current catalog)\n\nObserved via live `gjc -p` calls; useful when wiring the profiles above:\n\n- **Antigravity Gemini, high reasoning** → use `google-antigravity/gemini-3.1-pro-low:high`. The id `gemini-3.1-pro-high` returns HTTP 400 (no matching backend model); `thinkingLevel` is a per-request parameter, so raising it on `gemini-3.1-pro-low` invokes the model's native high-reasoning mode rather than a degraded one.\n- **openai-codex on a ChatGPT account** serves base GPT only (`gpt-5.5`, `gpt-5.4`). Standalone `-codex` variants (`gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-max` / `-mini`) return `not supported when using Codex with a ChatGPT account`.\n- **Single-message input limit is separate from the context window.** `claude-opus-4-8` runs with a 1M window via multi-turn accumulation, but a single `@file` message above ~400k tokens returns 400 on `anthropic` / `google-antigravity`; `xai` / `opencode-go` accept larger single messages. Chunk very large inputs across turns instead of pasting one block.\n- **Some selectors come from a provider's live catalog, not the bundled snapshot.** `opencode-go/glm-5.2` and `google-antigravity/gemini-3.5-flash` resolved in `gjc -p` tests but are **not** in `packages/ai/src/models.json`; they appear only after the provider's online model discovery has populated the registry. `required_providers` verifies credentials at activation — it does **not** guarantee fresh, non-stale discovery — so activation can still fail with `selector did not resolve` until discovery runs (re-login or retry to refresh). If you hit that, substitute a bundled id: `opencode-go/deepseek-v4-pro` for the critic, or `zai/glm-5.2` (add `zai` to `required_providers`) for GLM 5.2.\n\n## Activation\n\n```bash\ngjc --mpreset daily # this session only\ngjc --mpreset ultimate --default # persist as the startup default (config.yml)\n```\n\nActivation hard-blocks when any provider in `required_providers` lacks credentials, so log in first: `/login anthropic`, `/login openai-codex`, `/login google-antigravity`, `/login xai` (and `opencode-go` via `OPENCODE_API_KEY`).\n",
|
|
33
33
|
"native-ffi-optimization-policy.md": "# ADR: Native FFI Optimization Policy\n\n- Status: Accepted\n- Scope: `crates/pi-natives` algorithmic ports proposed for performance reasons\n- Related: [`porting-to-natives.md`](./porting-to-natives.md), [`natives-architecture.md`](./natives-architecture.md), [`natives-binding-contract.md`](./natives-binding-contract.md), [`cpu-hotspot-map.json`](./cpu-hotspot-map.json), [`hotspot-map-successor.md`](./hotspot-map-successor.md)\n\n## Decision\n\nA new native (Rust N-API / FFI) port proposed **to optimize a leftover hot path** does not land unless **all** of the following gates pass:\n\n1. **Corpus evidence** — a profiling-corpus trace shows the path has user-visible latency or RSS impact on a representative workload (not just a static complexity argument).\n2. **Self-time attribution** — a `profilerSelfTime` artifact identifies the proposed hotspot, **or** fallback-toggle evidence proves an end-to-end benefit without byte changes. Wall-clock proxy timing alone is never sufficient.\n3. **Measured FFI overhead** — the N-API call/marshalling overhead is measured against the JS/TS baseline, not assumed away.\n4. **Representative win** — a representative p50/p95 win exists on realistic inputs, not only microbenchmark seed results.\n5. **Byte parity** — a byte-identical corpus covers rendered, persisted, and provider-visible bytes for the changed path.\n6. **Operational cost** — fallback, packaging, and rollback costs are documented.\n\nThis policy governs **speculative algorithmic ports**. It does **not** re-litigate already-native platform/system surfaces (see [Scope boundary](#scope-boundary)).\n\n## Context\n\nThe CPU/memory hotspot program (Optimization Suites v1–v3, tracked in [`cpu-hotspot-map.json`](./cpu-hotspot-map.json)) is closed out. Its prioritization was a **static structural ranking** (algorithmic complexity × trigger frequency), and the map's own `method` field records that real CPU self-time was \"to be measured by the agreed profiling corpus during optimization.\" That corpus is being built separately; until its evidence exists, new native ports for leftover hotspots would repeat the same evidence gap.\n\nThe suites already produced concrete decisions that this policy codifies so they are not re-discovered:\n\n- **v2 (#530)** measured and **rejected the five remaining Rust port candidates** per the FFI cost gates after shipping only `diffLines` (H03) natively. Native overhead did not beat the JS/TS baseline for those candidates on realistic inputs.\n- **v3 (#558) rejected a native word-diff (H04)** \"without a fresh FFI gate\" — the TS fast paths were retained instead; a native port would need to re-clear gates 1–6 above.\n- **Hunt-Szymanski LCS (H05)** was implemented as a native/algorithmic replacement, then **reverted** because it produced byte-different rendered diffs (reproduced by red-team). Byte parity is the gate, not raw speed.\n- **The custom JSON length counter (H08)** was implemented, made exact, then **deleted** — an exact JS reimplementation was not faster than native `JSON.stringify`. \"More native\" is not automatically \"faster.\"\n\nThese four precedents share a root cause: a plausible algorithmic/native win that failed a real gate (cost, byte parity, or end-to-end benefit). The policy makes those gates a precondition rather than a post-hoc discovery.\n\n## Evidence taxonomy\n\nNative-port claims must classify their evidence using the same separated classes as the profiling corpus. These classes must never be conflated:\n\n- **`wallClockPhase`** — elapsed timing around a phase or operation. Useful for perceived-latency and regression detection; **insufficient** to confirm CPU self-time or to justify a port on its own.\n- **`processCpuUsage`** — `process.cpuUsage()` user/system deltas, optionally normalized by elapsed time. Indicates process-level CPU pressure; **cannot** attribute self-time to a specific hotspot.\n- **`profilerSelfTime`** — profiler (or equivalent sampled/trace) attribution of self-time to a function, module, or native symbol. **Required** before a hotspot may be called \"CPU-self-time confirmed.\"\n\nA native-optimization proposal that cites only `wallClockPhase` or `processCpuUsage` is **not** CPU-self-time confirmed and does not clear gate 2.\n\n## Approval checklist\n\nBefore opening a native-optimization PR, confirm and attach evidence for each:\n\n- [ ] Corpus trace shows user-visible latency or RSS impact for the path (gate 1).\n- [ ] `profilerSelfTime` artifact identifies the hotspot, **or** fallback-toggle before/after evidence proves end-to-end benefit without byte changes (gate 2).\n- [ ] FFI/marshalling overhead measured vs the JS/TS baseline in the same benchmark run (gate 3).\n- [ ] Representative p50/p95 win on realistic inputs, not only seeded microbench results (gate 4).\n- [ ] Byte-identical corpus covers rendered, persisted, and provider-visible bytes (gate 5).\n- [ ] Fallback, packaging (platform variants / embedded addon), and rollback costs documented (gate 6).\n\nIf any box is unchecked, keep the work in TypeScript or hold it as a tracked candidate; do not switch callsites. This mirrors the existing **Rule of thumb** in [`porting-to-natives.md`](./porting-to-natives.md): if native is not faster *and* behavior-compatible, do not switch callsites.\n\n## Scope boundary\n\nThis policy targets **speculative algorithmic ports**, not the established native surface. The following are **already native** by design and are explicitly out of scope (see `alreadyNativeExcluded` in [`cpu-hotspot-map.json`](./cpu-hotspot-map.json)):\n\n`grep`, `fd`/`glob`, text width/wrap/truncate/slice, syntax highlighting, HTML→Markdown, token counting, AST, summary, process/PTY/shell, SIXEL, clipboard, `Bun.hash.xxHash32/64`, and `JSON.parse`/`JSON.stringify`.\n\nThese are native because they are I/O, OS/process integration, or platform primitives — the criteria in [`porting-to-natives.md`](./porting-to-natives.md#when-to-port). Distinguishing them from algorithmic ports matters: a leftover algorithmic hotspot must clear gates 1–6, whereas adding a new OS/process/native-primitive binding follows the standard porting guide.\n\n## Consequences\n\n- New native algorithmic ports require profiling-corpus evidence and a measured cost gate before review; this slows speculative optimization but prevents byte-parity regressions and dead native code.\n- The default answer for a leftover hotspot is \"keep it in TypeScript\" until the corpus proves it matters.\n- Already-native platform/system primitives and new OS/process bindings are unaffected; they follow [`porting-to-natives.md`](./porting-to-natives.md) as before.\n- Reviewers can reject a native-optimization PR purely on a missing gate, citing this ADR, without re-deriving the rationale.\n\n## Follow-ups\n\n- Held native candidates (H04 word-diff, H05 LCS, and other v2-rejected candidates) stay held unless a future PR clears gates 1–6 with fresh corpus evidence.\n- When the profiling corpus lands, link its threshold/evidence ledger here so native-port proposals can cite concrete corpus artifacts.\n",
|
|
34
34
|
"natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document covers the runtime loader shipped by `@gajae-code/natives`: how `native/index.js` decides which `.node` file to require, how compiled-binary embedded payloads are extracted, and what startup failures report.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nThe loader is intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Treat an embedded-addon manifest as the authoritative compiled-binary signal when present.\n- Optionally materialize an embedded addon into a versioned per-user cache directory.\n- Attempt candidates in deterministic order and return the first addon that `require(...)` loads.\n\nThe current loader does **not** run a separate `validateNative(...)` export-presence gate. API shape is provided by the generated N-API binding file (`native/index.d.ts`) and the loaded addon itself. A stale binary therefore normally fails as a missing property or native load error rather than as a custom \"missing exports\" validation error.\n\n## Runtime inputs and derived state\n\nAt module initialization, `native/index.js` computes:\n\n- **Platform tag**: `${process.platform}-${process.arch}` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json`.\n- **Core directories**:\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/gjc` or `%USERPROFILE%/AppData/Local/gjc`.\n - Non-Windows: `~/.local/bin`.\n- **Natives cache root** (`getNativesDir()`):\n - if `$XDG_DATA_HOME/gjc` exists, `$XDG_DATA_HOME/gjc/natives`;\n - otherwise `~/.gjc/natives`.\n- **Compiled-binary mode** (`detectCompiledBinary`): true if any of:\n - embedded-addon manifest is non-null,\n - `GJC_COMPILED` env var is set,\n - `import.meta.url` contains Bun embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Variant override**: `GJC_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-arm64`\n- `win32-x64`\n\nUnsupported platforms are not rejected before probing. The loader first tries the computed candidate paths. If all fail and `platformTag` is unsupported, it throws an unsupported-platform error listing supported tags.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. `GJC_NATIVE_VARIANT=modern|baseline` wins when valid.\n2. Otherwise AVX2 support is detected:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`.\n - Windows: PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. AVX2 selects `modern`; unavailable or undetectable AVX2 selects `baseline`.\n\n### Non-x64 behavior\n\nNo variant suffix is used; the filename is `pi_natives.<platform>-<arch>.node`.\n\n### Filename construction\n\n`loader-state.js#getAddonFilenames` returns:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`:\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node`\n 3. `pi_natives.<tag>.node`\n- x64 + `baseline`:\n 1. `pi_natives.<tag>-baseline.node`\n 2. `pi_natives.<tag>.node`\n\nThe default unsuffixed fallback remains part of the x64 candidate list.\n\n## Candidate path construction and fallback ordering\n\n`resolveLoaderCandidates(...)` expands every filename across directories, then de-duplicates while preserving first occurrence order.\n\n### Non-compiled runtime\n\nFor each filename, candidates are:\n\n1. `<nativeDir>/<filename>`\n2. `<execDir>/<filename>`\n\n### Compiled runtime\n\nFor each filename, candidates are:\n\n1. `<versionedDir>/<filename>`\n2. `<userDataDir>/<filename>`\n3. `<nativeDir>/<filename>`\n4. `<execDir>/<filename>`\n\nAt load time, an extracted embedded candidate, when produced, is prepended ahead of these de-duplicated candidates.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.js` is generated by `scripts/embed-native.ts`. The reset stub exports `embeddedAddon = null`. A populated manifest has:\n\n- `platformTag`\n- `version`\n- `files[]` entries with `variant`, `filename`, and `filePath`\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when:\n\n1. compiled-binary mode is true,\n2. `embeddedAddon` is non-null,\n3. manifest `platformTag` equals the runtime platform tag,\n4. manifest `version` equals the package version,\n5. a variant-appropriate embedded file exists.\n\nVariant file selection:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization:\n\n1. Ensure `<versionedDir>` exists.\n2. Reuse `<versionedDir>/<selected filename>` if it already exists.\n3. Otherwise read `selectedEmbeddedFile.filePath` and write the target path.\n4. Return the target path as the first candidate.\n\nDirectory creation or write failures are appended to the loader error list; probing continues through normal candidates.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Load package metadata and embedded-addon manifest\n -> Compute platform/version/variant/filenames/candidate paths\n -> (compiled + embedded manifest matches?)\n yes -> try extract to versionedDir (record errors, continue)\n no -> skip extraction\n -> For each runtime candidate in order:\n require(candidate)\n -> success: return addon exports (READY)\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (tried-path diagnostics + hints)\n```\n\n## Failure behavior and diagnostics\n\n### Unsupported platform\n\nIf all candidates fail and `platformTag` is not supported, the loader throws:\n\n- `Unsupported platform: <tag>`\n- supported platform list\n- issue-reporting guidance\n\n### No loadable candidate\n\nIf the platform is supported but no candidate can be loaded, the final error includes:\n\n- `Failed to load pi_natives native addon for <platformTag>` or `<platformTag> (<variant>)`\n- every attempted path with the corresponding `require(...)` error\n- mode-specific remediation hints\n\n### Compiled-binary startup failures\n\nCompiled mode diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete the versioned cache and rerun,\n- direct release download `curl` commands for each expected filename.\n\n### Non-compiled startup failures\n\nNormal package/runtime diagnostics include:\n\n- reinstall hint (`bun install @gajae-code/natives`),\n- local rebuild command (`bun --cwd=packages/natives run build`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern bun --cwd=packages/natives run build`).\n",
|
|
35
35
|
"natives-architecture.md": "# Natives Architecture\n\n`@gajae-code/natives` is now a two-layer package around a loader:\n\n1. **CommonJS loader/package entrypoint** resolves and loads the correct `.node` addon and patches generated enum objects onto the export object.\n2. **Rust N-API module layer** implements the exported functions/classes and emits the generated TypeScript declarations.\n\nThis document is the foundation for deeper module-level docs. Performance-motivated native ports of leftover algorithmic hot paths are additionally gated by [`native-ffi-optimization-policy.md`](./native-ffi-optimization-policy.md).\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Package entrypoint and public surface\n\n`packages/natives/package.json` points directly at generated native bindings:\n\n- `main`: `./native/index.js`\n- `types`: `./native/index.d.ts`\n- `exports[\".\"].types`: `./native/index.d.ts`\n- `exports[\".\"].import`: `./native/index.js`\n\nThere is no current `packages/natives/src` TypeScript wrapper layer. Consumers import functions/classes/enums directly from `@gajae-code/natives`; the type contract is the generated `native/index.d.ts` plus enum exports appended by `scripts/gen-enums.ts`.\n\nCurrent capability groups in the generated API include:\n\n- **Search/text/code primitives**: `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `astGrep`, `astEdit`, text width/slicing/wrapping/sanitization, syntax highlighting, token counting.\n- **Execution/process/terminal primitives**: `executeShell`, `Shell`, `PtySession`, process-tree helpers, key parsing.\n- **System/media/conversion primitives**: clipboard, image resize/encode/SIXEL, HTML-to-Markdown, macOS appearance/power helpers, work profiling, Windows ProjFS overlay helpers.\n\n## Loader layer\n\n`packages/natives/native/index.js` owns runtime addon selection and optional embedded extraction.\n\n### Candidate resolution model\n\n- Platform tag is `${process.platform}-${process.arch}`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename without a variant suffix.\n\nFilename strategy:\n\n- Default: `pi_natives.<platform>-<arch>.node`\n- x64 variant: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- x64 runtime fallback includes the unsuffixed default filename after variant candidates.\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- Linux: `/proc/cpuinfo`\n- macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`\n- Windows: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`GJC_NATIVE_VARIANT` can force `modern` or `baseline`; invalid values are ignored.\n\n### Binary distribution and extraction model\n\n`packages/natives/package.json` publishes `native/`, which contains the loader, generated declarations, generated enum patch, embedded-addon manifest stub, and prebuilt `.node` artifacts.\n\nFor compiled binaries, loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`.\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/gjc` (fallback `%USERPROFILE%/AppData/Local/gjc`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates.\n\n`getNativesDir()` uses `$XDG_DATA_HOME/gjc/natives` when `$XDG_DATA_HOME/gjc` exists; otherwise it uses `~/.gjc/natives`.\n\nIf a populated embedded addon manifest is present, it is also treated as a compiled-binary signal. The loader can extract the matching embedded `.node` into the versioned cache directory before candidate probing.\n\n### Failure modes\n\nLoader failures are explicit:\n\n- **Unsupported platform tag**: after failed probing, throws with supported platform list.\n- **No loadable candidate**: throws with all attempted paths and remediation hints.\n- **Embedded extraction errors**: directory/write failures are recorded and included in final load diagnostics if no candidate loads.\n\nThe current loader does not perform a separate post-`require` export validation pass.\n\n## Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` declares exported module ownership:\n\n- `appearance`\n- `ast`\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `image`\n- `keys`\n- `language`\n- `power`\n- `prof`\n- `projfs_overlay`\n- `ps`\n- `pty`\n- `shell`\n- `task`\n- `text`\n- `tokens`\n- `utils` (crate-private helpers)\n\nN-API exports are generated from Rust `#[napi]` functions/classes/objects/enums. Snake_case Rust names are exposed as camelCase JavaScript names unless explicitly configured by napi-rs.\n\n## Ownership boundaries\n\n- **Loader/package ownership (`packages/natives/native`, `packages/natives/scripts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary embedded extraction\n - generated TypeScript declarations and enum export patching\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation consumed directly by package callers\n- **Consumer ownership (`packages/coding-agent`, `packages/tui`)**\n - user-facing policy and fallbacks that are not built into the native API\n - higher-level rendering, artifact, shell-session, and command behavior\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@gajae-code/natives`.\n2. `native/index.js` computes platform/arch/variant and candidate paths.\n3. Optional embedded binary extraction occurs for compiled distributions.\n4. The first `require(candidate)` that succeeds becomes the exported addon object.\n5. Generated enum objects are appended to `module.exports`.\n6. Caller invokes generated N-API functions/classes directly.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Generated binding declaration**: `native/index.d.ts` emitted by napi-rs during `build-native.ts`.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from embedded/cache paths before package-local paths.\n- **Embedded addon**: Build artifact metadata and file references generated into `native/embedded-addon.js` so compiled binaries can extract matching `.node` payloads.\n",
|
|
@@ -60,7 +60,6 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
|
|
|
60
60
|
"session-switching-and-recent-listing.md": "# Session switching and recent session listing\n\nThis document describes how coding-agent discovers recent sessions, resolves `--resume` targets, presents session pickers, and switches the active runtime session.\n\nIt focuses on current implementation behavior, including fallback paths and caveats.\n\n## Implementation files\n\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/cli/session-picker.ts`](../packages/coding-agent/src/cli/session-picker.ts)\n- [`../src/modes/components/session-selector.ts`](../packages/coding-agent/src/modes/components/session-selector.ts)\n- [`../src/modes/controllers/selector-controller.ts`](../packages/coding-agent/src/modes/controllers/selector-controller.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n\n## Recent-session discovery\n\n### Directory scope\n\n`SessionManager` stores sessions under a cwd-scoped directory by default:\n\n- `~/.gjc/agent/sessions/--<cwd-encoded>--/*.jsonl`\n\n`SessionManager.list(cwd, sessionDir?)` reads only that directory unless an explicit `sessionDir` is provided.\n\n### Two listing paths with different payloads\n\nThere are two different listing pipelines:\n\n1. `getRecentSessions(sessionDir, limit)` (welcome/summary view)\n - Reads only a 4KB prefix (`readTextPrefix(..., 4096)`) from each file.\n - Parses header + earliest user text preview.\n - Returns lightweight `RecentSessionInfo` with lazy `name` and `timeAgo` getters.\n - Sorts by file `mtime` descending.\n\n2. `SessionManager.list(...)` / `SessionManager.listAll()` (resume pickers and ID matching)\n - Reads full session files.\n - Builds `SessionInfo` objects (`id`, `cwd`, `title`, `messageCount`, `firstMessage`, `allMessagesText`, timestamps).\n - Drops sessions with zero `message` entries.\n - Sorts by `modified` descending.\n\n### Metadata fallback behavior\n\nFor recent summaries (`RecentSessionInfo`):\n\n- display name preference: `header.title` -> first user prompt -> `header.id` -> filename\n- name is truncated to 40 chars for compact displays\n- control characters/newlines are stripped/sanitized from title-derived names\n\nFor `SessionInfo` list entries:\n\n- `title` is `header.title` or latest compaction `shortSummary`\n- `firstMessage` is first user message text or `\"(no messages)\"`\n\n## `--continue` resolution and terminal breadcrumb preference\n\n`SessionManager.continueRecent(cwd, sessionDir?)` resolves the target in this order:\n\n1. Read terminal-scoped breadcrumb (`~/.gjc/agent/terminal-sessions/<terminal-id>`)\n2. Validate breadcrumb:\n - current terminal can be identified\n - breadcrumb cwd matches current cwd (resolved path compare)\n - referenced file still exists\n3. If breadcrumb is invalid/missing, fall back to newest file by mtime in the session dir (`findMostRecentSession`)\n4. If none found, create a new session\n\nTerminal ID derivation prefers TTY path and falls back to env-based identifiers (`KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION`).\n\nBreadcrumb writes are best-effort and non-fatal.\n\n## Startup-time resume target resolution (`main.ts`)\n\n### `--resume <value>`\n\n`createSessionManager(...)` handles string-valued `--resume` in two modes:\n\n1. Path-like value (contains `/`, `\\\\`, or ends with `.jsonl`)\n - direct `SessionManager.open(sessionArg, parsed.sessionDir)`\n\n2. ID prefix value\n - find match in `SessionManager.list(cwd, sessionDir)` by `id.startsWith(sessionArg)`\n - if no local match and `sessionDir` is not forced, try `SessionManager.listAll()`\n - first match is used (no ambiguity prompt)\n\nCross-project match behavior:\n\n- if matched session cwd differs from current cwd, CLI prompts whether to fork into current project\n- yes -> `SessionManager.forkFrom(...)`\n- no -> throws error (`Session \"...\" is in another project (...)`)\n\nNo match -> throws error (`Session \"...\" not found.`).\n\n### `--resume` (no value)\n\nHandled after initial session-manager construction:\n\n1. list local sessions with `SessionManager.list(cwd, parsed.sessionDir)`\n2. if empty: print `No sessions found` and exit early\n3. open TUI picker (`selectSession`)\n4. if canceled: print `No session selected` and exit early\n5. if selected: `SessionManager.open(selectedPath)`\n\n### `--continue`\n\nUses `SessionManager.continueRecent(...)` directly (breadcrumb-first behavior above).\n\n## Picker-based selection internals\n\n## CLI picker (`src/cli/session-picker.ts`)\n\n`selectSession(sessions)` creates a standalone TUI with `SessionSelectorComponent` and resolves exactly once:\n\n- selection -> resolves selected path\n- cancel (Esc) -> resolves `null`\n- hard exit (Ctrl+C path) -> stops TUI and `process.exit(0)`\n\n## Interactive in-session picker (`SelectorController.showSessionSelector`)\n\nFlow:\n\n1. fetch sessions from current session dir via `SessionManager.list(currentCwd, currentSessionDir)`\n2. mount `SessionSelectorComponent` in editor area using `showSelector(...)`\n3. callbacks:\n - select -> close selector and call `handleResumeSession(sessionPath)`\n - cancel -> restore editor and rerender\n - exit -> `ctx.shutdown()`\n\n## Session selector component behavior\n\n`SessionList` supports:\n\n- arrow/page navigation\n- Enter to select\n- Esc to cancel\n- Ctrl+C to exit\n- fuzzy search across session id/title/cwd/first message/all messages/path\n\nEmpty-list render behavior:\n\n- renders a message instead of crashing\n- Enter on empty does nothing (no callback)\n- Esc/Ctrl+C still work\n\nCaveat: UI text says `Press Tab to view all`, but this component currently has no Tab handler and current wiring only lists current-scope sessions.\n\n## Runtime switch execution (`AgentSession.switchSession`)\n\n`switchSession(sessionPath)` is the core in-process switch path.\n\nLifecycle/state transition:\n\n1. capture `previousSessionFile`\n2. emit `session_before_switch` hook event (`reason: \"resume\"`, cancellable)\n3. if canceled -> return `false` with no switch\n4. disconnect from current agent event stream\n5. abort active generation/tool flow\n6. clear queued steering/follow-up/next-turn message buffers\n7. flush session writer (`sessionManager.flush()`) to persist pending writes\n8. `sessionManager.setSessionFile(sessionPath)`\n - updates session file pointer\n - writes terminal breadcrumb\n - loads entries / migrates / blob-resolves / reindexes\n - if missing/invalid file data: initializes a new session at that path and rewrites header\n9. update `agent.sessionId`\n10. rebuild display context via `buildDisplaySessionContext()`\n11. restore persisted/discovered MCP tool selections and rebuild active tools/system prompt when discovery is enabled\n12. emit `session_switch` hook event (`reason: \"resume\"`, `previousSessionFile`)\n13. replace agent messages with rebuilt context and sync todos\n14. close provider sessions when switching to a different session or when same-session reload changed replay messages\n15. restore default model from `sessionContext.models.default` if available and present in model registry\n16. restore thinking level and service tier:\n - thinking uses persisted `thinking_level_change`, otherwise the configured default clamped to model capability\n - service tier uses persisted `service_tier_change`, otherwise the configured `serviceTier` setting (`\"none\"` becomes unset)\n17. reconnect agent listeners and return `true`\n\n## UI state rebuild after interactive switch\n\n`SelectorController.handleResumeSession` performs UI reset around `switchSession`:\n\n- stop loading animation\n- clear status container\n- clear pending-message UI and pending tool map\n- reset streaming component/message references\n- call `session.switchSession(...)`\n- clear chat container and rerender from session context (`renderInitialMessages`)\n- reload todos from new session artifacts\n- show `Resumed session`\n\nSo visible conversation/todo state is rebuilt from the new session file.\n\n## Startup resume vs in-session switch\n\n### Startup resume (`--continue`, `--resume`, direct open)\n\n- Session file is chosen before `createAgentSession(...)`.\n- `sdk.ts` builds `existingSession = sessionManager.buildSessionContext()`.\n- Agent messages are restored once during session creation.\n- Model/thinking are selected during creation (including restore/fallback logic).\n- Interactive mode then runs `#restoreModeFromSession()` to re-enter persisted mode state (currently plan/plan_paused).\n\n### In-session switch (`/resume`-style selector path)\n\n- Uses `AgentSession.switchSession(...)` on an already-running `AgentSession`.\n- Messages/model/thinking are rebuilt immediately in place.\n- Hook `session_before_switch`/`session_switch` events are emitted.\n- UI chat/todos are refreshed.\n- No dedicated post-switch mode restore call is made in selector flow; mode re-entry behavior is not symmetric with startup `#restoreModeFromSession()`.\n\n## Failure and edge-case behavior\n\n### Cancellation paths\n\n- CLI picker cancel -> returns `null`, caller prints `No session selected`, process exits early.\n- Interactive picker cancel -> editor restored, no session change.\n- Hook cancellation (`session_before_switch`) -> `switchSession()` returns `false`.\n\n### Empty list paths\n\n- CLI `--resume` (no value): empty list prints `No sessions found` and exits.\n- Interactive selector: empty list renders message and remains cancellable.\n\n### Missing/invalid target session file\n\nWhen opening/switching to a specific path (`setSessionFile`):\n\n- ENOENT -> treated as empty -> new session initialized at that exact path and persisted.\n- malformed/invalid header (or effectively unreadable parsed entries) -> treated as empty -> new session initialized and persisted.\n\nThis is recovery behavior, not hard failure.\n\n### Hard failures\n\nSwitch/open can still throw on true I/O failures (permission errors, rewrite failures, etc.), which propagate to callers.\n\n### ID prefix matching caveats\n\n- ID matching uses `startsWith` and takes first match in sorted list.\n- No ambiguity UI if multiple sessions share prefix.\n- `SessionManager.list(...)` excludes sessions with zero messages, so those sessions are not resumable via ID match/list picker.\n",
|
|
61
61
|
"session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/modes/controllers/input-controller.ts` — command routing (`/tree`, `/branch`, double-escape behavior)\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices:\n\n- `#byId: Map<string, SessionEntry>` — fast lookup for any entry\n- `#leafId: string | null` — current position in the tree\n- `#labelsById: Map<string, string>` — resolved labels by target entry id\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from `labelsById`\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from resolved `labelsById` for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-manager.ts`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role templated context messages\n- `custom`/`hookMessage` become user-role content messages\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `labelsById` is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? })` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Plan approval session naming\n\nWhen a user approves a plan from plan mode (`InteractiveMode.#approvePlan`), the approval handler seeds the session name from the plan's title so the resulting (fresh or compacted) session does not stay unnamed.\n\nTrigger:\n\n- Plan approval reaches `#approvePlan(...)` with `options.title` populated from the plan-approval details.\n- This runs for every approval choice (`Approve and execute`, `Approve and compact context`, plain `Approve`); the synthetic `plan-approved` prompt is what otherwise bypasses the input-controller's title-generation path.\n\nNaming source:\n\n- The normalized plan title is humanized via `humanizePlanTitle(title)` (`packages/coding-agent/src/plan-mode/approved-plan.ts`):\n - replaces runs of `-`/`_` with a single space\n - trims whitespace\n - capitalizes the first character\n - returns `\"\"` for whitespace-only / separator-only input\n- The humanized name is applied with `sessionManager.setSessionName(name, \"auto\")`. Because `setSessionName` is a no-op when `titleSource === \"user\"`, the seeded name never overrides a name the user already chose (e.g. on the `preserveContext` path where the session continues with prior naming).\n- On successful apply, the terminal title (`setSessionTerminalTitle`) and the editor border color are refreshed to reflect the new name.\n\nExamples (from `humanizePlanTitle`):\n\n- `migrate-mcp-loader` → `Migrate mcp loader`\n- `fix_session_naming` → `Fix session naming`\n- `foo--bar__baz` → `Foo bar baz`\n- `RefactorRouter` → `RefactorRouter` (no separators to expand)\n- `\"\"` / `\"---\"` → `\"\"` (no name applied)\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
|
|
62
62
|
"session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts)\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts)\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.gjc/agent/sessions/--<cwd-encoded>--/<timestamp>_<sessionId>.jsonl\n```\n\n`<cwd-encoded>` is derived from the working directory by stripping leading slash and replacing `/`, `\\\\`, and `:` with `-`.\n\nBlob store location:\n\n```text\n~/.gjc/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.gjc/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. `continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `thinking_level_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"anthropic-model-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", \"text\": \"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by `buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" }\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": \"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { \"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the entire file is rewritten to disk immediately.\n- Migration mutates in-memory entries first, then persists rewritten JSONL.\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId, byId?)` resolves what is sent to the model.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nWrites are serialized through an internal promise chain (`#persistChain`) and `NdjsonFileWriter`.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` flushes writer and calls `fsync()`.\n- Atomic full rewrites (`#rewriteFile`) write to temp file, flush+fsync, close, then rename over target.\n- Used for migrations, `setSessionName`, `rewriteEntries`, move operations, and tool-call arg rewrites.\n\n### Error behavior\n\n- Persistence errors are latched (`#persistError`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextPrefix`, `writeText`, `rename`, `unlink`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `writeLine`, `flush`, `fsync`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDefined in `session-manager.ts`:\n\n- `getRecentSessions(sessionDir, limit)` -> lightweight metadata for UI/session picker\n- `findMostRecentSession(sessionDir)` -> newest by mtime\n- `list(cwd, sessionDir?)` -> sessions in one project scope\n- `listAll()` -> sessions across all project scopes under `~/.gjc/agent/sessions`\n\nMetadata extraction reads only a prefix (`readTextPrefix(..., 4096)`) where possible.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.gjc/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts using in-memory last-prompt cache\n- Async insertion (`setImmediate`) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
|
|
63
|
-
"telegram-remote.md": "# Telegram Remote — v0 Roadmap (tiny operator button, not a cockpit)\n\nStatus: **spec + reference implementation** · Tracks issue #681 · Target: **0.6.0** · Reference implementation: [`packages/telegram-remote`](../packages/telegram-remote/README.md) (incl. rich messaging; push deferred)\n\nTelegram Remote is a tiny, safe operator surface for Gajae-Code (`gjc`) session\n**lifecycle and observation** from a phone. It is deliberately **not** a remote\nRPC cockpit, a remote shell, a config editor, or a transcript viewer. The real\nsession owner stays GJC/tmux/harness-side; Telegram is only the control button.\n\nThis document fixes the v0 contract the issue calls out before any code lands:\nthe **first backend** (Coordinator MCP), the **preset-only session model**, the\n**minimal command contract**, the **authorization posture**, and the\n**transmitted-data allowlist**. It then splits the work into PR-sized steps.\n\n## TL;DR architecture decision\n\nv0 is a thin **command + bounded-read** surface layered on the **Coordinator\nMCP**, which already exists and already enforces the safety properties this\nroadmap needs. It introduces **no new remote-control protocol** — a second\nauthenticated control protocol would require ADR-level rationale per\n[`docs/bridge.md`](bridge.md).\n\n| Concern | Reused existing surface |\n| --- | --- |\n| Cross-session enumeration + bounded status | Coordinator MCP read tools ([`docs/bot-integration.md`](bot-integration.md), [`docs/hermes-mcp-bridge.md`](hermes-mcp-bridge.md)) |\n| Preset-bounded session creation (workdir allowlist + session command) | `GJC_COORDINATOR_MCP_WORKDIR_ROOTS`, `GJC_COORDINATOR_MCP_SESSION_COMMAND` |\n| Mutation gating (fail-closed: startup opt-in **and** per-call `allow_mutation`) | `GJC_COORDINATOR_MCP_MUTATIONS` |\n| Namespacing so one bot cannot enumerate another's state | `GJC_COORDINATOR_MCP_PROFILE`, `GJC_COORDINATOR_MCP_REPO` |\n| Bounded artifact reads (byte-capped, symlink-safe) | `GJC_COORDINATOR_MCP_ARTIFACT_BYTE_CAP` |\n\nThe only genuinely new piece is a thin **Telegram gateway**: one PC-side or\nsystemd-managed process that authenticates an allowlisted Telegram user/chat,\nmaps a four-command vocabulary onto Coordinator MCP tool calls, and projects the\nalready-bounded coordinator state into short, redacted chat messages.\n\n```\n Telegram user (allowlisted chat id)\n │ bot commands: /start-session /sessions /observe /stop\n ▼\n Telegram gateway ── default-deny auth (allowlist) ──▶ reject everyone else\n (one process) ── preset resolve ────────────────▶ fixed workdir + command + task template\n │ ── Coordinator MCP calls ─────────▶ gjc mcp-serve coordinator\n │\n └─ never: arbitrary shell · raw RPC · gate answers · raw tail/transcript · secrets\n```\n\n## Why Coordinator MCP first (and not bridge or harness directly)\n\nThe issue asks which backend is the first supported target. v0 picks\n**Coordinator MCP** because it is the only existing surface that already gives\nthe gateway everything it needs without new code:\n\n- **Cross-session enumeration.** `gjc_coordinator_list_sessions` answers\n `/sessions` directly. Bridge mode serves exactly one live session per process,\n so it cannot list.\n- **Preset-bounded creation.** `gjc_coordinator_start_session` is constrained to\n `GJC_COORDINATOR_MCP_WORKDIR_ROOTS` and launches `GJC_COORDINATOR_MCP_SESSION_COMMAND`.\n That is exactly the \"fixed workdir root + fixed session command\" preset model\n below — no arbitrary command string crosses Telegram.\n- **Fail-closed mutations.** Mutating calls require both a startup mutation\n opt-in and per-call `allow_mutation: true`. Missing either fails closed, so a\n read-only gateway is the safe default.\n- **Bounded observation.** Coordinator read tools return durable, bounded status\n rather than raw scrollback, which matches the \"no raw dumps in Telegram\" rule.\n\nThe web \"steering wheel\" remote ([`docs/gajae-remote.md`](gajae-remote.md), issue\n#565) chose the harness control plane + bridge because it needs live\n`Observation`/`readyForSubmit` submit gating for a one-line submit surface.\nTelegram Remote v0 has **no submit surface** — only lifecycle and observation —\nso it does not need the harness/bridge path. The two roadmaps stay independent;\na future Telegram backend adapter could front the harness control plane behind\nthe same four commands without changing this contract.\n\n## Authority boundary contract\n\nThe Coordinator MCP and the GJC/tmux/harness runtime behind it are the sole\nauthority. The Telegram gateway and the chat user are **operators of a tiny\nbutton set**, nothing more.\n\nThe gateway MUST NOT, in v0:\n\n- run arbitrary shell, send arbitrary prompts/turns, or proxy raw RPC/MCP calls\n from chat text;\n- accept a workdir, session command, branch, or repo chosen from chat (only\n preset ids are accepted);\n- answer workflow-gate / permission / approval / structured questions (those\n stay owner-side; the gateway never enables the `questions` mutation class);\n- stream raw tmux tail, transcripts, tool arguments/results, diffs, file\n contents, environment, system prompt, or secrets to chat;\n- expand the preset task template into anything beyond a single length-capped,\n control-char-stripped task string.\n\nThe chat user MAY, in v0:\n\n- start one session from an **approved preset** (`/start-session`);\n- list live/recent sessions with concise bounded status (`/sessions`);\n- read one session's bounded public-safe status slice (`/observe`);\n- request a graceful stop/retire for one session (`/stop`), with confirmation.\n\n## Preset-only session model\n\nSession creation is **preset-only**. A preset is a named, server-side bundle —\nnever assembled from chat input:\n\n| Preset field | Source / binding | Notes |\n| --- | --- | --- |\n| `workdir` | one entry from `GJC_COORDINATOR_MCP_WORKDIR_ROOTS` | fixed; chat cannot pass a path |\n| `sessionCommand` | `GJC_COORDINATOR_MCP_SESSION_COMMAND` | fixed; e.g. `gjc --worktree`; chat cannot pass a command |\n| `taskTemplate` | optional fixed template string | optional; the only injection point is one `{{task}}` slot |\n| `taskMaxLen` | gateway config | hard length cap on the chat-supplied task string |\n| `id` | gateway config | the only preset reference a chat user may name |\n\n`/start-session <presetId> [task]` resolves `presetId` to a configured preset and,\nif the preset has a `taskTemplate`, substitutes a **single length-capped,\ncontrol-char-stripped** task string into the `{{task}}` slot. With no template,\nthe task argument is ignored or rejected per preset policy. The resolved\n`{ cwd, prompt }` is passed to `gjc_coordinator_start_session` with\n`allow_mutation: true`. No part of `workdir` or `sessionCommand` is ever derived\nfrom chat input. This is the \"smallest preset format that is useful without\nbecoming arbitrary remote execution\" the issue asks for.\n\n## Command contract (v0)\n\nFour commands plus help. Everything else is rejected as unknown.\n\n| Command | Intent | Coordinator MCP mapping | Mutation |\n| --- | --- | --- | --- |\n| `/start-session <presetId> [task]` | Create a bounded session from an approved preset | `gjc_coordinator_start_session` (preset `cwd` + templated `prompt`) | `sessions` |\n| `/sessions` | List live/recent sessions with concise status | `gjc_coordinator_list_sessions` (+ `gjc_coordinator_read_status` for derived status) | none (read) |\n| `/observe <sessionId>` | Show one session's bounded public-safe status slice | `gjc_coordinator_read_status` / `gjc_coordinator_read_coordination_status` | none (read) |\n| `/stop <sessionId>` | Request graceful stop/retire for a session | `gjc_coordinator_report_status` with `status: \"cancelled\"` (records terminal turn state) | `reports` |\n| `/help` | Show the command set | none | none |\n\nNotes:\n\n- `/observe` deliberately uses **bounded status**, not `gjc_coordinator_read_tail`.\n Raw tail is advisory debug context that can contain scrollback and is never\n surfaced to chat in v0.\n- `/stop` over Coordinator MCP records a **terminal turn status** (`cancelled`);\n per the coordinator contract this is coordination state, **not** a tmux process\n kill. v0 treats `/stop` as \"request graceful stop/retire\"; the actual process\n teardown remains an owner-side concern. A true remote teardown is deferred and\n gated behind an explicit decision (see open questions).\n- The gateway runs the Coordinator MCP with the **smallest** mutation set it\n needs: `sessions` for `/start-session`, plus `reports` only if `/stop` is\n enabled. It NEVER enables `questions`, and it exposes no read tool beyond list\n and bounded status.\n\n## Authorization and safety posture\n\n- **Default deny.** Only an explicit allowlist of Telegram user ids (and/or chat\n ids) may issue any command. Unlisted senders get a boring, identical refusal —\n no capability hints, no enumeration, no preset names.\n- **No raw dumps.** Chat output is short and redacted by construction: session\n ids, derived status, branch, timestamps. Never transcripts, tool IO, diffs,\n file contents, env, system prompt, or secrets.\n- **Allowlisted presets only.** Session creation cannot name a workdir, command,\n repo, or branch from chat — only a configured preset id.\n- **Confirmation for destructive actions.** `/stop` requires an explicit\n confirmation step (e.g. a confirm callback or a second `/stop <id> confirm`)\n before any cancellation is recorded.\n- **Stable session identity.** `/observe` and `/stop` operate on the coordinator\n `session_id`; the gateway resolves and echoes a stable id so the operator\n cannot accidentally stop the wrong owner.\n- **Telegram is the button, GJC owns the session.** The gateway never bypasses\n coordinator mutation gating, never answers PC-side gates, and degrades to\n read-only or fully closed when mutations are not opted in.\n\n## Transmitted-data contract (allowlist)\n\nOnly the fields below leave the PC into chat. Anything not listed is withheld by\ndefault; this is a typed projection from the coordinator's already-bounded\nstatus, never a passthrough of internal state.\n\n### Session list entry → chat\n\n| Field | Source | Notes |\n| --- | --- | --- |\n| `sessionId` | coordinator session id | opaque, stable id |\n| `name` | derived (repo/branch/preset or id fallback) | sanitized, length-capped |\n| `status` | derived from coordinator session/turn state | bounded enum: `idle` \\| `working` \\| `blocked` \\| `offline` |\n| `branch` | coordinator status | branch name only |\n| `lastActivityAt` | coordinator status | timestamp |\n\n### Open-session view → chat\n\n| Field | Source | Notes |\n| --- | --- | --- |\n| `sessionId`, `name`, `status`, `branch` | as above | |\n| `lifecycle` | coordinator session/turn lifecycle | bounded enum |\n| `activeTurn` | coordinator turn status | `queued`/`active`/`waiting_for_answer`/terminal, no body |\n| `blockerSummary` | coordinator status | short sanitized reason when blocked |\n\n### Never transmitted\n\nRaw tmux tail / scrollback, full transcript or message bodies, tool call\narguments or results, file contents, diffs, system prompt, environment\nvariables, tokens or secrets, and absolute paths beyond `branch`/preset\nmetadata. When content is intentionally withheld, chat shows a neutral\n*\"withheld on PC\"* marker rather than a redacted blob.\n\n## Failure states (must be boring and understandable)\n\n| Condition | Detection | Chat UX |\n| --- | --- | --- |\n| Unauthorized sender | not in allowlist | identical boring refusal; no hints |\n| Unknown preset | preset id not configured | \"unknown preset\"; no preset enumeration |\n| Mutations disabled | `coordinator_mutation_class_disabled:*` | \"session control is disabled\"; stays read-only |\n| Mutation not allowed for call | `coordinator_mutation_call_not_allowed:*` | refusal; no auto-escalation |\n| Unknown session | `unknown_session` | \"no such session\"; re-list with `/sessions` |\n| Active turn exists | `active_turn_exists` | report current turn; do not force |\n| Coordinator unreachable / session offline | liveness/`offline` | \"session offline\"; control disabled |\n| Task too long | exceeds `taskMaxLen` | rejected before any MCP call |\n\n## Open questions from the issue — v0 decisions\n\n| Question | v0 decision | Deferred |\n| --- | --- | --- |\n| Where does it live (in-repo app / example / companion package)? | Companion **gateway** spec here; first reference implementation as an example integration / small service, not a core `gjc` mode | In-repo first-class mode → later, behind a decision |\n| First session backend (tmux GJC / harness / both)? | **Coordinator MCP** (covers managed + registered visible-tmux sessions) | Harness control-plane adapter behind the same four commands → later |\n| Smallest useful preset format? | Fixed `workdir` + fixed `sessionCommand` + optional fixed `taskTemplate` with one length-capped `{{task}}` slot | Multi-step / parameterized presets → later |\n| Does `/stop` kill the process? | No — v0 records coordinator terminal status (`cancelled`); teardown stays owner-side | True remote teardown → decision-gated |\n| Hosted relay vs local bot token? | Standard Telegram bot token + allowlist; PC/systemd-hosted gateway | Hosted multi-tenant relay → ADR-gated |\n\n## Implementation plan (PR-sized steps)\n\nEach step is independently shippable; later steps stay fail-closed until wired.\n\n1. **PR 1 — this doc.** `docs/telegram-remote.md` + README cross-link. Resolves\n the backend, preset, command, authorization, and transmitted-data decisions.\n No code.\n2. **PR 2 — preset + command contract types.** Preset shape (`id`, `workdir`,\n `sessionCommand`, `taskTemplate?`, `taskMaxLen`), command parse model, and a\n typed projection `coordinator status → chat summary/view`. Tests assert the\n allowlist (no forbidden field can leak; task length cap enforced). Types only;\n no Telegram wiring.\n3. **PR 3 — gateway read path.** `/sessions` + `/observe` over coordinator read\n tools, behind an explicit opt-in, default-deny allowlist, redaction\n projection. Tests for status derivation and redaction.\n4. **PR 4 — gateway create path.** `/start-session <presetId> [task]` →\n `gjc_coordinator_start_session` with preset-bound `cwd`/`prompt` and the\n length-capped task slot. Tests for preset resolution and rejection of\n arbitrary workdir/command/task injection.\n5. **PR 5 — gateway stop path.** `/stop <sessionId>` with confirmation →\n coordinator `cancelled`. Tests for confirmation gating and unknown/active\n session handling.\n6. **PR 6 — auth + hardening.** Allowlist enforcement, boring refusals, mutation\n opt-in matrix, CHANGELOG, docs finalize. Tests prove unauthorized senders and\n non-preset inputs are rejected before any MCP call.\n\n## Non-goals (v0)\n\n- No arbitrary Telegram-side shell or raw RPC/MCP passthrough.\n- No raw transcript/tail/secret/log dumping to chat by default.\n- No filesystem editor or config editor from chat.\n- No answering of PC-side approval/confirmation/structured-question gates.\n- No second authenticated remote-control protocol (reuse Coordinator MCP; relay\n needs an ADR).\n- No remote-desktop replacement.\n\n## Key source references\n\n- Coordinator MCP contract + setup: [`docs/bot-integration.md`](bot-integration.md), [`docs/hermes-mcp-bridge.md`](hermes-mcp-bridge.md), `packages/coding-agent/src/commands/harness.ts`\n- External-control readiness classification: [`docs/external-control-readiness.md`](external-control-readiness.md)\n- Web \"steering wheel\" remote (sibling roadmap, harness/bridge-backed): [`docs/gajae-remote.md`](gajae-remote.md)\n- Bridge transport / fail-closed posture (why no second protocol): [`docs/bridge.md`](bridge.md)\n- RPC command/response contract and error shapes: [`docs/rpc.md`](rpc.md)\n- v0 reference implementation (this contract, as a small companion service): [`packages/telegram-remote`](../packages/telegram-remote/README.md)\n\n## Rich messaging and push notifications (implemented)\n\nThe reference implementation adds optional rich messaging (default on) as a presentation +\nalternate-entry layer, without widening the action surface or transmitted-data allowlist:\n\n- HTML formatting and inline keyboards (**Observe** / **Stop** / **Refresh** / **Confirm stop** /\n **Cancel**), a `setMyCommands` Bot menu (`sessions`/`observe`/`stop`/`help`/`start`; the hyphenated\n `/start-session` cannot be registered and is documented in `/help`), and `/start` onboarding.\n- Callback queries are a new authenticated input channel that reuses the **same** default-deny\n authorization, the **same** `CoordinatorClient` → Coordinator MCP calls, and the **same**\n redaction. `callback_data` is an opaque `gtr:v1:<token>` (≤64 bytes, never the session id) backed\n by TTL-bound, chat/user-bound, single-use server-side token metadata holding the exact raw id.\n Every callback is answered; unauthorized/expired/malformed/missing-chat/replayed/cancel callbacks\n are answer-only (no chat message, no backend call).\n\n**Push notifications reuse the `gjc_coordinator_watch_events` event surface.** Follow/Mute state is gateway-owned and bounded; delivery preserves the transmitted-data allowlist and does not introduce a Telegram-side session journal or shadow notification protocol.\n\n## RPC mode (second backend)\n\nThe package also supports a second backend, `GJC_TELEGRAM_REMOTE_BACKEND=rpc`,\nfor a single persistent session. In this mode the gateway dials an existing\nowner-only UNIX socket exposed by `gjc launch --output rpc`; it never spawns,\nkills, or tears down the session. Telegram acts as an attach/detach remote\nkeyboard with `/attach`, `/detach`, `/status`, `/abort`, and `/help`/`/start`.\nCoordinator browsing commands (`/sessions`, `/observe`, `/presets`,\n`/start-session`, `/stop`) are not part of RPC mode and are rejected as unknown.\n\nRPC delivery is event-driven: agent questions and gates render as inline buttons,\nand turn completion sends only the final assistant text, HTML-escaped and\nTelegram-chunked. Session exit or liveness timeout produces exactly one\nstale-attachment alert. The socket's OS ownership is the real boundary: same-UID\nclients are fully trusted in v1, while protection targets different-UID users and\nunsafe filesystem placement. Controller ownership is last-connected-wins; a later\nUDS client becomes current, the gateway alerts once, reconnects, and resyncs.\nSee [`packages/telegram-remote/README.md`](../packages/telegram-remote/README.md)\nand [`packages/telegram-remote/.env.example`](../packages/telegram-remote/.env.example)\nfor RPC knobs.\n\n## Managed service examples\n\n`packages/telegram-remote/examples/systemd/` contains Linux systemd **user-unit** examples, which are the canonical always-on deployment path:\n\n- `gjc-telegram-remote-coordinator.service` runs the default Coordinator MCP backend as one bot service.\n- `gjc-rpc-session.service` runs the persistent `gjc launch --output rpc --listen <socket>` session.\n- `gjc-telegram-remote-rpc.service` runs the RPC Telegram bot, orders after and wants the session unit, and performs a finite socket wait because `After=` is not readiness.\n- `telegram-remote.env.example` is a service-oriented env template; copied real env files must be owner-only (`0600`).\n\n`packages/telegram-remote/examples/launchd/` contains macOS LaunchAgent parity examples. They use `com.example...` labels so operators replace the namespace, and shell wrappers source a protected env file then `exec` the real command. launchd parity cannot express systemd `Wants=`/`After=`/`BindsTo=` semantics; the RPC bot wrapper performs a finite socket wait.\n\nService examples are not a new daemon and do not change the runtime contract. Coordinator mode remains one service. RPC mode remains two services because the Telegram gateway dials an existing owner-only socket and never owns the session. The examples document the same-UID socket boundary, `last-connected-wins`, concrete socket paths, `loginctl enable-linger` for boot-start Linux user services, absolute executable paths or a service-local `PATH`, no token in units/plists/argv/logs, and install/uninstall verification.\n\n—\n*[repo owner's gaebal-gajae (clawdbot) 🦞]*\n",
|
|
64
63
|
"theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@gajae-code/natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`ThemeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (14)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. built-in embedded themes (`red-claw.json`, `blue-crab.json`, `claude-code.json`, `codex.json`, and `opencode.json` compiled into `defaultThemes`)\n2. custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.gjc/agent/themes`\n- overridden by `GJC_CODING_AGENT_DIR` (`$GJC_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. dark slot fallback\n\nBuilt-in theme note: `red-claw` is the default dark GJC theme, and `blue-crab` is the default light-slot theme. Both are crustacean brand themes with separate semantic error/warning/diff-removal tokens and crab-oriented symbol overrides. Three additional bundled migration themes — `claude-code`, `codex`, and `opencode` — mirror the look of those tools for easy eye-migration. All three are dark-classified and recommended for `theme.dark`, but are selectable in either slot; they keep GJC's default symbol identity (no crab-symbol overrides).\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"red-claw\"`\n- `theme.light = \"blue-crab\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nThe settings theme picker is confirm-only; arrow-key browsing does not call `previewTheme`, so the rendered theme and displayed/persisted theme name stay aligned until Enter confirms a new selection.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme settings are persisted\n\nTheme-related settings are persisted by `Settings` to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.gjc/agent`\n- effective default file: `~/.gjc/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- `colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection; legacy built-in names `dark`/`light` map to `red-claw`/`blue-crab` unless matching custom theme files exist.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.gjc/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Display -> Dark theme` or `Display -> Light theme`) depending on which auto slot you want. All bundled themes are selectable: the crustacean defaults `red-claw` and `blue-crab`, plus the migration themes `claude-code`, `codex`, and `opencode` (dark-classified, recommended for the dark slot but selectable in either).\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n \"toolDiffAdded\": \"#9ece6a\",\n \"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n \"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and confirm the custom theme in the dark/light theme picker; arrow-key browsing is intentionally non-mutating.\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
|
|
65
64
|
"tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more choices or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Explicit options. The UI always appends `Other (type your own)`; callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. `AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds, and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `30` seconds; `0` disables timeout (`packages/coding-agent/src/config/settings-schema.ts`).\n- Prompt guidance says provide 2-5 options, but code does not enforce that (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
|
|
66
65
|
"tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/coding-agent/src/hashline/hash.ts` — stable hashline diff anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines: `-REF|before` and `+REF|after` in hashline mode, or `-LINE:COLUMN before` / `+LINE:COLUMN after` when hashlines are off.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `GJC_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and rejects the apply as an error if the live result no longer matches the preview (`stalePreview`).\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `GJC_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than applying a mismatched preview silently.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated only by totals and per-file counts, not by a byte-for-byte diff of every replacement payload.",
|