@gajae-code/coding-agent 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/dist/types/commands/ultragoal.d.ts +1 -0
  3. package/dist/types/config/settings-schema.d.ts +11 -3
  4. package/dist/types/gjc-runtime/launch-tmux.d.ts +1 -0
  5. package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +29 -0
  6. package/dist/types/harness-control-plane/finalize.d.ts +8 -0
  7. package/dist/types/harness-control-plane/receipts.d.ts +16 -1
  8. package/dist/types/harness-control-plane/types.d.ts +9 -1
  9. package/dist/types/reminders/star-reminder.d.ts +115 -0
  10. package/dist/types/session/agent-session.d.ts +18 -0
  11. package/examples/extensions/README.md +20 -41
  12. package/package.json +7 -7
  13. package/src/cli/grep-cli.ts +1 -1
  14. package/src/commands/harness.ts +42 -3
  15. package/src/commands/ultragoal.ts +1 -0
  16. package/src/config/settings-schema.ts +13 -3
  17. package/src/defaults/gjc/skills/team/SKILL.md +10 -1
  18. package/src/defaults/gjc/skills/ultragoal/SKILL.md +3 -2
  19. package/src/gjc-runtime/launch-tmux.ts +25 -2
  20. package/src/gjc-runtime/team-runtime.ts +78 -3
  21. package/src/gjc-runtime/ultragoal-guard.ts +18 -2
  22. package/src/gjc-runtime/ultragoal-runtime.ts +240 -30
  23. package/src/harness-control-plane/finalize.ts +84 -0
  24. package/src/harness-control-plane/owner.ts +13 -0
  25. package/src/harness-control-plane/receipts.ts +39 -1
  26. package/src/harness-control-plane/types.ts +25 -1
  27. package/src/internal-urls/docs-index.generated.ts +1 -1
  28. package/src/modes/interactive-mode.ts +26 -0
  29. package/src/reminders/star-reminder.ts +422 -0
  30. package/src/session/agent-session.ts +79 -13
@@ -17,10 +17,13 @@ import {
17
17
  type CompletionEvidence,
18
18
  type ReceiptEnvelope,
19
19
  type ReceiptSubject,
20
+ type ReviewFailureEvidence,
21
+ type ReviewVerdictEvidence,
20
22
  type ValidationEvidence,
21
23
  validateReceipt,
22
24
  } from "./receipts";
23
25
  import { readReceiptIndex, writeReceiptImmutable } from "./storage";
26
+ import { isReviewVerdict, type ReviewVerdict } from "./types";
24
27
 
25
28
  export interface ValidationCommandSpec {
26
29
  name: string;
@@ -49,6 +52,12 @@ export interface FinalizeOptions {
49
52
  requireTests?: boolean;
50
53
  requireCommit?: boolean;
51
54
  requirePr?: boolean;
55
+ /** Review-only sessions produce a terminal verdict instead of implementation validation. */
56
+ reviewOnly?: boolean;
57
+ /** Operator/loop-supplied terminal review verdict (closed vocabulary). */
58
+ verdict?: string | null;
59
+ /** Bounded PR/issue reference for the review target (e.g. "PR-414"). Never resolved from the live repo. */
60
+ prTarget?: string | null;
52
61
  validationCommands?: ValidationCommandSpec[];
53
62
  checks: FinalizeChecks;
54
63
  clock?: () => number;
@@ -60,6 +69,7 @@ export interface FinalizeResult {
60
69
  validation: { name: string; valid: boolean; exitStatus: number }[];
61
70
  commitHash: string | null;
62
71
  prUrl: string | null;
72
+ verdict?: ReviewVerdict | null;
63
73
  issueArtifact: string | null;
64
74
  blockers: string[];
65
75
  }
@@ -69,6 +79,8 @@ function receiptId(prefix: string): string {
69
79
  }
70
80
 
71
81
  export async function runFinalize(opts: FinalizeOptions): Promise<FinalizeResult> {
82
+ if (opts.reviewOnly) return runReviewFinalize(opts);
83
+
72
84
  const now = () => new Date(opts.clock ? opts.clock() : Date.now()).toISOString();
73
85
  const blockers: string[] = [];
74
86
  const validation: FinalizeResult["validation"] = [];
@@ -178,6 +190,78 @@ export async function runFinalize(opts: FinalizeOptions): Promise<FinalizeResult
178
190
  };
179
191
  }
180
192
 
193
+ /**
194
+ * Review-only finalizer: produces a terminal verdict receipt (no implementation validation,
195
+ * no commit/PR resolution) when a valid, autonomous verdict is supplied; otherwise writes a
196
+ * durable, bounded `review-failure` receipt suitable for fallback routing.
197
+ *
198
+ * It never *resolves* PR/commit metadata from the live repo; the only PR reference attached is
199
+ * the session's own declared review target (`prTarget`), so a review session cannot report an
200
+ * unrelated PR resolved from the current checkout.
201
+ *
202
+ * `OWNER_CONFIRMATION_REQUIRED` is a valid verdict but is NOT an autonomous success: it is
203
+ * recorded durably yet returns `completed: false` with an `owner-confirmation-required` blocker
204
+ * so downstream routing escalates to a human instead of treating it as merge-ready.
205
+ */
206
+ async function runReviewFinalize(opts: FinalizeOptions): Promise<FinalizeResult> {
207
+ const now = () => new Date(opts.clock ? opts.clock() : Date.now()).toISOString();
208
+ const prTarget = opts.prTarget ?? null;
209
+ const subject: ReceiptSubject = { workspace: opts.workspace, branch: opts.branch, head: null, commit: null };
210
+ const baseResult: Omit<FinalizeResult, "completed" | "receiptPath" | "verdict" | "blockers"> = {
211
+ validation: [],
212
+ commitHash: null,
213
+ prUrl: null,
214
+ issueArtifact: null,
215
+ };
216
+
217
+ if (!isReviewVerdict(opts.verdict)) {
218
+ const reason = opts.verdict == null ? "review-verdict-missing" : "review-verdict-invalid";
219
+ const failure: ReviewFailureEvidence = { reason, prTarget, failedAt: now(), fallback: "operator-or-omx-review" };
220
+ const receipt = buildReceipt<ReviewFailureEvidence>({
221
+ receiptId: receiptId("revfail"),
222
+ sessionId: opts.sessionId,
223
+ family: "review-failure",
224
+ source: "finalizer",
225
+ subject,
226
+ evidence: failure,
227
+ createdAt: now(),
228
+ });
229
+ const outcome = validateReceipt(receipt);
230
+ const entry = await writeReceiptImmutable(
231
+ opts.root,
232
+ opts.sessionId,
233
+ "review-failure",
234
+ receipt.receiptId,
235
+ receipt,
236
+ );
237
+ const blockers = outcome.valid ? [reason] : [reason, ...outcome.reasons];
238
+ return { ...baseResult, completed: false, receiptPath: entry.path, verdict: null, blockers };
239
+ }
240
+
241
+ const verdict = opts.verdict as ReviewVerdict;
242
+ const evidence: ReviewVerdictEvidence = {
243
+ verdict,
244
+ prTarget,
245
+ finalizedAt: now(),
246
+ summaryRef: typeof opts.prTarget === "string" ? `verdict:${verdict}@${opts.prTarget}` : `verdict:${verdict}`,
247
+ };
248
+ const receipt = buildReceipt<ReviewVerdictEvidence>({
249
+ receiptId: receiptId("verdict"),
250
+ sessionId: opts.sessionId,
251
+ family: "review-verdict",
252
+ source: "finalizer",
253
+ subject,
254
+ evidence,
255
+ createdAt: now(),
256
+ });
257
+ const outcome = validateReceipt(receipt);
258
+ const entry = await writeReceiptImmutable(opts.root, opts.sessionId, "review-verdict", receipt.receiptId, receipt);
259
+ // A confirmation-required verdict is recorded but never an autonomous success.
260
+ const humanActionRequired = verdict === "OWNER_CONFIRMATION_REQUIRED";
261
+ const completed = outcome.valid && !humanActionRequired;
262
+ const blockers = !outcome.valid ? outcome.reasons : humanActionRequired ? ["owner-confirmation-required"] : [];
263
+ return { ...baseResult, completed, receiptPath: entry.path, verdict, blockers };
264
+ }
181
265
  function git(workspace: string, args: string[]): string | null {
182
266
  try {
183
267
  return execFileSync("git", args, {
@@ -359,6 +359,14 @@ export class RuntimeOwner {
359
359
 
360
360
  async #validate(): Promise<PrimitiveResponse> {
361
361
  const state = await this.#loadState();
362
+ if (state.handle.mode === "review") {
363
+ // Review-only sessions do not run implementation validation and never attach PR metadata.
364
+ state.lifecycle = "validating";
365
+ state.updatedAt = new Date(this.#opts.clock ? this.#opts.clock() : Date.now()).toISOString();
366
+ await writeSessionState(this.#opts.root, state);
367
+ await this.#emit("info", "validated", { count: 0, reviewOnly: true });
368
+ return this.#response(state, { validation: [], reviewOnly: true });
369
+ }
362
370
  const checks = this.#finalizeChecks ?? defaultFinalizeChecks(state.handle.workspace);
363
371
  const commit = await checks.resolveCommit();
364
372
  const subject: ReceiptSubject = {
@@ -495,11 +503,15 @@ export class RuntimeOwner {
495
503
  const state = await this.#loadState();
496
504
  const workspace = state.handle.workspace;
497
505
  const checks = this.#finalizeChecks ?? defaultFinalizeChecks(workspace);
506
+ const reviewOnly = state.handle.mode === "review";
498
507
  const fin = await runFinalize({
499
508
  root: this.#opts.root,
500
509
  sessionId: this.#opts.sessionId,
501
510
  workspace,
502
511
  branch: state.handle.branch ?? "",
512
+ reviewOnly,
513
+ verdict: reviewOnly ? (typeof input.verdict === "string" ? input.verdict : null) : undefined,
514
+ prTarget: reviewOnly ? state.handle.issueOrPr : undefined,
503
515
  requireTests: input.requireTests !== false,
504
516
  requireCommit: input.requireCommit !== false,
505
517
  requirePr: input.requirePr !== false,
@@ -514,6 +526,7 @@ export class RuntimeOwner {
514
526
  await this.#emit(fin.completed ? "info" : "critical", "finalized", {
515
527
  completed: fin.completed,
516
528
  blockers: fin.blockers,
529
+ ...(reviewOnly ? { verdict: fin.verdict ?? null, reviewOnly: true } : {}),
517
530
  });
518
531
  return this.#response(state, { finalize: fin }, fin.completed);
519
532
  }
@@ -13,7 +13,13 @@
13
13
  * - completion the finalize gate: receipt-valid + commit + PR/issue + validations
14
14
  */
15
15
  import { createHash } from "node:crypto";
16
- import type { GitDelta, ReceiptFamily, RecoveryClassification } from "./types";
16
+ import {
17
+ type GitDelta,
18
+ isReviewVerdict,
19
+ type ReceiptFamily,
20
+ type RecoveryClassification,
21
+ type ReviewVerdict,
22
+ } from "./types";
17
23
 
18
24
  export interface ReceiptSubject {
19
25
  workspace: string;
@@ -144,6 +150,23 @@ export interface CompletionEvidence {
144
150
  blockers: string[];
145
151
  }
146
152
 
153
+ export interface ReviewVerdictEvidence {
154
+ verdict: ReviewVerdict;
155
+ prTarget: string | null;
156
+ finalizedAt: string;
157
+ /** Bounded summary code/reference for the verdict; never raw assistant text. */
158
+ summaryRef: string | null;
159
+ }
160
+
161
+ export interface ReviewFailureEvidence {
162
+ /** Machine-actionable reason the review produced no terminal verdict. */
163
+ reason: string;
164
+ prTarget: string | null;
165
+ failedAt: string;
166
+ /** Routing hint for the operator/fallback path. */
167
+ fallback: string;
168
+ }
169
+
147
170
  function validateFamily(receipt: ReceiptEnvelope<unknown>): string[] {
148
171
  switch (receipt.family) {
149
172
  case "vanish":
@@ -154,6 +177,10 @@ function validateFamily(receipt: ReceiptEnvelope<unknown>): string[] {
154
177
  return validateValidation(receipt.evidence as ValidationEvidence);
155
178
  case "completion":
156
179
  return validateCompletion(receipt.evidence as CompletionEvidence);
180
+ case "review-verdict":
181
+ return validateReviewVerdict(receipt.evidence as ReviewVerdictEvidence);
182
+ case "review-failure":
183
+ return validateReviewFailure(receipt.evidence as ReviewFailureEvidence);
157
184
  default:
158
185
  return [`unknown-family:${receipt.family}`];
159
186
  }
@@ -206,6 +233,17 @@ function validateCompletion(e: CompletionEvidence): string[] {
206
233
  return reasons;
207
234
  }
208
235
 
236
+ function validateReviewVerdict(e: ReviewVerdictEvidence): string[] {
237
+ if (!e) return ["review-verdict-missing-evidence"];
238
+ if (!isReviewVerdict(e.verdict)) return ["review-verdict-not-in-vocabulary"];
239
+ return [];
240
+ }
241
+
242
+ function validateReviewFailure(e: ReviewFailureEvidence): string[] {
243
+ if (!e || typeof e.reason !== "string" || e.reason.length === 0) return ["review-failure-missing-reason"];
244
+ return [];
245
+ }
246
+
209
247
  /** Classifications that MUST have a valid `vanish` receipt before the action proceeds. */
210
248
  export function requiresVanishBeforeAction(classification: RecoveryClassification): boolean {
211
249
  return (
@@ -11,6 +11,22 @@
11
11
  /** Harnesses the control plane can operate. v1 implements `gajae-code` only. */
12
12
  export type Harness = "gajae-code" | "codex" | "omx";
13
13
 
14
+ /** Operating mode of a session. `implement` builds/changes code; `review` produces a read-only verdict. */
15
+ export type SessionMode = "implement" | "review";
16
+
17
+ /** Closed vocabulary of terminal review verdicts a review-only session may emit. */
18
+ export type ReviewVerdict = "APPROVE_MERGE_READY" | "REQUEST_CHANGES" | "OWNER_CONFIRMATION_REQUIRED";
19
+
20
+ export const REVIEW_VERDICTS: readonly ReviewVerdict[] = [
21
+ "APPROVE_MERGE_READY",
22
+ "REQUEST_CHANGES",
23
+ "OWNER_CONFIRMATION_REQUIRED",
24
+ ];
25
+
26
+ export function isReviewVerdict(value: unknown): value is ReviewVerdict {
27
+ return typeof value === "string" && (REVIEW_VERDICTS as readonly string[]).includes(value);
28
+ }
29
+
14
30
  /** Lifecycle states of an operated session. */
15
31
  export type HarnessLifecycle =
16
32
  | "new"
@@ -44,7 +60,13 @@ export type RecoveryClassification =
44
60
  | "human-check";
45
61
 
46
62
  /** Receipt families persisted under the session storage dir. */
47
- export type ReceiptFamily = "vanish" | "prompt-acceptance" | "validation" | "completion";
63
+ export type ReceiptFamily =
64
+ | "vanish"
65
+ | "prompt-acceptance"
66
+ | "validation"
67
+ | "completion"
68
+ | "review-verdict"
69
+ | "review-failure";
48
70
 
49
71
  /** The CLI verbs / primitives exposed by `gjc harness <verb>`. */
50
72
  export type HarnessVerb =
@@ -95,6 +117,8 @@ export interface PrimitiveResponse<E = Record<string, unknown>> {
95
117
  export interface SessionHandle {
96
118
  sessionId: string;
97
119
  harness: Harness;
120
+ /** Operating mode; absent on legacy records means `implement`. */
121
+ mode?: SessionMode;
98
122
  repo: string | null;
99
123
  workspace: string;
100
124
  branch: string | null;
@@ -20,7 +20,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
20
20
  "keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `gjc` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.gjc/agent/keybindings.json`. The file is a JSON object whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.gjc/agent/config.yml`, and there is no nested `keybindings` object.\n\n```json\n{\n \"app.model.cycleForward\": \"Ctrl+P\",\n \"app.model.selectTemporary\": \"Alt+P\",\n \"app.plan.toggle\": \"Alt+Shift+P\"\n}\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```json\n{\n \"app.stt.toggle\": []\n}\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --- | --- | --- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models backward |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Ctrl+L` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Enter` | Queue a follow-up message |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.stt.toggle` | `Alt+H` | Toggle speech-to-text recording |\n\nOlder unqualified action names are migrated when `keybindings.json` is loaded, but new docs and new configs should use the namespaced action IDs above.\n",
21
21
  "lsp-config.md": "# LSP configuration in GJC\n\nThis guide explains how to configure language servers for the GJC coding agent.\n\nSource of truth in code:\n\n- Server config type: `packages/coding-agent/src/lsp/types.ts` (`ServerConfig`)\n- Config loader: `packages/coding-agent/src/lsp/config.ts`\n- Built-in server definitions: `packages/coding-agent/src/lsp/defaults.json`\n\n## Auto-detection\n\nWhen no LSP config file is present, GJC auto-detects servers by intersecting two conditions:\n\n1. The project directory contains at least one of the server's `rootMarkers`.\n2. The server binary is available — checked in project-local bin directories first (e.g., `node_modules/.bin/`, `.venv/bin/`), then `$PATH`.\n\nNo configuration is required for common setups. The built-in server list covers most popular languages; see [`defaults.json`](../packages/coding-agent/src/lsp/defaults.json) for the full set.\n\n## Config file locations\n\nGJC merges LSP config from multiple files, lowest to highest priority:\n\n| Priority | Location |\n|----------|----------|\n| 5 (lowest) | `~/lsp.json`, `~/.lsp.json`, `~/lsp.yaml`, `~/.lsp.yaml` |\n| 3 | `~/.gjc/agent/lsp.json`, `~/.gjc/agent/lsp.yaml`, `~/.gemini/lsp.*` |\n| 2 | `<project>/.gjc/lsp.json`, `<project>/.gjc/lsp.yaml`, `<project>/.gemini/lsp.*` |\n| 1 (highest) | `<project>/lsp.json`, `<project>/.lsp.json`, `<project>/lsp.yaml` |\n\nEach location accepts both `.json` and `.yaml` / `.yml` variants, as well as hidden-file versions (`.lsp.json`, `.lsp.yaml`). Files are merged in order: higher-priority files override lower-priority fields for the same server. Servers not mentioned in any override file remain at their built-in defaults.\n\n**Recommended locations:**\n\n- User-wide preferences → `~/.gjc/agent/lsp.json`\n- Project-specific overrides → `<project>/.gjc/lsp.json`\n\n> **Note:** The presence of any LSP config file disables auto-detection. When at least one file is found, GJC skips the binary-scan phase and loads all servers that have matching `rootMarkers`, an available binary, and are not explicitly `disabled`.\n\n## File shape\n\nBoth JSON and YAML are accepted. The top-level object can use either a `servers` wrapper key or a flat map directly:\n\n```json\n{\n \"servers\": {\n \"server-name\": { ... }\n },\n \"idleTimeoutMs\": 300000\n}\n```\n\nor (flat, without the `servers` wrapper):\n\n```json\n{\n \"server-name\": { ... },\n \"idleTimeoutMs\": 300000\n}\n```\n\nTop-level keys:\n\n- `servers` — map of server name to `ServerConfig` (optional wrapper; flat form is equivalent)\n- `idleTimeoutMs` — shut down idle language servers after this many milliseconds; disabled by default\n\n## ServerConfig fields\n\n| Field | Type | Required | Description |\n|-------|------|----------|-------------|\n| `command` | `string` | yes | Binary name (resolved via PATH/local bins) or absolute path |\n| `args` | `string[]` | no | Arguments passed to the binary |\n| `fileTypes` | `string[]` | yes | File extensions this server handles, e.g. `[\".ts\", \".tsx\"]` |\n| `rootMarkers` | `string[]` | yes | Files/dirs that indicate a project root; glob patterns (e.g. `*.cabal`) are supported |\n| `initOptions` | `object` | no | Sent as `initializationOptions` during LSP handshake |\n| `settings` | `object` | no | Workspace settings pushed via `workspace/didChangeConfiguration` |\n| `disabled` | `boolean` | no | Set to `true` to disable this server entirely |\n| `warmupTimeoutMs` | `number` | no | Startup timeout in ms for this server (overrides the global default) |\n| `isLinter` | `boolean` | no | Mark server as linter/formatter only; excluded from type-intelligence operations (hover, go-to-definition, etc.) |\n| `capabilities` | `object` | no | Opt-in server-specific features; see [Capabilities](#capabilities) |\n\n`resolvedCommand` is populated automatically at runtime — do not set it manually.\n\n### Capabilities\n\nThe `capabilities` object enables optional server-specific features that GJC supports on a per-server basis:\n\n```json\n{\n \"capabilities\": {\n \"flycheck\": true,\n \"ssr\": true,\n \"expandMacro\": true,\n \"runnables\": true,\n \"relatedTests\": true\n }\n}\n```\n\nAll fields are boolean and optional. They are currently used by `rust-analyzer`.\n\n## Common recipes\n\n### Override a built-in server's settings\n\nPartial overrides are merged onto the built-in defaults. You only need to specify the fields you want to change.\n\n```json\n{\n \"servers\": {\n \"typescript-language-server\": {\n \"args\": [\"--stdio\", \"--log-level\", \"4\"]\n }\n }\n}\n```\n\n```yaml\nservers:\n gopls:\n settings:\n gopls:\n gofumpt: false\n staticcheck: false\n```\n\n### Disable a built-in server\n\n```json\n{\n \"servers\": {\n \"eslint\": {\n \"disabled\": true\n }\n }\n}\n```\n\n### Register a custom server\n\nNew servers require `command`, `fileTypes`, and `rootMarkers`. All other fields are optional.\n\n```json\n{\n \"servers\": {\n \"my-lsp\": {\n \"command\": \"my-lsp-server\",\n \"args\": [\"--stdio\"],\n \"fileTypes\": [\".xyz\"],\n \"rootMarkers\": [\".xyz-project\", \".git\"]\n }\n }\n}\n```\n\n### Set a global idle timeout\n\nShut down language servers that have been inactive for more than five minutes:\n\n```json\n{\n \"idleTimeoutMs\": 300000\n}\n```\n\n### Disable a server for one project, keep it globally\n\nPlace the override in `<project>/.gjc/lsp.json`:\n\n```json\n{\n \"servers\": {\n \"pylsp\": {\n \"disabled\": true\n }\n }\n}\n```\n\nThe user-level config in `~/.gjc/agent/lsp.json` is unaffected; pylsp is only suppressed in this project.\n\n## Built-in server list\n\nThe following servers ship in `defaults.json` and are eligible for auto-detection:\n\n| Server key | Language(s) | Binary |\n|---|---|---|\n| `rust-analyzer` | Rust | `rust-analyzer` |\n| `clangd` | C, C++, ObjC | `clangd` |\n| `zls` | Zig | `zls` |\n| `gopls` | Go | `gopls` |\n| `typescript-language-server` | TypeScript, JavaScript | `typescript-language-server` |\n| `denols` | TypeScript, JavaScript (Deno) | `deno` |\n| `biome` | TS/JS/JSON (linter) | `biome` |\n| `eslint` | TS/JS/Vue/Svelte (linter) | `vscode-eslint-language-server` |\n| `vscode-html-language-server` | HTML | `vscode-html-language-server` |\n| `vscode-css-language-server` | CSS, SCSS, Less | `vscode-css-language-server` |\n| `vscode-json-language-server` | JSON | `vscode-json-language-server` |\n| `tailwindcss` | HTML, CSS, TS/JS | `tailwindcss-language-server` |\n| `svelte` | Svelte | `svelteserver` |\n| `vue-language-server` | Vue | `vue-language-server` |\n| `astro` | Astro | `astro-ls` |\n| `pyright` | Python | `pyright-langserver` |\n| `basedpyright` | Python | `basedpyright-langserver` |\n| `pylsp` | Python | `pylsp` |\n| `ruff` | Python (linter) | `ruff` |\n| `jdtls` | Java | `jdtls` |\n| `kotlin-lsp` | Kotlin | `kotlin-lsp` |\n| `metals` | Scala | `metals` |\n| `hls` | Haskell | `haskell-language-server-wrapper` |\n| `ocamllsp` | OCaml | `ocamllsp` |\n| `elixirls` | Elixir | `elixir-ls` |\n| `erlangls` | Erlang | `erlang_ls` |\n| `gleam` | Gleam | `gleam` |\n| `solargraph` | Ruby | `solargraph` |\n| `ruby-lsp` | Ruby | `ruby-lsp` |\n| `rubocop` | Ruby (linter) | `rubocop` |\n| `bashls` | Bash, Zsh | `bash-language-server` |\n| `lua-language-server` | Lua | `lua-language-server` |\n| `intelephense` | PHP | `intelephense` |\n| `phpactor` | PHP | `phpactor` |\n| `omnisharp` | C# | `omnisharp` |\n| `yamlls` | YAML | `yaml-language-server` |\n| `terraformls` | Terraform | `terraform-ls` |\n| `dockerls` | Dockerfile | `docker-langserver` |\n| `helm-ls` | Helm | `helm_ls` |\n| `nixd` | Nix | `nixd` |\n| `nil` | Nix | `nil` |\n| `ols` | Odin | `ols` |\n| `dartls` | Dart | `dart` |\n| `marksman` | Markdown | `marksman` |\n| `texlab` | LaTeX | `texlab` |\n| `graphql` | GraphQL | `graphql-lsp` |\n| `prismals` | Prisma | `prisma-language-server` |\n| `vimls` | Vim script | `vim-language-server` |\n| `emmet-language-server` | HTML, CSS, JSX | `emmet-language-server` |\n| `sourcekit-lsp` | Swift | `sourcekit-lsp` |\n| `swiftlint` | Swift (linter) | `swiftlint` |\n| `tlaplus` | TLA+ | `tlapm_lsp` |\n",
22
22
  "memory.md": "# Autonomous Memory\n\nWhen enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into each new session. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable via `/settings` or `config.yml`:\n\n```yaml\nmemories:\n enabled: true\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Pair memory-influenced decisions with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Memory artifacts\n\nGenerated local-memory artifacts are private runtime state, not a public tool or URI surface. They may be summarized into the system prompt when local memory is enabled, but users and model-facing tool docs should not rely on direct `memory://` reads. The legacy internal `memory://` resolver remains only for compatibility with existing persisted guidance and is not part of the public coding harness contract; remove it after legacy local-memory prompts no longer reference it.\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | ---------------------------------------------- |\n| `view` | Show the current memory injection payload |\n| `clear` / `reset` | Delete all memory data and generated artifacts |\n| `enqueue` / `rebuild` | Force consolidation to run at next startup |\n\n## How it works\n\nMemories are built by a background pipeline that runs at startup or when manually triggered via slash command.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, or currently active are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nAll output is scanned for secrets before being written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| --------------------- | ------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | --------------------------------------------------------- |\n| `memories.enabled` | `false` | Master switch |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — legacy non-public `memory://` compatibility handler\n",
23
- "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.gjc/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n cacheRetention: short # none | short | long; model entries and modelOverrides can override this\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n cacheRetention: long\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n cacheRetention: none\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\nmodelBindings:\n modelRoles:\n default: my-provider/some-model-id:high\n agentModelOverrides:\n executor: my-provider/some-model-id\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `bedrock-converse-stream`\n- `anthropic-messages`\n- `bedrock-converse-stream`\n- `google-generative-ai`\n- `google-vertex`\n- `google-gemini-cli`\n- `ollama-chat`\n- `cursor-agent`\n\n\n### First-class Azure OpenAI and Amazon Bedrock examples\n\nAzure OpenAI uses canonical OpenAI model IDs in GJC and resolves those IDs to Azure deployment names at request time. Set `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` to avoid assuming model id equals deployment name:\n\n```yaml\nproviders:\n azure-openai:\n baseUrl: https://my-resource.openai.azure.com/openai/v1\n apiKeyEnv: AZURE_OPENAI_API_KEY\n api: azure-openai-responses\n models:\n - id: gpt-4.1\n - id: o3\n```\n\n```sh\nexport AZURE_OPENAI_DEPLOYMENT_NAME_MAP='gpt-4.1=gpt-41-prod,o3=o3-reasoning-prod'\n```\n\nAmazon Bedrock uses the native `bedrock-converse-stream` transport and AWS credential chain auth. Do not put AWS access keys in `models.yml`; configure `AWS_REGION` / `AWS_PROFILE` or standard static AWS credential environment variables instead:\n\n```yaml\nproviders:\n amazon-bedrock:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com\n api: bedrock-converse-stream\n models:\n - id: us.anthropic.claude-opus-4-6-v1\n - id: anthropic.claude-3-5-sonnet-20241022-v2:0\n```\n\n### MiniMax and GLM custom provider examples\n\nFor common MiniMax and GLM/zAI setup, prefer the provider presets so the OpenAI-compatible API, base URL, env var, model id, and compatibility flags are written together:\n\n```sh\ngjc setup provider --preset minimax\ngjc setup provider --preset minimax-cn\ngjc setup provider --preset glm\n```\n\nThe same presets are available inside the TUI:\n\n```text\n/provider add --preset minimax\n/provider add --preset glm\n/provider add zai\n```\n\nPresets only write `models.yml` entries that reference documented environment variable names (`MINIMAX_CODE_API_KEY`, `MINIMAX_CODE_CN_API_KEY`, or `ZAI_API_KEY`); they do not store or validate real credentials. The GLM preset aliases (`glm`, `zai`, `z-ai`) write an OpenAI-compatible custom provider named `glm-proxy` and do not replace the first-class `zai` provider.\n\n## Model profiles (`--mpreset`)\n\nModel profiles are optional top-level `profiles:` entries in `~/.gjc/agent/models.yml`. A profile can require provider credentials before activation and can map one or more model roles; omitted roles inherit from the active defaults.\n\n```yaml\nprofiles:\n team-standard:\n required_providers: [openai, anthropic]\n model_mapping:\n default: openai/gpt-5.2\n executor: anthropic/claude-sonnet-4-6:medium\n architect: openai/o3:high\n planner: openai/o3:high\n critic: openai/o3:high\n```\n\n`model_mapping` keys are role names (`default`, `executor`, `architect`, `planner`, `critic`). Each role maps to exactly one model selector in the form `provider/modelId[:effort]`; comma-separated fallback chains are not supported in a single role value.\n`required_providers` is the aggregate set of providers required across the profile's mapped roles, not a per-role fallback chain.\n\nBuilt-in profiles are grouped by provider mix and tier:\n\n- `opencode-go-{eco,standard,pro}`\n- `codex-{eco,standard,pro}`\n- `opencode-go-codex-{eco,standard,pro}`\n\nThe `eco` tier favors cheaper/faster defaults, `standard` matches normal production defaults (`codex-standard` is the current OpenAI Codex default set), and `pro` raises reasoning for architect, critic, and planner roles. User-defined profiles override built-ins by exact profile name.\n\nUse `gjc --mpreset <name>` to activate a profile for the current session only. Activation hard-blocks when any provider listed in `required_providers` lacks credentials. Add `--default` to persist the selected profile as `modelProfile.default` in `config.yml`, so it applies at startup:\n\n```sh\ngjc --mpreset codex-standard\ngjc --mpreset opencode-go-pro --default\n```\n\nThe `/model` command shows a `Profiles` section above model selection. In `/login`, `Add custom provider` is the first option for configuring credentials needed by custom or profile-required providers.\n\nMiniMax's OpenAI-compatible endpoint rejects multiple system messages and emits thinking in `reasoning_content`, so pin the public-safe compatibility fields when hand-authoring a custom provider:\n\n```yaml\nproviders:\n minimax-custom:\n baseUrl: https://api.minimax.io/v1\n apiKeyEnv: MINIMAX_API_KEY\n api: openai-completions\n compat:\n supportsStore: false\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n reasoningContentField: reasoning_content\n models:\n - id: MiniMax-M2.5\n```\n\nGLM via z.ai is available as the first-class `zai` provider. For a private GLM-compatible proxy, keep secrets in an env var and disable OpenAI-only request fields as needed:\n\n```yaml\nproviders:\n glm-proxy:\n baseUrl: https://api.z.ai/api/paas/v4\n apiKeyEnv: ZAI_API_KEY\n api: openai-completions\n compat:\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n models:\n - id: glm-4.6\n```\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `models.yml` is strict: unknown provider/model keys fail validation before provider dispatch, so stale keys such as `requestTransform` or `wireModelId` only work where this document lists them.\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n- `cacheRetention`: `none`, `short`, or `long`; request-time options win over model/modelOverride values, then provider values, then `GJC_CACHE_RETENTION`, then the runtime default. For OpenAI Responses, this controls `prompt_cache_retention` only; it does not disable `prompt_cache_key` when a stable session id exists.\n\n## OpenAI-compatible proxy configuration\n\nOpenAI-compatible proxy providers should use schema-supported provider keys first:\n\n```yaml\nproviders:\n proxy-provider:\n baseUrl: https://api.proxy.example/v1\n apiKeyEnv: PROXY_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: local-gpt\n name: Local GPT\n reasoning: true\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n```\n\nUse provider-level `headers` for proxy-required headers. Keep the provider `api` set to `openai-completions` when the proxy exposes Chat Completions-compatible `/v1/chat/completions` semantics. `auth: apiKey` sends the resolved token as bearer auth; use `auth: none` only for trusted local/no-auth endpoints.\n\n`requestTransform` and `wireModelId` remain supported for request-body shaping, but they are not needed for ordinary OpenAI-compatible proxies whose local model id is already the upstream wire id. Unknown config keys fail validation before a provider request is sent.\n\nWhen request shaping is needed:\n\n- `requestTransform.profile: openai-proxy` strips OpenAI SDK/Stainless telemetry headers at final fetch time and sets a generic GJC user agent.\n- `stripHeaders` replaces the preset strip list when provided.\n- `setHeaders` is applied after stripping; use `null` to remove a header.\n- `extraBody` is shallow-merged into the JSON request body after provider compatibility fields; core transport keys such as `model`, `messages`/`input`, `stream`, `tools`, and `tool_choice` are protected and ignored.\n- Model-level `requestTransform` overrides provider-level fields and shallow-merges `setHeaders`/`extraBody`.\n- `wireModelId` changes only the upstream request body model id; local selection still uses `provider/id`.\n\n### Layofflabs-style proxy example\n\n```yaml\nproviders:\n layofflabs:\n baseUrl: https://api.layofflabs.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-5.5\n name: GPT 5.5 via Layofflabs\n reasoning: true\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n\nmodelBindings:\n modelRoles:\n default: layofflabs/gpt-5.5:high\n agentModelOverrides:\n executor: layofflabs/gpt-5.5:high\n```\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `requestTransform`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n- unknown provider, model, override, and request-transform keys fail schema validation; remove stale keys instead of relying on them being ignored.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@gajae-code/ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `requestTransform`, `disableStrictTools`, `cacheRetention`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `anthropic-model-opus-4-6`\n- `anthropic-model-haiku-4-5`\n- `gpt-5.3-openai-code`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: openai-code\n name: Zenmux OpenAI code\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/openai-code: gpt-5.3-openai-code\n p-openai-code/openai-code: gpt-5.3-openai-code\n exclude:\n - demo/openai-code-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `GJC_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`GJC_AUTH_BROKER_URL`, `GJC_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI code provider transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-openai-code` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - anthropic-model-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/anthropic-model-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI code provider websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-code:\n modelOverrides:\n gpt-5.3-openai-code-spark:\n contextPromotionTarget: openai-code/gpt-5.3-openai-code\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/anthropic-model-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.gjc/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
23
+ "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.gjc/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for Anthropic-compatible endpoints that reject the strict field\n cacheRetention: short # none | short | long; model entries and modelOverrides can override this\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n cacheRetention: long\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n cacheRetention: none\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\nmodelBindings:\n modelRoles:\n default: my-provider/some-model-id:high\n agentModelOverrides:\n executor: my-provider/some-model-id\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `bedrock-converse-stream`\n- `anthropic-messages`\n- `bedrock-converse-stream`\n- `google-generative-ai`\n- `google-vertex`\n- `google-gemini-cli`\n- `ollama-chat`\n- `cursor-agent`\n\n\n### First-class Azure OpenAI and Amazon Bedrock examples\n\nAzure OpenAI uses canonical OpenAI model IDs in GJC and resolves those IDs to Azure deployment names at request time. Set `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` to avoid assuming model id equals deployment name:\n\n```yaml\nproviders:\n azure-openai:\n baseUrl: https://my-resource.openai.azure.com/openai/v1\n apiKeyEnv: AZURE_OPENAI_API_KEY\n api: azure-openai-responses\n models:\n - id: gpt-4.1\n - id: o3\n```\n\n```sh\nexport AZURE_OPENAI_DEPLOYMENT_NAME_MAP='gpt-4.1=gpt-41-prod,o3=o3-reasoning-prod'\n```\n\nAmazon Bedrock uses the native `bedrock-converse-stream` transport and AWS credential chain auth. Do not put AWS access keys in `models.yml`; configure `AWS_REGION` / `AWS_PROFILE` or standard static AWS credential environment variables instead:\n\n```yaml\nproviders:\n amazon-bedrock:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com\n api: bedrock-converse-stream\n models:\n - id: us.anthropic.claude-opus-4-6-v1\n - id: anthropic.claude-3-5-sonnet-20241022-v2:0\n```\n\n### MiniMax and GLM custom provider examples\n\nFor common MiniMax and GLM/zAI setup, prefer the provider presets so the OpenAI-compatible API, base URL, env var, model id, and compatibility flags are written together:\n\n```sh\ngjc setup provider --preset minimax\ngjc setup provider --preset minimax-cn\ngjc setup provider --preset glm\n```\n\nThe same presets are available inside the TUI:\n\n```text\n/provider add --preset minimax\n/provider add --preset glm\n/provider add zai\n```\n\nPresets only write `models.yml` entries that reference documented environment variable names (`MINIMAX_CODE_API_KEY`, `MINIMAX_CODE_CN_API_KEY`, or `ZAI_API_KEY`); they do not store or validate real credentials. The GLM preset aliases (`glm`, `zai`, `z-ai`) write an OpenAI-compatible custom provider named `glm-proxy` and do not replace the first-class `zai` provider.\n\n## Model profiles (`--mpreset`)\n\nModel profiles are optional top-level `profiles:` entries in `~/.gjc/agent/models.yml`. A profile can require provider credentials before activation and can map one or more model roles; omitted roles inherit from the active defaults.\n\n```yaml\nprofiles:\n team-standard:\n required_providers: [openai, anthropic]\n model_mapping:\n default: openai/gpt-5.2\n executor: anthropic/claude-sonnet-4-6:medium\n architect: openai/o3:high\n planner: openai/o3:high\n critic: openai/o3:high\n```\n\n`model_mapping` keys are role names (`default`, `executor`, `architect`, `planner`, `critic`). Each role maps to exactly one model selector in the form `provider/modelId[:effort]`; comma-separated fallback chains are not supported in a single role value.\n`required_providers` is the aggregate set of providers required across the profile's mapped roles, not a per-role fallback chain.\n\nBuilt-in profiles are grouped by provider mix and tier:\n\n- `opencode-go-{eco,standard,pro}`\n- `codex-{eco,standard,pro}`\n- `opencode-go-codex-{eco,standard,pro}`\n\nThe `eco` tier favors cheaper/faster defaults, `standard` matches normal production defaults (`codex-standard` is the current OpenAI Codex default set), and `pro` raises reasoning for architect, critic, and planner roles. User-defined profiles override built-ins by exact profile name.\n\nUse `gjc --mpreset <name>` to activate a profile for the current session only. Activation hard-blocks when any provider listed in `required_providers` lacks credentials. Add `--default` to persist the selected profile as `modelProfile.default` in `config.yml`, so it applies at startup:\n\n```sh\ngjc --mpreset codex-standard\ngjc --mpreset opencode-go-pro --default\n```\n\nThe `/model` command shows a `Profiles` section above model selection. In `/login`, `Add custom provider` is the first option for configuring credentials needed by custom or profile-required providers.\n\nMiniMax's OpenAI-compatible endpoint rejects multiple system messages and emits thinking in `reasoning_content`, so pin the public-safe compatibility fields when hand-authoring a custom provider:\n\n```yaml\nproviders:\n minimax-custom:\n baseUrl: https://api.minimax.io/v1\n apiKeyEnv: MINIMAX_API_KEY\n api: openai-completions\n compat:\n supportsStore: false\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n reasoningContentField: reasoning_content\n models:\n - id: MiniMax-M2.5\n```\n\nGLM via z.ai is available as the first-class `zai` provider. For a private GLM-compatible proxy, keep secrets in an env var and disable OpenAI-only request fields as needed:\n\n```yaml\nproviders:\n glm-proxy:\n baseUrl: https://api.z.ai/api/paas/v4\n apiKeyEnv: ZAI_API_KEY\n api: openai-completions\n compat:\n supportsDeveloperRole: false\n supportsReasoningEffort: false\n models:\n - id: glm-4.6\n```\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `models.yml` is strict: unknown provider/model keys fail validation before provider dispatch, so stale keys such as `requestTransform` or `wireModelId` only work where this document lists them.\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n- `cacheRetention`: `none`, `short`, or `long`; request-time options win over model/modelOverride values, then provider values, then `GJC_CACHE_RETENTION`, then the runtime default. For OpenAI Responses, this controls `prompt_cache_retention` only; it does not disable `prompt_cache_key` when a stable session id exists.\n\n## OpenAI-compatible proxy configuration\n\nOpenAI-compatible proxy providers should use schema-supported provider keys first:\n\n```yaml\nproviders:\n proxy-provider:\n baseUrl: https://api.proxy.example/v1\n apiKeyEnv: PROXY_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: local-gpt\n name: Local GPT\n reasoning: true\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n```\n\nUse provider-level `headers` for proxy-required headers. Keep the provider `api` set to `openai-completions` when the proxy exposes Chat Completions-compatible `/v1/chat/completions` semantics. `auth: apiKey` sends the resolved token as bearer auth; use `auth: none` only for trusted local/no-auth endpoints.\n\n`requestTransform` and `wireModelId` remain supported for request-body shaping, but they are not needed for ordinary OpenAI-compatible proxies whose local model id is already the upstream wire id. Unknown config keys fail validation before a provider request is sent.\n\nWhen request shaping is needed:\n\n- `requestTransform.profile: openai-proxy` strips OpenAI SDK/Stainless telemetry headers at final fetch time and sets a generic GJC user agent.\n- `stripHeaders` replaces the preset strip list when provided.\n- `setHeaders` is applied after stripping; use `null` to remove a header.\n- `extraBody` is shallow-merged into the JSON request body after provider compatibility fields; core transport keys such as `model`, `messages`/`input`, `stream`, `tools`, and `tool_choice` are protected and ignored.\n- Model-level `requestTransform` overrides provider-level fields and shallow-merges `setHeaders`/`extraBody`.\n- `wireModelId` changes only the upstream request body model id; local selection still uses `provider/id`.\n\n### Layofflabs-style proxy example\n\n```yaml\nproviders:\n layofflabs:\n baseUrl: https://api.layofflabs.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-5.5\n name: GPT 5.5 via Layofflabs\n reasoning: true\n thinking:\n minLevel: low\n maxLevel: xhigh\n mode: effort\n defaultLevel: high\n levels: [low, medium, high, xhigh]\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n contextWindow: 400000\n maxTokens: 128000\n\nmodelBindings:\n modelRoles:\n default: layofflabs/gpt-5.5:high\n agentModelOverrides:\n executor: layofflabs/gpt-5.5:high\n```\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `requestTransform`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n- unknown provider, model, override, and request-transform keys fail schema validation; remove stale keys instead of relying on them being ignored.\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@gajae-code/ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `requestTransform`, `disableStrictTools`, `cacheRetention`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `anthropic-model-opus-4-6`\n- `anthropic-model-haiku-4-5`\n- `gpt-5.3-openai-code`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: openai-code\n name: Zenmux OpenAI code\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/openai-code: gpt-5.3-openai-code\n p-openai-code/openai-code: gpt-5.3-openai-code\n exclude:\n - demo/openai-code-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `GJC_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`GJC_AUTH_BROKER_URL`, `GJC_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI code provider transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-openai-code` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - anthropic-model-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/anthropic-model-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error. It is **off by default** (`contextPromotion.enabled` is `false`); opt in to enable it.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI code provider websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-code:\n modelOverrides:\n gpt-5.3-openai-code-spark:\n contextPromotionTarget: openai-code/gpt-5.3-openai-code\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: anthropic-model-sonnet-4-20250514\n name: Anthropic model Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/anthropic-model-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.gjc/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
24
24
  "natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document covers the runtime loader shipped by `@gajae-code/natives`: how `native/index.js` decides which `.node` file to require, how compiled-binary embedded payloads are extracted, and what startup failures report.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nThe loader is intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Treat an embedded-addon manifest as the authoritative compiled-binary signal when present.\n- Optionally materialize an embedded addon into a versioned per-user cache directory.\n- Attempt candidates in deterministic order and return the first addon that `require(...)` loads.\n\nThe current loader does **not** run a separate `validateNative(...)` export-presence gate. API shape is provided by the generated N-API binding file (`native/index.d.ts`) and the loaded addon itself. A stale binary therefore normally fails as a missing property or native load error rather than as a custom \"missing exports\" validation error.\n\n## Runtime inputs and derived state\n\nAt module initialization, `native/index.js` computes:\n\n- **Platform tag**: `${process.platform}-${process.arch}` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json`.\n- **Core directories**:\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/gjc` or `%USERPROFILE%/AppData/Local/gjc`.\n - Non-Windows: `~/.local/bin`.\n- **Natives cache root** (`getNativesDir()`):\n - if `$XDG_DATA_HOME/gjc` exists, `$XDG_DATA_HOME/gjc/natives`;\n - otherwise `~/.gjc/natives`.\n- **Compiled-binary mode** (`detectCompiledBinary`): true if any of:\n - embedded-addon manifest is non-null,\n - `GJC_COMPILED` env var is set,\n - `import.meta.url` contains Bun embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Variant override**: `GJC_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nUnsupported platforms are not rejected before probing. The loader first tries the computed candidate paths. If all fail and `platformTag` is unsupported, it throws an unsupported-platform error listing supported tags.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. `GJC_NATIVE_VARIANT=modern|baseline` wins when valid.\n2. Otherwise AVX2 support is detected:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`.\n - Windows: PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. AVX2 selects `modern`; unavailable or undetectable AVX2 selects `baseline`.\n\n### Non-x64 behavior\n\nNo variant suffix is used; the filename is `pi_natives.<platform>-<arch>.node`.\n\n### Filename construction\n\n`loader-state.js#getAddonFilenames` returns:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`:\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node`\n 3. `pi_natives.<tag>.node`\n- x64 + `baseline`:\n 1. `pi_natives.<tag>-baseline.node`\n 2. `pi_natives.<tag>.node`\n\nThe default unsuffixed fallback remains part of the x64 candidate list.\n\n## Candidate path construction and fallback ordering\n\n`resolveLoaderCandidates(...)` expands every filename across directories, then de-duplicates while preserving first occurrence order.\n\n### Non-compiled runtime\n\nFor each filename, candidates are:\n\n1. `<nativeDir>/<filename>`\n2. `<execDir>/<filename>`\n\n### Compiled runtime\n\nFor each filename, candidates are:\n\n1. `<versionedDir>/<filename>`\n2. `<userDataDir>/<filename>`\n3. `<nativeDir>/<filename>`\n4. `<execDir>/<filename>`\n\nAt load time, an extracted embedded candidate, when produced, is prepended ahead of these de-duplicated candidates.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.js` is generated by `scripts/embed-native.ts`. The reset stub exports `embeddedAddon = null`. A populated manifest has:\n\n- `platformTag`\n- `version`\n- `files[]` entries with `variant`, `filename`, and `filePath`\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when:\n\n1. compiled-binary mode is true,\n2. `embeddedAddon` is non-null,\n3. manifest `platformTag` equals the runtime platform tag,\n4. manifest `version` equals the package version,\n5. a variant-appropriate embedded file exists.\n\nVariant file selection:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization:\n\n1. Ensure `<versionedDir>` exists.\n2. Reuse `<versionedDir>/<selected filename>` if it already exists.\n3. Otherwise read `selectedEmbeddedFile.filePath` and write the target path.\n4. Return the target path as the first candidate.\n\nDirectory creation or write failures are appended to the loader error list; probing continues through normal candidates.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Load package metadata and embedded-addon manifest\n -> Compute platform/version/variant/filenames/candidate paths\n -> (compiled + embedded manifest matches?)\n yes -> try extract to versionedDir (record errors, continue)\n no -> skip extraction\n -> For each runtime candidate in order:\n require(candidate)\n -> success: return addon exports (READY)\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (tried-path diagnostics + hints)\n```\n\n## Failure behavior and diagnostics\n\n### Unsupported platform\n\nIf all candidates fail and `platformTag` is not supported, the loader throws:\n\n- `Unsupported platform: <tag>`\n- supported platform list\n- issue-reporting guidance\n\n### No loadable candidate\n\nIf the platform is supported but no candidate can be loaded, the final error includes:\n\n- `Failed to load pi_natives native addon for <platformTag>` or `<platformTag> (<variant>)`\n- every attempted path with the corresponding `require(...)` error\n- mode-specific remediation hints\n\n### Compiled-binary startup failures\n\nCompiled mode diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete the versioned cache and rerun,\n- direct release download `curl` commands for each expected filename.\n\n### Non-compiled startup failures\n\nNormal package/runtime diagnostics include:\n\n- reinstall hint (`bun install @gajae-code/natives`),\n- local rebuild command (`bun --cwd=packages/natives run build`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern bun --cwd=packages/natives run build`).\n",
25
25
  "natives-architecture.md": "# Natives Architecture\n\n`@gajae-code/natives` is now a two-layer package around a loader:\n\n1. **CommonJS loader/package entrypoint** resolves and loads the correct `.node` addon and patches generated enum objects onto the export object.\n2. **Rust N-API module layer** implements the exported functions/classes and emits the generated TypeScript declarations.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Package entrypoint and public surface\n\n`packages/natives/package.json` points directly at generated native bindings:\n\n- `main`: `./native/index.js`\n- `types`: `./native/index.d.ts`\n- `exports[\".\"].types`: `./native/index.d.ts`\n- `exports[\".\"].import`: `./native/index.js`\n\nThere is no current `packages/natives/src` TypeScript wrapper layer. Consumers import functions/classes/enums directly from `@gajae-code/natives`; the type contract is the generated `native/index.d.ts` plus enum exports appended by `scripts/gen-enums.ts`.\n\nCurrent capability groups in the generated API include:\n\n- **Search/text/code primitives**: `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `astGrep`, `astEdit`, text width/slicing/wrapping/sanitization, syntax highlighting, token counting.\n- **Execution/process/terminal primitives**: `executeShell`, `Shell`, `PtySession`, process-tree helpers, key parsing.\n- **System/media/conversion primitives**: clipboard, image resize/encode/SIXEL, HTML-to-Markdown, macOS appearance/power helpers, work profiling, Windows ProjFS overlay helpers.\n\n## Loader layer\n\n`packages/natives/native/index.js` owns runtime addon selection and optional embedded extraction.\n\n### Candidate resolution model\n\n- Platform tag is `${process.platform}-${process.arch}`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-x64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename without a variant suffix.\n\nFilename strategy:\n\n- Default: `pi_natives.<platform>-<arch>.node`\n- x64 variant: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- x64 runtime fallback includes the unsuffixed default filename after variant candidates.\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- Linux: `/proc/cpuinfo`\n- macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`\n- Windows: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`GJC_NATIVE_VARIANT` can force `modern` or `baseline`; invalid values are ignored.\n\n### Binary distribution and extraction model\n\n`packages/natives/package.json` publishes `native/`, which contains the loader, generated declarations, generated enum patch, embedded-addon manifest stub, and prebuilt `.node` artifacts.\n\nFor compiled binaries, loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`.\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/gjc` (fallback `%USERPROFILE%/AppData/Local/gjc`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates.\n\n`getNativesDir()` uses `$XDG_DATA_HOME/gjc/natives` when `$XDG_DATA_HOME/gjc` exists; otherwise it uses `~/.gjc/natives`.\n\nIf a populated embedded addon manifest is present, it is also treated as a compiled-binary signal. The loader can extract the matching embedded `.node` into the versioned cache directory before candidate probing.\n\n### Failure modes\n\nLoader failures are explicit:\n\n- **Unsupported platform tag**: after failed probing, throws with supported platform list.\n- **No loadable candidate**: throws with all attempted paths and remediation hints.\n- **Embedded extraction errors**: directory/write failures are recorded and included in final load diagnostics if no candidate loads.\n\nThe current loader does not perform a separate post-`require` export validation pass.\n\n## Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` declares exported module ownership:\n\n- `appearance`\n- `ast`\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `image`\n- `keys`\n- `language`\n- `power`\n- `prof`\n- `projfs_overlay`\n- `ps`\n- `pty`\n- `shell`\n- `task`\n- `text`\n- `tokens`\n- `utils` (crate-private helpers)\n\nN-API exports are generated from Rust `#[napi]` functions/classes/objects/enums. Snake_case Rust names are exposed as camelCase JavaScript names unless explicitly configured by napi-rs.\n\n## Ownership boundaries\n\n- **Loader/package ownership (`packages/natives/native`, `packages/natives/scripts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary embedded extraction\n - generated TypeScript declarations and enum export patching\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation consumed directly by package callers\n- **Consumer ownership (`packages/coding-agent`, `packages/tui`)**\n - user-facing policy and fallbacks that are not built into the native API\n - higher-level rendering, artifact, shell-session, and command behavior\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@gajae-code/natives`.\n2. `native/index.js` computes platform/arch/variant and candidate paths.\n3. Optional embedded binary extraction occurs for compiled distributions.\n4. The first `require(candidate)` that succeeds becomes the exported addon object.\n5. Generated enum objects are appended to `module.exports`.\n6. Caller invokes generated N-API functions/classes directly.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Generated binding declaration**: `native/index.d.ts` emitted by napi-rs during `build-native.ts`.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from embedded/cache paths before package-local paths.\n- **Embedded addon**: Build artifact metadata and file references generated into `native/embedded-addon.js` so compiled binaries can extract matching `.node` payloads.\n",
26
26
  "natives-binding-contract.md": "# Natives Binding Contract (JavaScript/TypeScript Side)\n\nThis document defines the JS/TS contract between `@gajae-code/natives` callers and the loaded N-API addon.\n\nCurrent package shape is direct-to-native: there is no `packages/natives/src/<module>` TypeScript wrapper layer. The public API is the generated `packages/natives/native/index.d.ts` declaration file, the CommonJS loader in `packages/natives/native/index.js`, and the Rust `#[napi]` exports in `crates/pi-natives/src`.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n- Rust modules under `crates/pi-natives/src/*.rs`\n\n## Contract model\n\nThe contract has three parts:\n\n1. **Generated runtime loader** (`native/index.js`)\n - computes candidates and `require(...)`s the `.node` addon;\n - exports the loaded addon object directly;\n - appends enum objects generated by `scripts/gen-enums.ts`.\n2. **Generated TypeScript declarations** (`native/index.d.ts`)\n - generated by napi-rs during `scripts/build-native.ts`;\n - declares exported functions, classes, object interfaces, and native enums;\n - is the package `types` entry.\n3. **Rust N-API exports** (`crates/pi-natives/src`)\n - `#[napi]` functions/classes/objects/enums are the source of generated declarations and runtime symbols;\n - snake_case Rust names become camelCase JavaScript names by napi-rs convention.\n\nThere is no current `NativeBindings` declaration-merging lifecycle and no `validateNative(...)` required-export list in the loader.\n\n## Public export surface organization\n\n`packages/natives/package.json` exposes the package root only:\n\n```json\n{\n \"main\": \"./native/index.js\",\n \"types\": \"./native/index.d.ts\",\n \"exports\": {\n \".\": {\n \"types\": \"./native/index.d.ts\",\n \"import\": \"./native/index.js\"\n }\n }\n}\n```\n\nConsumers in `packages/coding-agent` and `packages/tui` import directly from `@gajae-code/natives`.\n\n## JS API ↔ native export mapping (representative)\n\n| Category | Public JS API | Rust source | Return style |\n| ----------------- | ---------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- | ----------------------------- |\n| Grep | `grep(options, onMatch?)` | `grep.rs` | `Promise<GrepResult>` |\n| Grep | `search(content, options)` | `grep.rs` | `SearchResult` |\n| Grep | `hasMatch(content, pattern, ignoreCase?, multiline?)` | `grep.rs` | `boolean` |\n| Fuzzy path search | `fuzzyFind(options)` | `fd.rs` | `Promise<FuzzyFindResult>` |\n| Glob | `glob(options, onMatch?)` | `glob.rs` | `Promise<GlobResult>` |\n| Glob cache | `invalidateFsScanCache(path?)` | `fs_cache.rs` | `void` |\n| AST search/edit | `astGrep(options)`, `astEdit(options)` | `ast.rs` | `Promise<...>` |\n| Shell | `executeShell(options, onChunk?)` | `shell.rs` | `Promise<ShellExecuteResult>` |\n| Shell | `new Shell(options?)`, `shell.run(...)`, `shell.abort()` | `shell.rs` | class / promises |\n| PTY | `new PtySession()`, `start/write/resize/kill` | `pty.rs` | class / promises |\n| Process | `killTree(pid, signal)`, `listDescendants(pid)` | `ps.rs` | sync |\n| Keys | `parseKey`, `matchesKey`, Kitty/legacy helpers | `keys.rs` | sync |\n| Text | `wrapTextWithAnsi`, `truncateToWidth`, `sliceWithWidth`, `extractSegments`, `visibleWidth` | `text.rs` | sync |\n| Highlight | `highlightCode`, `supportsLanguage`, `getSupportedLanguages` | `highlight.rs` | sync |\n| HTML | `htmlToMarkdown(html, options?)` | `html.rs` | `Promise<string>` |\n| Image | `PhotonImage`, `encodeSixel` | `image.rs` | class / sync / promises |\n| Clipboard | `copyToClipboard`, `readImageFromClipboard` | `clipboard.rs` | sync / promise |\n| Tokens | `countTokens(input, encoding?)` | `tokens.rs` | sync |\n| System | `detectMacOSAppearance`, `MacAppearanceObserver`, `MacOSPowerAssertion`, `getWorkProfile`, ProjFS helpers | `appearance.rs`, `power.rs`, `prof.rs`, `projfs_overlay.rs` | mixed |\n\n## Sync vs async contract differences\n\nThe contract preserves Rust/N-API call style:\n\n- **Promise-returning exports** for worker-thread or async runtime work (`grep`, `glob`, `fuzzyFind`, `astGrep`, `astEdit`, `htmlToMarkdown`, shell/PTY runs, image parse/resize/encode, clipboard image read).\n- **Synchronous exports** for deterministic in-memory transforms/parsers or direct system calls (`search`, `hasMatch`, highlighting, text utilities, token counting, process queries, `copyToClipboard`, `encodeSixel`).\n- **Constructor exports** for stateful runtime objects (`Shell`, `PtySession`, `PhotonImage`, macOS observer/power handles).\n\nChanging sync ↔ async for an existing export is a breaking public API change because consumers call these exports directly.\n\n## Object and enum typing patterns\n\n### Object patterns\n\n`#[napi(object)]` Rust structs become TS interfaces, for example:\n\n- `GrepResult`, `SearchResult`, `GlobResult`, `FuzzyFindResult`\n- `ShellRunResult`, `ShellExecuteResult`, `PtyRunResult`, `MinimizerResult`\n- `AstFindResult`, `AstReplaceResult`\n- `System`/media payloads such as `ClipboardImage`, `WorkProfile`, `ParsedKittyResult`\n\nRuntime shape correctness is owned by napi-rs and the Rust implementation.\n\n### Enum patterns\n\nNative enums are represented in generated declarations and also appended to `module.exports` by `scripts/gen-enums.ts`, because the loader is hand-maintained CommonJS around the generated addon. Current enum objects include:\n\n- `AstMatchStrictness`\n- `Ellipsis`\n- `Encoding`\n- `FileType`\n- `GrepOutputMode`\n- `ImageFormat`\n- `KeyEventType`\n- `MacOSAppearance`\n- `SamplingFilter`\n\n## Error behavior and caveats\n\n- Addon load failure or unsupported platform throws during package import from `native/index.js`.\n- The loader does not verify the full export set after `require(...)`; stale or mismatched binaries surface as native load errors or missing members at use sites.\n- N-API conversion validates basic argument conversion, but TS optional fields do not guarantee semantic validity for untyped callers.\n- Numeric enum declarations do not prevent out-of-range numeric values from untyped callers unless the Rust function rejects them during conversion.\n- Callback exports use napi-rs `ThreadsafeFunction` shape: `(error: Error | null, value) => void`. Native code generally emits successful values; hard failures reject/throw through the owning call.\n\n## Maintainer checklist for binding changes\n\nWhen adding/changing an export, update all of:\n\n1. Rust `#[napi]` implementation in the owning `crates/pi-natives/src/<module>.rs`.\n2. `crates/pi-natives/src/lib.rs` if a new module is added.\n3. Any consumer imports/callsites in `packages/coding-agent` or `packages/tui`.\n4. Build output by running the natives build so `native/index.d.ts` and `native/index.js` stay in sync.\n5. `scripts/gen-enums.ts` if enum runtime export patching needs to change.\n\nDo not add a parallel TS wrapper convention unless the package design intentionally moves back to wrappers; current consumers depend on the direct generated API.\n",
@@ -55,6 +55,11 @@ import planModeApprovedPrompt from "../prompts/system/plan-mode-approved.md" wit
55
55
  import planModeCompactInstructionsPrompt from "../prompts/system/plan-mode-compact-instructions.md" with {
56
56
  type: "text",
57
57
  };
58
+ import {
59
+ createStarReminderBeforeAgentStartContributor,
60
+ scheduleLaunchStarReminderAfterFirstRender,
61
+ starReminderLaunchGate,
62
+ } from "../reminders/star-reminder";
58
63
  import type { AgentSession, AgentSessionEvent } from "../session/agent-session";
59
64
  import { HistoryStorage } from "../session/history-storage";
60
65
  import type { SessionContext, SessionManager } from "../session/session-manager";
@@ -553,6 +558,27 @@ export class InteractiveMode implements InteractiveModeContext {
553
558
  this.isInitialized = true;
554
559
  this.ui.requestRender(true);
555
560
 
561
+ // GitHub star reminder (interactive-only). Register the decline-driven
562
+ // injection contributor and schedule the launch nudge after the first
563
+ // render so the networked gh check never blocks startup.
564
+ const starReminderGate = starReminderLaunchGate({
565
+ enabled: settings.get("starReminder.enabled"),
566
+ quiet: startupQuiet,
567
+ });
568
+ if (starReminderGate.register) {
569
+ this.session.registerBeforeAgentStartContributor(
570
+ createStarReminderBeforeAgentStartContributor({
571
+ getSessionId: () => this.sessionManager.getSessionId(),
572
+ }),
573
+ );
574
+ }
575
+ if (starReminderGate.schedule) {
576
+ scheduleLaunchStarReminderAfterFirstRender({
577
+ confirm: (title, message) => this.showHookConfirm(title, message),
578
+ isIdle: () => !this.session.isStreaming && !this.isBackgrounded && !this.hookSelector,
579
+ });
580
+ }
581
+
556
582
  // Initialize hooks with TUI-based UI context
557
583
  await this.initHooksAndCustomTools();
558
584