@oh-my-pi/pi-coding-agent 15.10.0 → 15.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. package/CHANGELOG.md +142 -1
  2. package/dist/types/cli/dry-balance-cli.d.ts +15 -1
  3. package/dist/types/cli/startup-cwd.d.ts +2 -0
  4. package/dist/types/commands/launch.d.ts +3 -0
  5. package/dist/types/commit/analysis/conventional.d.ts +2 -2
  6. package/dist/types/commit/analysis/summary.d.ts +2 -2
  7. package/dist/types/commit/changelog/generate.d.ts +2 -2
  8. package/dist/types/commit/changelog/index.d.ts +2 -2
  9. package/dist/types/commit/map-reduce/index.d.ts +3 -3
  10. package/dist/types/commit/map-reduce/map-phase.d.ts +2 -2
  11. package/dist/types/commit/map-reduce/reduce-phase.d.ts +2 -2
  12. package/dist/types/commit/model-selection.d.ts +10 -4
  13. package/dist/types/config/api-key-resolver.d.ts +34 -0
  14. package/dist/types/config/keybindings.d.ts +2 -2
  15. package/dist/types/config/model-provider-priority.d.ts +1 -0
  16. package/dist/types/config/model-registry.d.ts +17 -1
  17. package/dist/types/config/model-resolver.d.ts +4 -1
  18. package/dist/types/config/settings-schema.d.ts +9 -0
  19. package/dist/types/config/settings.d.ts +7 -2
  20. package/dist/types/dap/config.d.ts +14 -1
  21. package/dist/types/dap/types.d.ts +10 -0
  22. package/dist/types/debug/report-bundle.d.ts +3 -0
  23. package/dist/types/edit/file-snapshot-store.d.ts +18 -10
  24. package/dist/types/eval/py/__tests__/prelude.test.d.ts +1 -0
  25. package/dist/types/extensibility/extensions/types.d.ts +4 -1
  26. package/dist/types/lsp/client.d.ts +10 -0
  27. package/dist/types/lsp/utils.d.ts +3 -2
  28. package/dist/types/main.d.ts +3 -9
  29. package/dist/types/mcp/tool-bridge.d.ts +2 -0
  30. package/dist/types/modes/components/chat-block.d.ts +64 -0
  31. package/dist/types/modes/components/custom-editor.d.ts +4 -1
  32. package/dist/types/modes/components/overlay-box.d.ts +17 -0
  33. package/dist/types/modes/components/plan-review-overlay.d.ts +59 -0
  34. package/dist/types/modes/components/plan-toc.d.ts +41 -0
  35. package/dist/types/modes/components/read-tool-group.d.ts +2 -0
  36. package/dist/types/modes/components/status-line.d.ts +2 -0
  37. package/dist/types/modes/components/transcript-container.d.ts +11 -0
  38. package/dist/types/modes/controllers/command-controller.d.ts +1 -0
  39. package/dist/types/modes/controllers/event-controller.d.ts +17 -1
  40. package/dist/types/modes/controllers/extension-ui-controller.d.ts +0 -1
  41. package/dist/types/modes/controllers/input-controller.d.ts +1 -1
  42. package/dist/types/modes/controllers/streaming-reveal.d.ts +22 -0
  43. package/dist/types/modes/controllers/tan-command-controller.d.ts +6 -0
  44. package/dist/types/modes/interactive-mode.d.ts +16 -5
  45. package/dist/types/modes/magic-keywords.d.ts +1 -1
  46. package/dist/types/modes/markdown-prose.d.ts +1 -1
  47. package/dist/types/modes/theme/theme.d.ts +1 -1
  48. package/dist/types/modes/types.d.ts +21 -5
  49. package/dist/types/modes/utils/copy-targets.d.ts +21 -1
  50. package/dist/types/modes/workflow.d.ts +3 -3
  51. package/dist/types/plan-mode/approved-plan.d.ts +27 -8
  52. package/dist/types/plan-mode/plan-protection.d.ts +4 -4
  53. package/dist/types/sdk.d.ts +2 -0
  54. package/dist/types/session/agent-session.d.ts +21 -0
  55. package/dist/types/session/auth-storage.d.ts +1 -1
  56. package/dist/types/session/messages.d.ts +12 -0
  57. package/dist/types/session/session-manager.d.ts +8 -3
  58. package/dist/types/slash-commands/types.d.ts +4 -6
  59. package/dist/types/task/executor.d.ts +17 -0
  60. package/dist/types/task/index.d.ts +1 -0
  61. package/dist/types/task/render.d.ts +3 -2
  62. package/dist/types/tools/archive-reader.d.ts +5 -0
  63. package/dist/types/tools/ast-edit.d.ts +3 -0
  64. package/dist/types/tools/ast-grep.d.ts +3 -0
  65. package/dist/types/tools/bash.d.ts +1 -0
  66. package/dist/types/tools/eval.d.ts +8 -0
  67. package/dist/types/tools/find.d.ts +8 -4
  68. package/dist/types/tools/gh-cache-invalidation.d.ts +6 -0
  69. package/dist/types/tools/github-cache.d.ts +12 -0
  70. package/dist/types/tools/grouped-file-output.d.ts +95 -12
  71. package/dist/types/tools/memory-render.d.ts +4 -1
  72. package/dist/types/tools/path-utils.d.ts +8 -0
  73. package/dist/types/tools/plan-mode-guard.d.ts +8 -9
  74. package/dist/types/tools/render-utils.d.ts +5 -9
  75. package/dist/types/tools/search.d.ts +6 -2
  76. package/dist/types/tools/sqlite-reader.d.ts +1 -0
  77. package/dist/types/tools/todo.d.ts +3 -2
  78. package/dist/types/tools/write.d.ts +3 -0
  79. package/dist/types/tools/yield.d.ts +8 -0
  80. package/dist/types/tui/output-block.d.ts +16 -4
  81. package/dist/types/tui/status-line.d.ts +3 -0
  82. package/dist/types/utils/enhanced-paste.d.ts +20 -0
  83. package/dist/types/web/search/providers/kimi.d.ts +1 -1
  84. package/package.json +9 -9
  85. package/src/auto-thinking/classifier.ts +5 -1
  86. package/src/cli/args.ts +3 -1
  87. package/src/cli/dry-balance-cli.ts +54 -21
  88. package/src/cli/gallery-cli.ts +4 -1
  89. package/src/cli/gallery-fixtures/misc.ts +29 -0
  90. package/src/cli/startup-cwd.ts +68 -0
  91. package/src/commands/launch.ts +3 -0
  92. package/src/commit/analysis/conventional.ts +2 -2
  93. package/src/commit/analysis/summary.ts +2 -2
  94. package/src/commit/changelog/generate.ts +2 -2
  95. package/src/commit/changelog/index.ts +2 -2
  96. package/src/commit/map-reduce/index.ts +3 -3
  97. package/src/commit/map-reduce/map-phase.ts +2 -2
  98. package/src/commit/map-reduce/reduce-phase.ts +2 -2
  99. package/src/commit/model-selection.ts +36 -11
  100. package/src/commit/pipeline.ts +4 -4
  101. package/src/config/api-key-resolver.ts +58 -0
  102. package/src/config/model-provider-priority.ts +55 -0
  103. package/src/config/model-registry.ts +29 -24
  104. package/src/config/model-resolver.ts +39 -7
  105. package/src/config/settings-schema.ts +10 -0
  106. package/src/config/settings.ts +106 -43
  107. package/src/dap/config.ts +41 -2
  108. package/src/dap/defaults.json +1 -0
  109. package/src/dap/session.ts +1 -0
  110. package/src/dap/types.ts +10 -0
  111. package/src/debug/index.ts +47 -53
  112. package/src/debug/raw-sse-buffer.ts +7 -4
  113. package/src/debug/report-bundle.ts +9 -0
  114. package/src/edit/file-snapshot-store.ts +33 -1
  115. package/src/edit/hashline/filesystem.ts +2 -1
  116. package/src/edit/renderer.ts +82 -78
  117. package/src/eval/__tests__/llm-bridge.test.ts +110 -31
  118. package/src/eval/js/context-manager.ts +32 -15
  119. package/src/eval/llm-bridge.ts +22 -6
  120. package/src/eval/py/__tests__/prelude.test.ts +19 -0
  121. package/src/eval/py/executor.ts +23 -11
  122. package/src/eval/py/prelude.py +1 -1
  123. package/src/extensibility/extensions/types.ts +10 -1
  124. package/src/goals/tools/goal-tool.ts +36 -26
  125. package/src/internal-urls/docs-index.generated.ts +8 -8
  126. package/src/lsp/client.ts +23 -11
  127. package/src/lsp/config.ts +11 -1
  128. package/src/lsp/index.ts +61 -9
  129. package/src/lsp/utils.ts +3 -2
  130. package/src/main.ts +100 -72
  131. package/src/mcp/tool-bridge.ts +2 -0
  132. package/src/memories/index.ts +14 -7
  133. package/src/mnemopi/backend.ts +5 -1
  134. package/src/modes/acp/acp-agent.ts +33 -26
  135. package/src/modes/components/assistant-message.ts +2 -9
  136. package/src/modes/components/chat-block.ts +111 -0
  137. package/src/modes/components/copy-selector.ts +1 -44
  138. package/src/modes/components/custom-editor.ts +164 -109
  139. package/src/modes/components/custom-message.ts +1 -3
  140. package/src/modes/components/execution-shared.ts +1 -2
  141. package/src/modes/components/hook-message.ts +1 -3
  142. package/src/modes/components/model-selector.ts +59 -13
  143. package/src/modes/components/oauth-selector.ts +33 -7
  144. package/src/modes/components/overlay-box.ts +108 -0
  145. package/src/modes/components/plan-review-overlay.ts +799 -0
  146. package/src/modes/components/plan-toc.ts +138 -0
  147. package/src/modes/components/read-tool-group.ts +20 -4
  148. package/src/modes/components/skill-message.ts +0 -1
  149. package/src/modes/components/status-line.ts +19 -4
  150. package/src/modes/components/tips.txt +2 -1
  151. package/src/modes/components/todo-reminder.ts +0 -2
  152. package/src/modes/components/tool-execution.ts +68 -88
  153. package/src/modes/components/transcript-container.ts +84 -24
  154. package/src/modes/components/user-message.ts +2 -3
  155. package/src/modes/controllers/command-controller-shared.ts +7 -6
  156. package/src/modes/controllers/command-controller.ts +57 -55
  157. package/src/modes/controllers/event-controller.ts +67 -40
  158. package/src/modes/controllers/extension-ui-controller.ts +10 -73
  159. package/src/modes/controllers/input-controller.ts +170 -126
  160. package/src/modes/controllers/mcp-command-controller.ts +69 -60
  161. package/src/modes/controllers/selector-controller.ts +23 -25
  162. package/src/modes/controllers/streaming-reveal.ts +212 -0
  163. package/src/modes/controllers/tan-command-controller.ts +173 -0
  164. package/src/modes/interactive-mode.ts +274 -112
  165. package/src/modes/magic-keywords.ts +1 -1
  166. package/src/modes/markdown-prose.ts +1 -1
  167. package/src/modes/setup-wizard/wizard-overlay.ts +1 -1
  168. package/src/modes/theme/shimmer.ts +20 -9
  169. package/src/modes/theme/theme-schema.json +1 -1
  170. package/src/modes/theme/theme.ts +8 -4
  171. package/src/modes/types.ts +21 -7
  172. package/src/modes/utils/copy-targets.ts +133 -27
  173. package/src/modes/utils/ui-helpers.ts +44 -46
  174. package/src/modes/workflow.ts +10 -10
  175. package/src/plan-mode/approved-plan.ts +66 -43
  176. package/src/plan-mode/plan-protection.ts +4 -4
  177. package/src/prompts/system/background-tan-dispatch.md +8 -0
  178. package/src/prompts/system/plan-mode-active.md +67 -58
  179. package/src/prompts/system/plan-mode-approved.md +1 -1
  180. package/src/prompts/system/workflow-notice.md +1 -1
  181. package/src/prompts/tools/bash.md +9 -0
  182. package/src/prompts/tools/browser.md +1 -1
  183. package/src/prompts/tools/eval.md +2 -1
  184. package/src/prompts/tools/read.md +2 -2
  185. package/src/sdk.ts +37 -46
  186. package/src/session/agent-session.ts +119 -18
  187. package/src/session/auth-storage.ts +2 -0
  188. package/src/session/messages.ts +26 -0
  189. package/src/session/session-manager.ts +109 -28
  190. package/src/slash-commands/builtin-registry.ts +36 -9
  191. package/src/slash-commands/types.ts +4 -6
  192. package/src/task/executor.ts +76 -38
  193. package/src/task/index.ts +4 -0
  194. package/src/task/render.ts +211 -147
  195. package/src/tools/archive-reader.ts +64 -0
  196. package/src/tools/ask.ts +119 -164
  197. package/src/tools/ast-edit.ts +98 -71
  198. package/src/tools/ast-grep.ts +37 -43
  199. package/src/tools/bash.ts +57 -6
  200. package/src/tools/browser/tab-supervisor.ts +13 -1
  201. package/src/tools/browser/tab-worker.ts +33 -4
  202. package/src/tools/debug.ts +20 -8
  203. package/src/tools/eval.ts +13 -2
  204. package/src/tools/fetch.ts +297 -7
  205. package/src/tools/find.ts +51 -30
  206. package/src/tools/gh-cache-invalidation.ts +200 -0
  207. package/src/tools/gh-renderer.ts +81 -42
  208. package/src/tools/github-cache.ts +25 -0
  209. package/src/tools/grouped-file-output.ts +272 -48
  210. package/src/tools/image-gen.ts +150 -103
  211. package/src/tools/inspect-image-renderer.ts +63 -41
  212. package/src/tools/inspect-image.ts +10 -3
  213. package/src/tools/job.ts +3 -4
  214. package/src/tools/memory-render.ts +4 -1
  215. package/src/tools/path-utils.ts +28 -2
  216. package/src/tools/plan-mode-guard.ts +66 -39
  217. package/src/tools/read.ts +48 -28
  218. package/src/tools/render-utils.ts +21 -37
  219. package/src/tools/resolve.ts +14 -0
  220. package/src/tools/search-tool-bm25.ts +36 -23
  221. package/src/tools/search.ts +118 -81
  222. package/src/tools/sqlite-reader.ts +9 -12
  223. package/src/tools/todo.ts +118 -52
  224. package/src/tools/write.ts +83 -64
  225. package/src/tools/yield.ts +10 -1
  226. package/src/tui/output-block.ts +60 -13
  227. package/src/tui/status-line.ts +5 -1
  228. package/src/utils/commit-message-generator.ts +11 -3
  229. package/src/utils/enhanced-paste.ts +230 -0
  230. package/src/utils/title-generator.ts +2 -1
  231. package/src/web/search/providers/anthropic.ts +25 -19
  232. package/src/web/search/providers/codex.ts +37 -8
  233. package/src/web/search/providers/exa.ts +11 -3
  234. package/src/web/search/providers/kimi.ts +28 -17
  235. package/src/web/search/providers/parallel.ts +35 -24
  236. package/src/web/search/providers/synthetic.ts +8 -6
  237. package/src/web/search/providers/tavily.ts +9 -8
  238. package/src/web/search/providers/zai.ts +8 -6
@@ -3,25 +3,25 @@ import { createGradientHighlighter, type KeywordHighlighter } from "./gradient-h
3
3
  import { keywordInProse } from "./markdown-prose";
4
4
 
5
5
  /**
6
- * "workflow" keyword support.
6
+ * "workflowz" keyword support.
7
7
  *
8
8
  * Typing the standalone word in the input editor paints it with a warm
9
9
  * amber→green gradient ({@link highlightWorkflow}); submitting a message that
10
10
  * mentions it appends a hidden {@link WORKFLOW_NOTICE} that steers the model to
11
11
  * author a deterministic multi-subagent workflow in eval cells (agent/parallel/
12
12
  * pipeline). Matching is whitespace-delimited and case-sensitive (lowercase
13
- * only) — "workflow"/"workflows" trigger, but "workflowed", "Workflow", and
14
- * "workflow.ts" never do.
13
+ * only) — "workflowz" triggers, but "workflowzed", "Workflowz", and
14
+ * "workflowz.ts" never do.
15
15
  */
16
16
 
17
- // Detection: lowercase keyword (singular or plural) flanked by whitespace or a string edge. Non-global so `.test` stays stateless.
18
- const WORKFLOW_WORD = /(?<!\S)workflows?(?!\S)/;
17
+ // Detection: lowercase keyword flanked by whitespace or a string edge. Non-global so `.test` stays stateless.
18
+ const WORKFLOW_WORD = /(?<!\S)workflowz(?!\S)/;
19
19
 
20
- /** Hidden system notice appended after a user message that mentions "workflow". */
20
+ /** Hidden system notice appended after a user message that mentions "workflowz". */
21
21
  export const WORKFLOW_NOTICE: string = workflowNotice.trim();
22
22
 
23
23
  /**
24
- * Whether `text` contains the standalone keyword "workflow"/"workflows"
24
+ * Whether `text` contains the standalone keyword "workflowz"
25
25
  * (lowercase, whitespace-delimited) in prose — never inside a code block, inline
26
26
  * code span, or XML/HTML section.
27
27
  */
@@ -30,13 +30,13 @@ export function containsWorkflow(text: string): boolean {
30
30
  }
31
31
 
32
32
  /**
33
- * Highlight every standalone "workflow"/"workflows" in `text` for editor display
33
+ * Highlight every standalone "workflowz" in `text` for editor display
34
34
  * with a warm amber→green gradient (hue 30..150), visually distinct from
35
35
  * ultrathink's rainbow and orchestrate's teal→violet.
36
36
  */
37
37
  export const highlightWorkflow: KeywordHighlighter = createGradientHighlighter({
38
- probe: /workflow/,
39
- highlight: /(?<!\S)workflows?(?!\S)/g,
38
+ probe: /workflowz/,
39
+ highlight: /(?<!\S)workflowz(?!\S)/g,
40
40
  stops: 14,
41
41
  hue: t => 30 + t * 120,
42
42
  });
@@ -1,15 +1,12 @@
1
- import * as fs from "node:fs/promises";
2
- import { isEnoent } from "@oh-my-pi/pi-utils";
3
- import { resolveLocalUrlToPath } from "../internal-urls";
4
- import { normalizeLocalScheme } from "../tools/path-utils";
5
1
  import { ToolError } from "../tools/tool-errors";
6
2
 
7
3
  /** Shape forwarded from the plan-mode resolve handler to InteractiveMode's
8
4
  * approval popup. Populated by the standing handler that the resolve tool
9
- * dispatches to when the agent submits `resolve { action: "apply" }`. */
5
+ * dispatches to when the agent submits `resolve { action: "apply" }`.
6
+ * `planFilePath` is the agent-chosen `local://<slug>-plan.md` artifact — it is
7
+ * never renamed on approval, so links to it stay valid for the session. */
10
8
  export interface PlanApprovalDetails {
11
9
  planFilePath: string;
12
- finalPlanFilePath: string;
13
10
  title: string;
14
11
  planExists: boolean;
15
12
  }
@@ -110,54 +107,80 @@ export function humanizePlanTitle(title: string): string {
110
107
  return spaced.charAt(0).toUpperCase() + spaced.slice(1);
111
108
  }
112
109
 
113
- interface RenameApprovedPlanFileOptions {
114
- planFilePath: string;
115
- finalPlanFilePath: string;
116
- getArtifactsDir: () => string | null;
117
- getSessionId: () => string | null;
110
+ /** The `local://` URL a plan slug maps to. The agent writes the plan here and
111
+ * passes the slug to `resolve`; the file is never renamed, so this URL — and
112
+ * any hyperlink to it — stays valid for the life of the session. */
113
+ export function planFileUrlForSlug(slug: string): string {
114
+ return `local://${slug}-plan.md`;
118
115
  }
119
116
 
120
- function assertLocalUrl(path: string, label: "source" | "destination"): void {
121
- if (!path.startsWith("local:/") && !path.startsWith("local://")) {
122
- throw new Error(`Approved plan ${label} path must use local: scheme with / or // (received ${path}).`);
117
+ /** Derive a `<slug>` from an agent-supplied `extra.title`, or `undefined` when
118
+ * the title is missing/non-string/unsanitizable. A trailing `-plan` is stripped
119
+ * so a supplied "auth-plan" maps to `auth-plan.md`, not `auth-plan-plan.md`. */
120
+ function planSlugFromSupplied(suppliedTitle: unknown): string | undefined {
121
+ if (typeof suppliedTitle !== "string" || !suppliedTitle.trim()) return undefined;
122
+ try {
123
+ const { title } = normalizePlanTitle(suppliedTitle);
124
+ const slug = title.replace(/-plan$/i, "");
125
+ return slug || title;
126
+ } catch {
127
+ return undefined;
123
128
  }
124
129
  }
125
130
 
126
- export async function renameApprovedPlanFile(options: RenameApprovedPlanFileOptions): Promise<void> {
127
- const { planFilePath, finalPlanFilePath, getArtifactsDir, getSessionId } = options;
128
- assertLocalUrl(planFilePath, "source");
129
- assertLocalUrl(finalPlanFilePath, "destination");
131
+ export interface ResolveApprovedPlanInput {
132
+ /** The agent's `extra.title` from the `resolve` call, if any. */
133
+ suppliedTitle?: unknown;
134
+ /** The plan path recorded in plan-mode state (the entry default or a prior plan). */
135
+ statePlanFilePath: string;
136
+ /** Read a plan `local://` URL, returning null when the file does not exist. */
137
+ readPlan: (planUrl: string) => Promise<string | null>;
138
+ /** Optional fallback: list candidate plan `local://` URLs (newest first) so a
139
+ * plan whose name can't be reconstructed (e.g. a dropped `extra.title`) is
140
+ * still found. */
141
+ listPlanFiles?: () => Promise<string[]>;
142
+ }
143
+
144
+ export interface ResolvedApprovedPlan {
145
+ planFilePath: string;
146
+ planContent: string;
147
+ title: string;
148
+ }
130
149
 
131
- const resolveOptions = {
132
- getArtifactsDir: () => getArtifactsDir(),
133
- getSessionId: () => getSessionId(),
150
+ /** Locate the plan file the agent wrote and finalize its title — without
151
+ * renaming anything. Tries, in order: the slug derived from `extra.title`
152
+ * (`local://<slug>-plan.md`), the plan path from plan-mode state, then a scan
153
+ * of recent plan files. Throws a `ToolError` guiding the agent when none exist. */
154
+ export async function resolveApprovedPlan(input: ResolveApprovedPlanInput): Promise<ResolvedApprovedPlan> {
155
+ const ordered: string[] = [];
156
+ const consider = (url: string | undefined): void => {
157
+ if (url && !ordered.includes(url)) ordered.push(url);
134
158
  };
135
- const resolvedSource = resolveLocalUrlToPath(normalizeLocalScheme(planFilePath), resolveOptions);
136
- const resolvedDestination = resolveLocalUrlToPath(normalizeLocalScheme(finalPlanFilePath), resolveOptions);
137
159
 
138
- if (resolvedSource === resolvedDestination) {
139
- return;
160
+ const slug = planSlugFromSupplied(input.suppliedTitle);
161
+ consider(slug ? planFileUrlForSlug(slug) : undefined);
162
+ consider(input.statePlanFilePath);
163
+
164
+ for (const url of ordered) {
165
+ const content = await input.readPlan(url);
166
+ if (content !== null) return finalizeApprovedPlan(url, content, input.suppliedTitle);
140
167
  }
141
168
 
142
- try {
143
- const destinationStat = await fs.stat(resolvedDestination);
144
- if (destinationStat.isFile()) {
145
- throw new Error(
146
- `Plan destination already exists at ${finalPlanFilePath}. Choose a different title and submit the plan for approval again.`,
147
- );
148
- }
149
- throw new Error(`Plan destination exists but is not a file: ${finalPlanFilePath}`);
150
- } catch (error) {
151
- if (!isEnoent(error)) {
152
- throw error;
169
+ if (input.listPlanFiles) {
170
+ for (const url of await input.listPlanFiles()) {
171
+ if (ordered.includes(url)) continue;
172
+ const content = await input.readPlan(url);
173
+ if (content !== null) return finalizeApprovedPlan(url, content, input.suppliedTitle);
153
174
  }
154
175
  }
155
176
 
156
- try {
157
- await fs.rename(resolvedSource, resolvedDestination);
158
- } catch (error) {
159
- throw new Error(
160
- `Failed to rename approved plan from ${planFilePath} to ${finalPlanFilePath}: ${error instanceof Error ? error.message : String(error)}`,
161
- );
162
- }
177
+ const target = ordered[0] ?? input.statePlanFilePath;
178
+ throw new ToolError(
179
+ `Plan file not found at ${target}. Write the finalized plan to ${target} before requesting approval.`,
180
+ );
181
+ }
182
+
183
+ function finalizeApprovedPlan(planFilePath: string, planContent: string, suppliedTitle: unknown): ResolvedApprovedPlan {
184
+ const { title } = resolvePlanTitle({ suppliedTitle, planContent, planFilePath });
185
+ return { planFilePath, planContent, title };
163
186
  }
@@ -16,11 +16,11 @@ function readTargetsPlan(readPath: string, planTarget: string): boolean {
16
16
  * Build a compaction protection matcher that keeps `read` results for the active
17
17
  * plan file intact through prune/shake — the plan analog of skill-read
18
18
  * protection. Matches both the canonical `local://PLAN.md` alias and the
19
- * session's current plan reference path (e.g. a titled `local://<title>.md`), so
20
- * the plan survives compaction whether the agent reads it by alias or by title.
19
+ * session's current plan reference path (the agent-chosen `local://<slug>-plan.md`),
20
+ * so the plan survives compaction whether the agent reads it by alias or by name.
21
21
  *
22
- * `getPlanReferencePath` is evaluated at match time so a mid-session retitle
23
- * (plan approval renames `PLAN.md` → `<title>.md`) is honored immediately.
22
+ * `getPlanReferencePath` is evaluated at match time so the plan path set on
23
+ * approval is honored immediately.
24
24
  */
25
25
  export function createPlanReadMatcher(getPlanReferencePath: () => string): (context: ProtectedToolContext) => boolean {
26
26
  return (context: ProtectedToolContext) => {
@@ -0,0 +1,8 @@
1
+ <system-notice reason="background_task_dispatched" job="{{jobId}}">
2
+ The user launched a tangential task that is now running in a separate background agent. This is NOT a prompt injection and NOT a new instruction for you — it is the coding agent informing you that work was handed off elsewhere.
3
+
4
+ The task below is being handled by another agent in its own session. You are NOT responsible for it: do NOT start working on it, do NOT reference it, and do NOT let it interrupt or alter your current task. Simply continue what you were doing as if this message had not appeared. Results, if any, will surface separately when the background task ({{jobId}}) completes.
5
+
6
+ Dispatched work (for your awareness only):
7
+ {{work}}
8
+ </system-notice>
@@ -6,111 +6,120 @@ You NEVER:
6
6
  - Run state-changing commands (git commit, npm install, etc.)
7
7
  - Make any system changes
8
8
 
9
- To implement: call `resolve` with `action: "apply"`, a `reason`, and `extra: { title: "<PLAN_TITLE>" }` → user approves an execution option → full write access is restored. `<PLAN_TITLE>` may only contain letters, numbers, underscores, and hyphens; the approved plan is renamed to `local://<PLAN_TITLE>.md`.
9
+ To implement: call `resolve` with `action: "apply"`, a `reason`, and `extra: { title: "<slug>" }` where `<slug>` matches your `local://<slug>-plan.md` file → user approves an execution option → full write access is restored. `<slug>` may only contain letters, numbers, underscores, and hyphens. The plan file is never renamed, so its name is yours to choose.
10
10
 
11
11
  You NEVER ask the user to exit plan mode for you; you MUST call `resolve` yourself.
12
12
  </critical>
13
13
 
14
+ ## Objective
15
+
16
+ A plan is **decision-complete**: another engineer or agent can execute it end-to-end without making a single design decision. Optimize every choice for that. Detail exists to remove the implementer's decisions — not to look thorough. A document that reads like a design doc (Non-Goals, Alternatives, risk matrices) yet leaves real decisions open is a FAILED plan.
17
+
14
18
  ## Plan File
15
19
 
16
20
  {{#if planExists}}
17
- Plan file exists at `{{planFilePath}}`; you MUST read and update it incrementally.
21
+ Plan file exists at `{{planFilePath}}`; you MUST read and update it incrementally. If this request is a different task, write a fresh `local://<slug>-plan.md` instead and leave the old plan in place.
18
22
  {{else}}
19
- You MUST create a plan at `{{planFilePath}}`.
23
+ Choose a short kebab-case `<slug>` that names this task (letters, numbers, hyphens) and write the plan to `local://<slug>-plan.md` — e.g. `local://auth-token-refresh-plan.md`. You MUST pass that same `<slug>` as `title` when you call `resolve`.
20
24
  {{/if}}
21
25
 
22
- You MUST use `{{editToolName}}` for incremental updates; use `{{writeToolName}}` only for create/full replace.
26
+ You MUST use `{{editToolName}}` for incremental updates; use `{{writeToolName}}` only for create/full replace. You MUST update the plan as you learn — you NEVER batch all writing to the end.
23
27
 
24
- <caution>
25
- The approval selector includes:
26
- - **Approve and execute**: starts execution in fresh context (session cleared).
27
- - **Approve and compact context**: distills the plan-mode discussion into a summary, then starts execution in this session.
28
- - **Approve and keep context**: starts execution in this session, preserving exploration history.
28
+ ## Resolving Unknowns
29
29
 
30
- You MUST still make the plan file self-contained: include requirements, decisions, key findings, and remaining todos.
31
- </caution>
30
+ You MUST eliminate unknowns by discovering facts, not by asking. Before asking the user anything, perform at least one targeted exploration pass.
31
+
32
+ Two kinds of unknowns, treated differently:
33
+ - **Discoverable facts** — repo/system truth: file locations, current behavior, existing patterns, types, configs. You MUST explore first (`find`, `search`, `read`, parallel explore subagents). You NEVER ask what the codebase can answer (e.g. "where is this defined?"). Ask only when several plausible candidates remain or a required identifier is genuinely absent — and then present the candidates with a recommendation.
34
+ - **Preferences and tradeoffs** — intent, UX, scope boundaries, performance-vs-simplicity: not derivable from code. You MUST surface these early via `{{askToolName}}` with 2–4 mutually exclusive options and a recommended default. If left unanswered, proceed with the default and record it under Assumptions.
35
+
36
+ Every question MUST materially change the plan, confirm a load-bearing assumption, or choose between real tradeoffs. You MUST batch questions. You NEVER ask filler questions or offer obviously-wrong options.
32
37
 
33
38
  {{#if reentry}}
34
39
  ## Re-entry
35
40
 
36
41
  <procedure>
37
- 1. Read existing plan
38
- 2. Evaluate request against it
42
+ 1. Read the existing plan.
43
+ 2. Evaluate the new request against it.
39
44
  3. Decide:
40
- - **Different task** → Overwrite plan
41
- - **Same task, continuing** → Update and clean outdated sections
42
- 4. Call `resolve` with `action: "apply"` and `extra: { title }` when complete
45
+ - **Different task** → overwrite the plan.
46
+ - **Same task, continuing** → update and delete outdated sections.
47
+ 4. Call `resolve` with `action: "apply"` and `extra: { title }` when complete.
43
48
  </procedure>
44
49
  {{/if}}
45
50
 
46
51
  {{#if iterative}}
47
- ## Iterative Planning
52
+ ## Workflow — Iterative
48
53
 
49
54
  <procedure>
50
55
  ### 1. Explore
51
- You MUST use `find`, `search`, `read` to understand the codebase.
56
+ You MUST use `find`, `search`, `read` to ground yourself in the actual code. Hunt for existing functions, utilities, and conventions to reuse before proposing anything new.
52
57
 
53
58
  ### 2. Interview
54
- You MUST use `{{askToolName}}` to clarify:
55
- - Ambiguous requirements
56
- - Technical decisions and tradeoffs
57
- - Preferences: UI/UX, performance, edge cases
59
+ You MUST use `{{askToolName}}` to resolve preferences and tradeoffs (see Resolving Unknowns). Batch questions; never ask what exploration answers.
58
60
 
59
- You MUST batch questions. You NEVER ask what you can answer by exploring.
60
-
61
- ### 3. Update Incrementally
62
- You MUST use `{{editToolName}}` to update plan file as you learn; NEVER wait until end.
61
+ ### 3. Update incrementally
62
+ You MUST use `{{editToolName}}` to revise the plan file as you learn.
63
63
 
64
64
  ### 4. Calibrate
65
- - Large unspecified task → multiple interview rounds
66
- - Smaller task → fewer or no questions
65
+ - Large, unspecified task → multiple interview rounds.
66
+ - Small, well-specified task → few or no questions.
67
67
  </procedure>
68
-
69
- <caution>
70
- ### Plan Structure
71
-
72
- You MUST use clear markdown headers; include:
73
- - Recommended approach (not alternatives)
74
- - Paths of critical files to modify
75
- - Verification: how to test end-to-end
76
-
77
- The plan MUST be scannable yet detailed enough to execute.
78
- </caution>
79
-
80
68
  {{else}}
81
- ## Planning Workflow
69
+ ## Workflow — Parallel
82
70
 
83
71
  <procedure>
84
- ### Phase 1: Understand
85
- You MUST focus on the request and associated code. You SHOULD launch parallel explore agents when scope spans multiple areas.
72
+ ### Phase 1 Understand
73
+ You MUST focus on the request and the code behind it. You SHOULD launch parallel `explore` subagents (via `task`) when scope spans multiple areas — give each a distinct focus (existing implementations, related components, test patterns). Actively hunt for reusable functions, utilities, and conventions; avoid proposing new code when a suitable implementation already exists.
86
74
 
87
- ### Phase 2: Design
88
- You MUST draft an approach based on exploration. You MUST consider trade-offs briefly, then choose.
75
+ ### Phase 2 Design
76
+ You MUST draft an approach from your exploration, weigh trade-offs briefly, then commit to one. For large or cross-cutting changes you MAY spawn a planning/critique subagent to pressure-test the approach before you commit.
89
77
 
90
- ### Phase 3: Review
91
- You MUST read critical files. You MUST verify plan matches original request. You SHOULD use `{{askToolName}}` to clarify remaining questions.
78
+ ### Phase 3 Review
79
+ You MUST read the critical files you intend to touch to confirm the approach holds against the real code. You MUST verify the plan still matches the original request. You SHOULD use `{{askToolName}}` to close remaining preference questions.
92
80
 
93
- ### Phase 4: Update Plan
94
- You MUST update `{{planFilePath}}` (`{{editToolName}}` for changes, `{{writeToolName}}` only if creating from scratch):
95
- - Recommended approach only
96
- - Paths of critical files to modify
97
- - Verification section
81
+ ### Phase 4 Write the plan
82
+ You MUST write the plan file (see **Plan File** above) per **The Plan** below.
98
83
  </procedure>
84
+ {{/if}}
85
+
86
+ ## The Plan
87
+
88
+ The plan MUST be self-contained: approval may clear or compact this conversation, so the file alone must carry everything needed to execute.
99
89
 
100
90
  <caution>
101
- You MUST ask questions throughout. You NEVER make large assumptions about user intent.
91
+ Write 3–5 short, scannable markdown sections. The usual shape:
92
+ - **Context** — why this change: the problem or need, what prompted it, the intended outcome. 2–4 sentences.
93
+ - **Approach** — the recommended approach only. Group bullets by subsystem or behavior, NOT file-by-file. Name existing functions/utilities to reuse, with their paths. Describe a repeated pattern once with a few representative paths — you NEVER enumerate every file or line.
94
+ - **Critical files** — the ≤5 files that disambiguate non-obvious changes, each with a one-line reason. Skip files whose change is already obvious from the Approach.
95
+ - **Verification** — how to test end-to-end: exact commands, tests to run or add, manual steps.
96
+ - **Assumptions** — only the decisions you made that the user might want to override.
97
+
98
+ Prefer the minimum detail needed for safe implementation, not exhaustive coverage. Compress related changes into high-signal bullets; omit branch-by-branch logic, restated invariants, and lists of unaffected behavior. Behavior-level descriptions beat symbol-by-symbol removal lists.
102
99
  </caution>
103
- {{/if}}
104
100
 
105
101
  <directives>
106
- - You MUST use `{{askToolName}}` only for clarifying requirements or choosing approaches
102
+ - You NEVER include sections that decide nothing: Non-Goals, Out of Scope, Alternatives Considered, Risks/Mitigations boilerplate, Future Work. Omit them entirely.
103
+ - You NEVER invent schema, validation, precedence, or fallback policy the request did not establish, unless it is required to prevent a concrete implementation mistake.
104
+ - You NEVER present alternatives in the final plan — choose. Record a discarded option only when it is a live tradeoff the user should confirm, and put it under Assumptions.
107
105
  </directives>
108
106
 
107
+ <caution>
108
+ The approval selector offers:
109
+ - **Approve and execute** — execution starts in fresh context (session cleared).
110
+ - **Approve and compact context** — distills this discussion into a summary, then executes in this session.
111
+ - **Approve and keep context** — executes in this session, preserving exploration history.
112
+
113
+ All three rely on the plan file being self-contained.
114
+ </caution>
115
+
109
116
  <critical>
117
+ You MUST use `{{askToolName}}` only to clarify requirements or choose between approaches.
118
+
110
119
  Your turn ends ONLY by:
111
120
  1. Using `{{askToolName}}` to gather information, OR
112
- 2. Calling `resolve` with `action: "apply"`, `reason`, and `extra: { title: "<PLAN_TITLE>" }` when ready — this triggers user approval, then implementation with full tool access
121
+ 2. Calling `resolve` with `action: "apply"`, `reason`, and `extra: { title: "<slug>" }` (the slug of your `local://<slug>-plan.md`) when ready — this triggers user approval, then implementation with full tool access.
113
122
 
114
- You NEVER ask plan approval via text or `{{askToolName}}`; you MUST use `resolve`.
115
- You MUST keep going until complete.
123
+ You NEVER ask for plan approval via text or `{{askToolName}}`; you MUST use `resolve`.
124
+ You MUST keep going until the plan is decision-complete.
116
125
  </critical>
@@ -16,7 +16,7 @@ The plan path is for subagent handoff only. You already have the plan; NEVER rea
16
16
 
17
17
  The full plan is injected below. You MUST execute it now:
18
18
 
19
- <plan path="{{finalPlanFilePath}}">
19
+ <plan path="{{planFilePath}}">
20
20
  {{planContent}}
21
21
  </plan>
22
22
 
@@ -1,5 +1,5 @@
1
1
  <system-notice>
2
- The user's message above contains the **workflow** keyword: drive this task as a deterministic multi-subagent workflow. Author the orchestration as Python in the `eval` tool and fan out subagents — to be comprehensive (decompose and cover in parallel), to be confident (independent perspectives and adversarial checks before you commit), or to take on scale one context can't hold (audits, migrations, broad sweeps). This overrides any default tendency to do the whole task inline when fanning out would be more thorough.
2
+ The user's message above contains the **workflowz** keyword: drive this task as a deterministic multi-subagent workflow. Author the orchestration as Python in the `eval` tool and fan out subagents — to be comprehensive (decompose and cover in parallel), to be confident (independent perspectives and adversarial checks before you commit), or to take on scale one context can't hold (audits, migrations, broad sweeps). This overrides any default tendency to do the whole task inline when fanning out would be more thorough.
3
3
 
4
4
  <when>
5
5
  Worth it when the task benefits from decomposition + parallel coverage, or from independent/adversarial cross-checking before you commit. For a quick lookup or single edit, just do it directly — don't spin up agents. Scout inline FIRST (list the files, scope the diff, find the call sites) to discover the work-list, then fan out over it — you don't need to know the shape before the *task*, only before the *fan-out*. Common shapes, each a well-scoped `eval` call you can chain across turns:
@@ -31,6 +31,15 @@ Executes bash command in shell session for terminal operations like git, bun, ca
31
31
  - `async: true` only defers **reporting** of the result — it does NOT disable, extend, or detach the timeout. A daemon started with `async: true` is still killed when `timeout` elapses, regardless of how long the agent waits before reading the result.
32
32
  - For long-running daemons (dev servers, watchers): either pass an explicit large `timeout` (up to `3600`), or fully detach the process from this shell using `nohup … &` / `setsid … &` / `disown` so it survives independent of the bash call's lifecycle.
33
33
  {{/if}}
34
+ {{#if autoBackgroundEnabled}}
35
+
36
+ ## Auto-background
37
+
38
+ - A foreground (non-`async`) call that has not completed within **{{autoBackgroundThresholdSeconds}}s** is automatically converted into a background job and returns a `Background job <id> started: …` notice with the buffered output so far. The command keeps running; the final result is delivered as a follow-up tool call when it completes.
39
+ - This is NOT a failure or a re-queue. Treat the notice as "still running, will report back" — do not retry the same command, and do not wait synchronously for it.
40
+ - Auto-backgrounding does NOT extend `timeout`: the job is still killed at the original deadline.
41
+ - If you need the result inline (e.g. piping into another command), raise `timeout` above the expected duration so it finishes before the threshold matters{{#if asyncEnabled}}, or set `async: true` up front so the contract is explicit{{/if}}.
42
+ {{/if}}
34
43
 
35
44
  # Output minimizer
36
45
 
@@ -26,7 +26,7 @@ Drives real Chromium tab; full puppeteer access via JS execution.
26
26
  - `tab.waitForResponse(pattern, { timeout? })` — pattern substring, `RegExp`, or `(response) => boolean`. Returns raw puppeteer `HTTPResponse` (call `.text()` / `.json()` / `.status()` / `.headers()` on it).
27
27
  - `tab.evaluate(fn, …args)` — sugar for `page.evaluate` with abort signal already wired. Use this instead of dropping to `page.evaluate` for ad-hoc DOM reads.
28
28
  - `tab.screenshot({ selector?, fullPage?, save?, silent? })` — captures screenshot and **auto-attaches to tool output for you to view** (unless `silent: true`). `save` is **strictly optional**: OMIT when you just want to look at page — downscaled image shown regardless, full-res capture written to temp file automatically. Pass `save` (a path) ONLY when you deliberately need to keep a full-res copy on disk for later use; `browser.screenshotDir` does same for every shot. NEVER invent `save` path for throwaway/temporary screenshot.
29
- - `tab.extract(format = "markdown")` — Readability-extracted page content.
29
+ - `tab.extract(format = "markdown")` — returns Readability-extracted page content as a string (`"markdown"` or `"text"`). Throws if the page yields no readable content.
30
30
  - Selectors accept CSS plus puppeteer query handlers: `aria/Sign in`, `text/Continue`, `xpath/…`, `pierce/…`. Playwright-style `p-aria/[name="…"]`, `p-text/…` normalized.
31
31
  - Default `tab.observe()` over `tab.screenshot()` for page state. Screenshot only when visual appearance matters.
32
32
  </instruction>
@@ -46,8 +46,9 @@ tool.<name>(args) → unknown
46
46
  Invoke any session tool by name. `args` is the tool's parameter object.
47
47
  llm(prompt, model?="default", system?=None, schema?=None) → str | dict
48
48
  Oneshot, stateless LLM call (no history, no tools). `model` picks a tier: "smol" (fast), "default" (this session's model), "slow" (most capable). Pass `system` for a system prompt. Pass a JSON-Schema `schema` to force structured output and get the parsed object back; otherwise returns the completion text.
49
- agent(prompt, agent_type?="task", model?=None, context?=None, label?=None, schema?=None) → str | dict
49
+ {{#if spawns}}agent(prompt, agent_type?="task", model?=None, context?=None, label?=None, schema?=None) → str | dict
50
50
  Run a subagent and return its final output. Defaults to the bundled "task" agent; pass `agent_type`/`agentType` for another discovered agent. Pass a JSON-Schema `schema` to force structured output and get the parsed object back.
51
+ {{/if}}
51
52
  parallel(thunks) → list
52
53
  Run thunks (callables) through a bounded pool, preserving input order. The pool is as wide as a `task` tool batch (tracks the `task.maxConcurrency` setting), so fan out as wide as the work divides — don't pre-shrink it. Barrier: returns once all finish; a thunk that throws propagates.
53
54
  pipeline(items, ...stages) → list
@@ -18,8 +18,8 @@ Append `:<sel>` to `path`. The bare path falls back to the default mode.
18
18
  - `:50` / `:50-` — read from line 50 onward.
19
19
  - `:50-200` — lines 50–200 inclusive.
20
20
  - `:50+150` — 150 lines starting at line 50.
21
- - `:20+1` — exactly one line.
22
- - `:5-16,960-973` — multiple ranges in one call (sorted, overlaps merged).
21
+ - `:20+1` — anchor on line 20 (single-range reads expand by ≤1 leading and ≤3 trailing context lines).
22
+ - `:5-16,960-973` — multiple ranges in one call (sorted, overlaps merged). Multi-range mode returns exact bounds with no context padding.
23
23
  - `:raw` — verbatim text; no anchors, no summary, no line prefixes.
24
24
  - `:2-4:raw` or `:raw:2-4` — range AND verbatim; the two compose in either order.
25
25
  - `:conflicts` — one-line-per-block index of every unresolved git merge conflict.
package/src/sdk.ts CHANGED
@@ -10,7 +10,6 @@ import {
10
10
  } from "@oh-my-pi/pi-agent-core";
11
11
  import {
12
12
  type CredentialDisabledEvent,
13
- isUsageLimitError,
14
13
  type Message,
15
14
  type Model,
16
15
  type SimpleStreamOptions,
@@ -24,7 +23,6 @@ import type { Component } from "@oh-my-pi/pi-tui";
24
23
  import {
25
24
  $env,
26
25
  $flag,
27
- extractRetryHint,
28
26
  getAgentDbPath,
29
27
  getAgentDir,
30
28
  getAuthBrokerSnapshotCachePath,
@@ -39,10 +37,13 @@ import { type AsyncJob, AsyncJobManager, isBackgroundJobSupportEnabled } from ".
39
37
  import { loadCapability } from "./capability";
40
38
  import { type Rule, ruleCapability, setActiveRules } from "./capability/rule";
41
39
  import { bucketRules } from "./capability/rule-buckets";
40
+ import { createApiKeyResolver } from "./config/api-key-resolver";
42
41
  import { shouldEnableAppendOnlyContext } from "./config/append-only-context-mode";
43
42
  import { ModelRegistry } from "./config/model-registry";
44
43
  import {
44
+ defaultModelPerProvider,
45
45
  formatModelString,
46
+ getModelMatchPreferences,
46
47
  parseModelPattern,
47
48
  parseModelString,
48
49
  resolveAllowedModels,
@@ -280,6 +281,8 @@ export interface CreateAgentSessionOptions {
280
281
  /** Optional provider-facing session identifier for prompt caches and sticky auth selection.
281
282
  * Keeps persisted session files isolated while reusing provider-side caches. */
282
283
  providerSessionId?: string;
284
+ /** Optional provider-facing prompt cache key, distinct from request lineage. */
285
+ providerPromptCacheKey?: string;
283
286
 
284
287
  /** Custom tools to register (in addition to built-in tools). Accepts both CustomTool and ToolDefinition. */
285
288
  customTools?: (CustomTool | ToolDefinition)[];
@@ -708,6 +711,7 @@ function customToolToDefinition(tool: CustomTool): ToolDefinition {
708
711
  parameters: tool.parameters,
709
712
  hidden: tool.hidden,
710
713
  deferrable: tool.deferrable,
714
+ approval: typeof tool.approval === "function" ? tool.approval.bind(tool) : tool.approval,
711
715
  mcpServerName: tool.mcpServerName,
712
716
  mcpToolName: tool.mcpToolName,
713
717
  execute: (toolCallId, params, signal, onUpdate, ctx) =>
@@ -1029,9 +1033,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1029
1033
  const hasServiceTierEntry = existingBranch.some(entry => entry.type === "service_tier_change");
1030
1034
 
1031
1035
  const hasExplicitModel = options.model !== undefined || options.modelPattern !== undefined;
1032
- const modelMatchPreferences = {
1033
- usageOrder: settings.getStorage()?.getModelUsageOrder(),
1034
- };
1036
+ const modelMatchPreferences = getModelMatchPreferences(settings);
1035
1037
  const allowedModels = await logger.time("resolveAllowedModels", () =>
1036
1038
  resolveAllowedModels(modelRegistry, settings, modelMatchPreferences),
1037
1039
  );
@@ -1552,9 +1554,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1552
1554
  // Resolve deferred --model pattern now that extension models are registered.
1553
1555
  if (!model && options.modelPattern) {
1554
1556
  const availableModels = modelRegistry.getAll();
1555
- const matchPreferences = {
1556
- usageOrder: settings.getStorage()?.getModelUsageOrder(),
1557
- };
1557
+ const matchPreferences = getModelMatchPreferences(settings);
1558
1558
  const { model: resolved } = parseModelPattern(options.modelPattern, availableModels, matchPreferences, {
1559
1559
  modelRegistry,
1560
1560
  });
@@ -1573,12 +1573,30 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1573
1573
  // Re-resolve the allowed set: extension factories above may have
1574
1574
  // registered providers/models that weren't visible at startup.
1575
1575
  const fallbackCandidates = await resolveAllowedModels(modelRegistry, settings, modelMatchPreferences);
1576
- for (const candidate of fallbackCandidates) {
1577
- if (await hasModelApiKey(candidate)) {
1578
- model = candidate;
1576
+ // Prefer each provider's configured default model
1577
+ // (DEFAULT_MODEL_PER_PROVIDER) over raw catalog order. Without this the
1578
+ // first-run fallback picks whatever model sorts first in models.json for
1579
+ // the winning provider (e.g. anthropic's claude-3-5-sonnet-20240620)
1580
+ // instead of the intended provider default (claude-sonnet-4-6). Mirrors
1581
+ // findInitialModel's precedence.
1582
+ for (const [provider, defaultId] of Object.entries(defaultModelPerProvider)) {
1583
+ const preferred = fallbackCandidates.find(
1584
+ candidate => candidate.provider === provider && candidate.id === defaultId,
1585
+ );
1586
+ if (preferred && (await hasModelApiKey(preferred))) {
1587
+ model = preferred;
1579
1588
  break;
1580
1589
  }
1581
1590
  }
1591
+ // Otherwise, first available model with a valid API key.
1592
+ if (!model) {
1593
+ for (const candidate of fallbackCandidates) {
1594
+ if (await hasModelApiKey(candidate)) {
1595
+ model = candidate;
1596
+ break;
1597
+ }
1598
+ }
1599
+ }
1582
1600
  if (model) {
1583
1601
  if (modelFallbackMessage) {
1584
1602
  modelFallbackMessage += `. Using ${model.provider}/${model.id}`;
@@ -2001,6 +2019,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
2001
2019
  onPayload,
2002
2020
  onResponse,
2003
2021
  sessionId: providerSessionId,
2022
+ promptCacheKey: options.providerPromptCacheKey,
2004
2023
  transformContext,
2005
2024
  steeringMode: settings.get("steeringMode") ?? "one-at-a-time",
2006
2025
  followUpMode: settings.get("followUpMode") ?? "one-at-a-time",
@@ -2017,9 +2036,15 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
2017
2036
  kimiApiFormat: settings.get("providers.kimiApiFormat") ?? "anthropic",
2018
2037
  preferWebsockets: preferOpenAICodexWebsockets,
2019
2038
  getToolContext: tc => toolContextStore.getContext(tc),
2020
- getApiKey: async provider => {
2039
+ getApiKey: async (provider, ctx) => {
2021
2040
  // Read agent.sessionId at call time so credential selection stays aligned
2022
2041
  // with metadataResolver after /new, fork, resume, or branch switches.
2042
+ // Retry steps (ctx carries an auth error) drive the central a/b/c
2043
+ // policy — force-refresh the same account, then rotate to a sibling —
2044
+ // and may legitimately yield no key when every account is exhausted.
2045
+ if (ctx?.error !== undefined) {
2046
+ return createApiKeyResolver(modelRegistry, provider, { sessionId: agent.sessionId })(ctx);
2047
+ }
2023
2048
  const key = await modelRegistry.getApiKeyForProvider(provider, agent.sessionId);
2024
2049
  if (!key) {
2025
2050
  throw new Error(`No API key found for provider "${provider}"`);
@@ -2033,40 +2058,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
2033
2058
  return streamSimple(streamModel, context, {
2034
2059
  ...streamOptions,
2035
2060
  openrouterVariant: streamOptions?.openrouterVariant ?? openrouterVariant,
2036
- onAuthError: async (provider, oldKey, error) => {
2037
- const message = error instanceof Error ? error.message : String(error);
2038
- // streamSimple invokes this for both 401 auth failures AND
2039
- // rotatable usage-limit errors (Codex usage_limit_reached,
2040
- // Anthropic usage_limit_reached, etc.). The two need
2041
- // different storage actions: a real 401 means the credential
2042
- // is bad and should be marked suspect; a usage limit just
2043
- // means this account is parked until reset and should be
2044
- // temporarily blocked so a sibling can pick the request up.
2045
- if (isUsageLimitError(message)) {
2046
- const retryAfterMs = extractRetryHint(undefined, message);
2047
- const switched = await modelRegistry.authStorage.markUsageLimitReached(provider, agent.sessionId, {
2048
- retryAfterMs,
2049
- signal: streamOptions?.signal,
2050
- });
2051
- logger.debug("Retrying provider request after usage-limit block", {
2052
- provider,
2053
- switched,
2054
- retryAfterMs,
2055
- error: message,
2056
- });
2057
- if (!switched) return undefined;
2058
- return modelRegistry.getApiKeyForProvider(provider, agent.sessionId);
2059
- }
2060
- await modelRegistry.authStorage.invalidateCredentialMatching(provider, oldKey, {
2061
- signal: streamOptions?.signal,
2062
- sessionId: agent.sessionId,
2063
- });
2064
- logger.debug("Retrying provider request after credential invalidation", {
2065
- provider,
2066
- error: message,
2067
- });
2068
- return modelRegistry.getApiKeyForProvider(provider, agent.sessionId);
2069
- },
2070
2061
  });
2071
2062
  },
2072
2063
  cursorExecHandlers,