npm - @oh-my-pi/pi-coding-agent - Versions diffs - 13.14.0 → 13.15.2 - Mend

@oh-my-pi/pi-coding-agent 13.14.0 → 13.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/CHANGELOG.md +140 -0
package/package.json +10 -8
package/src/autoresearch/command-initialize.md +34 -0
package/src/autoresearch/command-resume.md +17 -0
package/src/autoresearch/contract.ts +332 -0
package/src/autoresearch/dashboard.ts +447 -0
package/src/autoresearch/git.ts +243 -0
package/src/autoresearch/helpers.ts +458 -0
package/src/autoresearch/index.ts +693 -0
package/src/autoresearch/prompt.md +227 -0
package/src/autoresearch/resume-message.md +16 -0
package/src/autoresearch/state.ts +386 -0
package/src/autoresearch/tools/init-experiment.ts +310 -0
package/src/autoresearch/tools/log-experiment.ts +833 -0
package/src/autoresearch/tools/run-experiment.ts +640 -0
package/src/autoresearch/types.ts +218 -0
package/src/cli/args.ts +8 -2
package/src/cli/initial-message.ts +58 -0
package/src/config/keybindings.ts +417 -212
package/src/config/model-registry.ts +1 -0
package/src/config/model-resolver.ts +57 -9
package/src/config/settings-schema.ts +38 -10
package/src/config/settings.ts +1 -4
package/src/exec/bash-executor.ts +7 -5
package/src/export/html/template.css +43 -13
package/src/export/html/template.generated.ts +1 -1
package/src/export/html/template.html +1 -0
package/src/export/html/template.js +107 -0
package/src/extensibility/extensions/types.ts +31 -8
package/src/internal-urls/docs-index.generated.ts +1 -1
package/src/lsp/index.ts +1 -1
package/src/main.ts +44 -44
package/src/mcp/oauth-discovery.ts +1 -1
package/src/modes/acp/acp-agent.ts +957 -0
package/src/modes/acp/acp-event-mapper.ts +531 -0
package/src/modes/acp/acp-mode.ts +13 -0
package/src/modes/acp/index.ts +2 -0
package/src/modes/components/agent-dashboard.ts +5 -4
package/src/modes/components/bash-execution.ts +40 -11
package/src/modes/components/custom-editor.ts +47 -47
package/src/modes/components/extensions/extension-dashboard.ts +2 -1
package/src/modes/components/history-search.ts +2 -1
package/src/modes/components/hook-editor.ts +2 -1
package/src/modes/components/hook-input.ts +8 -7
package/src/modes/components/hook-selector.ts +15 -10
package/src/modes/components/keybinding-hints.ts +9 -9
package/src/modes/components/login-dialog.ts +3 -3
package/src/modes/components/mcp-add-wizard.ts +2 -1
package/src/modes/components/model-selector.ts +14 -3
package/src/modes/components/oauth-selector.ts +2 -1
package/src/modes/components/python-execution.ts +2 -3
package/src/modes/components/session-selector.ts +2 -1
package/src/modes/components/settings-selector.ts +2 -1
package/src/modes/components/status-line-segment-editor.ts +2 -1
package/src/modes/components/tool-execution.ts +4 -5
package/src/modes/components/tree-selector.ts +3 -2
package/src/modes/components/user-message-selector.ts +3 -8
package/src/modes/components/user-message.ts +16 -0
package/src/modes/controllers/command-controller.ts +0 -2
package/src/modes/controllers/extension-ui-controller.ts +89 -4
package/src/modes/controllers/input-controller.ts +29 -23
package/src/modes/controllers/mcp-command-controller.ts +1 -1
package/src/modes/index.ts +1 -0
package/src/modes/interactive-mode.ts +17 -5
package/src/modes/print-mode.ts +1 -1
package/src/modes/prompt-action-autocomplete.ts +7 -7
package/src/modes/rpc/rpc-mode.ts +7 -2
package/src/modes/rpc/rpc-types.ts +1 -0
package/src/modes/theme/theme.ts +53 -44
package/src/modes/types.ts +9 -2
package/src/modes/utils/hotkeys-markdown.ts +19 -19
package/src/modes/utils/keybinding-matchers.ts +21 -0
package/src/modes/utils/ui-helpers.ts +1 -1
package/src/patch/hashline.ts +139 -127
package/src/patch/index.ts +77 -59
package/src/patch/shared.ts +19 -11
package/src/prompts/tools/hashline.md +43 -116
package/src/sdk.ts +34 -17
package/src/session/agent-session.ts +123 -30
package/src/session/session-manager.ts +32 -31
package/src/session/streaming-output.ts +87 -37
package/src/tools/ask.ts +56 -30
package/src/tools/bash-interactive.ts +2 -6
package/src/tools/bash-interceptor.ts +1 -39
package/src/tools/bash-skill-urls.ts +1 -1
package/src/tools/browser.ts +1 -1
package/src/tools/gemini-image.ts +1 -1
package/src/tools/python.ts +2 -2
package/src/tools/resolve.ts +1 -1
package/src/utils/child-process.ts +88 -0

package/src/patch/shared.ts CHANGED

@@ -157,20 +157,28 @@ function formatStreamingHashlineEdits(edits: Partial<HashlineToolEdit>[], uiThem
 			return { srcLabel: "• (incomplete edit)", dst: "" };
 		}
-		const contentLines = Array.isArray(edit.lines) ? (edit.lines as string[]).join("\n") : "";
+		const contentLines = Array.isArray(edit.content) ? (edit.content as string[]).join("\n") : "";
+		const loc = edit.loc;
-		const op = typeof edit.op === "string" ? edit.op : "?";
-		const pos = typeof edit.pos === "string" ? edit.pos : undefined;
-		const end = typeof edit.end === "string" ? edit.end : undefined;
-		if (pos && end && pos !== end) {
-			return { srcLabel: `• ${op} ${pos}…${end}`, dst: contentLines };
+		if (loc === "append" || loc === "prepend") {
+			return { srcLabel: `• ${loc} (file-level)`, dst: contentLines };
 		}
-		const anchor = pos ?? end;
-		if (anchor) {
-			return { srcLabel: `\u2022 ${op} ${anchor}`, dst: contentLines };
+		if (typeof loc === "object" && loc) {
+			if ("block" in loc && typeof loc.block === "object" && loc.block) {
+				const rb = loc.block as { pos?: string; end?: string };
+				return { srcLabel: `• block ${rb.pos ?? "?"}…${rb.end ?? "?"}`, dst: contentLines };
+			}
+			if ("line" in loc) {
+				return { srcLabel: `• line ${(loc as { line: string }).line}`, dst: contentLines };
+			}
+			if ("append" in loc) {
+				return { srcLabel: `• append ${(loc as { append: string }).append}`, dst: contentLines };
+			}
+			if ("prepend" in loc) {
+				return { srcLabel: `• prepend ${(loc as { prepend: string }).prepend}`, dst: contentLines };
+			}
 		}
-		return { srcLabel: `\u2022 ${op} (file-level)`, dst: contentLines };
+		return { srcLabel: "• (unknown edit)", dst: contentLines };
 	}
 }
 function formatMetadataLine(lineCount: number | null, language: string | undefined, uiTheme: Theme): string {

package/src/prompts/tools/hashline.md CHANGED

@@ -1,25 +1,25 @@
-Applies precise, surgical file edits by referencing `LINE#ID` tags from `read` output. Each tag uniquely identifies a line, so edits remain stable even when lines shift.
+Applies precise file edits using `LINE#ID` anchors from `read` output.
-Read the file first to get fresh tags. Submit one `edit` call per file with all operations batched — tags shift after each edit, so multiple calls require re-reading between them.
+Read the file first. Copy anchors exactly from the latest `read` output. In one `edit` call, batch all edits for one file. After any successful edit, re-read before editing that file again.
+This matters: your output is checked against the real file state. Invalid anchors, duplicated boundary lines, or semantically equivalent rewrites will fail.
 <operations>
-**`path`** — the path to the file to edit.
-**`move`** — if set, move the file to the given path.
-**`delete`** — if true, delete the file.
+**Top level**
+- `path` — file path
+- `move` — optional rename target
+- `delete` — optional whole-file delete
+- `edits` — array of `{ loc, content }` entries
+**Edit entry**: `{ loc, content }`
+- `loc` — where to apply the edit (see below)
+- `content` — replacement/inserted lines (array of strings preferred, `null` to delete)
-**`edits[n].pos`** — the anchor line. Meaning depends on `op`:
-  - if `replace`: first line to rewrite
-  - if `prepend`: line to insert new lines **before**; omit for beginning of file
-  - if `append`: line to insert new lines **after**; omit for end of file
-**`edits[n].end`** — range replace only. The last line of the range (inclusive). Omit for single-line replace.
-**`edits[n].lines`** — the replacement content:
-  - for `replace`: the exact lines that will replace `[pos, end??pos]` inclusively (or the single `pos` line when `end` is omitted)
-  - for `prepend`/`append`: the new lines to insert
-  - `[""]` — blank line
-  - `null` or `[]` — delete if replace
-- If `lines` contains content that already exists after `end`, those lines **will be duplicated** in the output.
-- Keep `lines` to exactly what belongs inside the consumed range.
-- Ops are applied bottom-up. Tags **MUST** be referenced from the most recent `read` output.
+**`loc` values**
+- `"append"` / `"prepend"` — insert at end/start of file
+- `{ append: "N#ID" }` / `{ prepend: "N#ID" }` — insert after/before anchored line
+- `{ line: "N#ID" }` — replace exactly one anchored line
+- `{ block: { pos: "N#ID", end: "N#ID" } }` — replace inclusive `pos..end`
 </operations>
 <examples>
@@ -45,58 +45,14 @@ All examples below reference the same file, `util.ts`:
 {{hlinefull 18 "}"}}
 ```
-<example name="single-line replace">
-Change the timeout from `5000` to `30_000`:
+<example name="replace a block body">
+Replace only the catch body. Do not target the shared boundary line `} catch (err) {`.
 ```
 {
   path: "util.ts",
   edits: [{
-    op: "replace",
-    pos: {{hlineref 2 "const timeout = 5000;"}},
-    lines: ["const timeout = 30_000;"]
-  }]
-}
-```
-</example>
-<example name="delete lines">
-Single line — `lines: null` deletes entirely:
-```
-{
-  path: "util.ts",
-  edits: [{
-    op: "replace",
-    pos: {{hlineref 1 "// @ts-ignore"}},
-    lines: null
-  }]
-}
-```
-Range — remove the legacy block (lines 10–11):
-```
-{
-  path: "util.ts",
-  edits: [{
-    op: "replace",
-    pos: {{hlineref 10 "\t// TODO: remove after migration"}},
-    end: {{hlineref 11 "\tlegacy();"}},
-    lines: null
-  }]
-}
-```
-</example>
-<example name="rewrite a block body — shape (a)">
-Replace the catch body with smarter error handling. Shape (a): `pos` is the first body line, `end` is the last body line. The catch header (line 14) and its closer (line 17) are outside the range and stay untouched.
-When changing body content, replace the **entire** body span — not just one line inside it. Patching one line leaves the rest of the body stale.
-```
-{
-  path: "util.ts",
-  edits: [{
-    op: "replace",
-    pos: {{hlineref 15 "\t\tconsole.error(err);"}},
-    end: {{hlineref 16 "\t\treturn null;"}},
-    lines: [
+    loc: { block: { pos: {{hlineref 15 "\t\tconsole.error(err);"}}, end: {{hlineref 16 "\t\treturn null;"}} } },
+    content: [
       "\t\tif (isEnoent(err)) return null;",
       "\t\tthrow err;"
     ]
@@ -105,62 +61,38 @@ When changing body content, replace the **entire** body span — not just one li
 ```
 </example>
-<example name="replace whole block — shape (b)">
-Simplify `beta()` to a one-liner. Shape (b): `pos`=header, `end`=closer, re-emit all in `lines`.
-Bad — `end` stops at the inner `\t}` on line 17, so the outer `}` on line 18 survives. Result: two consecutive `}` lines.
+<example name="replace one line">
 ```
 {
   path: "util.ts",
   edits: [{
-    op: "replace",
-    pos: {{hlineref 9 "function beta() {"}},
-    end: {{hlineref 17 "\t}"}},
-    lines: [
-      "function beta() {",
-      "\treturn parse(data);",
-      "}"
-    ]
+    loc: { line: {{hlineref 2 "const timeout = 5000;"}} },
+    content: ["const timeout = 30_000;"]
   }]
 }
 ```
-Good — `end` includes the function's own `}` on line 18, so the old closer is consumed:
+</example>
+<example name="delete a range">
 ```
 {
   path: "util.ts",
   edits: [{
-    op: "replace",
-    pos: {{hlineref 9 "function beta() {"}},
-    end: {{hlineref 18 "}"}},
-    lines: [
-      "function beta() {",
-      "\treturn parse(data);",
-      "}"
-    ]
+    loc: { block: { pos: {{hlineref 10 "\t// TODO: remove after migration"}}, end: {{hlineref 11 "\tlegacy();"}} } },
+    content: null
   }]
 }
 ```
 </example>
-<example name="avoid shared boundary lines">
-Do not anchor `replace` on a mixed boundary line such as `} catch (err) {`, `} else {`, `}),`, or `},{`. Those lines belong to two adjacent structures at once.
-Bad — if you need to change code on both sides of that line, replacing just the boundary span will usually leave one side's syntax behind.
-Good — choose one of two safe shapes instead:
-- move inward and replace only body-owned lines
-- expand outward and replace one whole owned block, consuming its real closer/separator too
-</example>
-<example name="insert between sibling declarations">
-Add a `gamma()` function between `alpha()` and `beta()`. Use `prepend` on the next declaration — not `append` on the previous block's closing brace — so the anchor is a stable declaration boundary.
+<example name="insert before sibling">
+When adding a sibling declaration, prefer `prepend` on the next declaration.
 ```
 {
   path: "util.ts",
   edits: [{
-    op: "prepend",
-    pos: {{hlineref 9 "function beta() {"}},
-    lines: [
+    loc: { prepend: {{hlineref 9 "function beta() {"}} },
+    content: [
       "function gamma() {",
       "\tvalidate();",
       "}",
@@ -169,22 +101,17 @@ Add a `gamma()` function between `alpha()` and `beta()`. Use `prepend` on the ne
   }]
 }
 ```
-Use a trailing `""` to preserve the blank line between sibling declarations.
 </example>
 </examples>
 <critical>
-- You **MUST NOT** use this tool to reformat, reindent, or adjust whitespace — run the project's formatter instead.
-- Every tag **MUST** be copied exactly from your most recent `read` output as `N#ID`. Stale or mistyped tags cause mismatches.
-- Edit payload: `{ path, edits[] }`. Each entry: `op`, `lines`, optional `pos`/`end`. No extra keys.
-- For `append`/`prepend`, `lines` **MUST** contain only the newly introduced content. Do not re-emit surrounding content, or terminators that already exist.
-- When changing existing code near a block tail or closing delimiter, default to `replace` over the owned span instead of inserting around the boundary.
-- When adding a sibling declaration, default to `prepend` on the next sibling declaration instead of `append` on the previous block's closing brace.
-- **Block boundaries travel together.** For a block `{ header / body / closer }`, there are exactly two valid replace shapes: (a) replace only the body — `pos`=first body line, `end`=last body line, leave the header and closer untouched; or (b) replace the whole block — `pos`=header, `end`=closer, re-emit all three in `lines`. Never split them: do not set `end` to the closer while omitting it from `lines` (deletes it), and do not emit the closer in `lines` without including it in `end` (duplicates it). This applies to every block terminator: `}`, `continue`, `break`, `return`, `throw`.
-- **Never target shared boundary lines.** Do not use `replace` spans that start, end, or pivot on a line that closes one construct and opens/separates another, such as `},{`, `}),`, `} else {`, or `} catch (err) {`. Those lines are not owned by a single block. Move the range inward to body-only lines, or widen it to consume one whole owned construct including its true trailing delimiter.
-- **`lines` must not extend past `end`.** `lines` replaces exactly `pos..end`. Content after `end` survives. If you include lines in `lines` that exist after `end`, they will appear twice. Either extend `end` to cover all lines you are re-emitting, or remove the extra lines from `lines`.
-- `lines` entries **MUST** be literal file content with indentation copied exactly from the `read` output. If the file uses tabs, use a real tab character.
-- After any successful `edit` call on a file, the next change to that same file **MUST** start with a fresh `read`. Do not chain a second `edit` call off stale mental state, even if the intended range is nearby.
-- If you need a second change in the same local region, default to one wider `replace` over the whole owned block instead of a sequence of micro-edits on adjacent lines. Repeated small patches in a moving region are unstable.
-- If a local region is already malformed or a prior patch partially landed, stop nibbling at it. Re-read the file and replace the full owned block from a stable boundary; for a small file, prefer rewriting the file over stacking more tiny repairs.
+- Make the minimum exact edit. Do not rewrite nearby code unless the consumed range requires it.
+- Use anchors exactly as `N#ID` from the latest `read` output.
+- `block` requires both `pos` and `end`. Other anchored ops require one anchor.
+- Replace exactly the owned span. If `content` re-emits content beyond `end`, it will duplicate.
+- **Boundary duplication trap**: when replacing a block, `end` must be the **last line of the block** (e.g. the closing `}`), not the last *content* line before it. Otherwise the closing delimiter survives and your replacement adds a second copy.
+- Do not target shared boundary lines such as `} else {`, `} catch (…) {`, `}),`, or `},{`.
+- For a block, either replace only the body or replace the whole block. Do not split block boundaries.
+- `content` must be literal file content with matching indentation. If the file uses tabs, use real tabs.
+- Do not use this tool to reformat or clean up unrelated code.
 </critical>

package/src/sdk.ts CHANGED

@@ -13,6 +13,7 @@ import type { Component } from "@oh-my-pi/pi-tui";
 import { $env, getAgentDbPath, getAgentDir, getProjectDir, logger, postmortem } from "@oh-my-pi/pi-utils";
 import chalk from "chalk";
 import { AsyncJobManager } from "./async";
+import { createAutoresearchExtension } from "./autoresearch";
 import { loadCapability } from "./capability";
 import { type Rule, ruleCapability } from "./capability/rule";
 import { ModelRegistry } from "./config/model-registry";
@@ -143,6 +144,9 @@ export interface CreateAgentSessionOptions {
 	/** System prompt. String replaces default, function receives default and returns final. */
 	systemPrompt?: string | ((defaultPrompt: string) => string);
+	/** Optional provider-facing session identifier for prompt caches and sticky auth selection.
+	 * Keeps persisted session files isolated while reusing provider-side caches. */
+	providerSessionId?: string;
 	/** Custom tools to register (in addition to built-in tools). Accepts both CustomTool and ToolDefinition. */
 	customTools?: (CustomTool | ToolDefinition)[];
@@ -666,7 +670,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		logger.time("sessionManager", () =>
 			SessionManager.create(cwd, SessionManager.getDefaultSessionDir(cwd, agentDir)),
 		);
-	const sessionId = sessionManager.getSessionId();
+	const providerSessionId = options.providerSessionId ?? sessionManager.getSessionId();
 	const modelApiKeyAvailability = new Map<string, boolean>();
 	const getModelAvailabilityKey = (candidate: Model): string =>
 		`${candidate.provider}\u0000${candidate.baseUrl ?? ""}`;
@@ -677,15 +681,17 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			return cached;
 		}
-		const hasKey = !!(await modelRegistry.getApiKey(candidate, sessionId));
+		const hasKey = !!(await modelRegistry.getApiKey(candidate, providerSessionId));
 		modelApiKeyAvailability.set(availabilityKey, hasKey);
 		return hasKey;
 	};
 	// Check if session has existing data to restore
 	const existingSession = logger.time("loadSession", () => sessionManager.buildSessionContext());
-	const hasExistingSession = existingSession.messages.length > 0;
-	const hasThinkingEntry = sessionManager.getBranch().some(entry => entry.type === "thinking_level_change");
+	const existingBranch = sessionManager.getBranch();
+	const hasExistingSession = existingBranch.length > 0;
+	const hasThinkingEntry = existingBranch.some(entry => entry.type === "thinking_level_change");
+	const hasServiceTierEntry = existingBranch.some(entry => entry.type === "service_tier_change");
 	const hasExplicitModel = options.model !== undefined || options.modelPattern !== undefined;
 	const modelMatchPreferences = {
@@ -1010,6 +1016,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	}
 	const inlineExtensions: ExtensionFactory[] = options.extensions ? [...options.extensions] : [];
+	inlineExtensions.push(createAutoresearchExtension);
 	if (customTools.length > 0) {
 		inlineExtensions.push(createCustomToolsExtension(customTools));
 	}
@@ -1283,9 +1290,15 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	const normalizedRequested = requestedToolNames.filter(name => toolRegistry.has(name));
 	const includeExitPlanMode = requestedToolNames.includes("exit_plan_mode");
 	const mcpDiscoveryEnabled = settings.get("mcp.discoveryMode") ?? false;
+	const defaultInactiveToolNames = new Set(
+		registeredTools.filter(tool => tool.definition.defaultInactive).map(tool => tool.definition.name),
+	);
 	const requestedActiveToolNames = includeExitPlanMode
 		? normalizedRequested
 		: normalizedRequested.filter(name => name !== "exit_plan_mode");
+	const initialRequestedActiveToolNames = options.toolNames
+		? requestedActiveToolNames
+		: requestedActiveToolNames.filter(name => !defaultInactiveToolNames.has(name));
 	const explicitlyRequestedMCPToolNames = options.toolNames
 		? requestedActiveToolNames.filter(name => name.startsWith("mcp_"))
 		: [];
@@ -1300,7 +1313,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		: [];
 	let initialSelectedMCPToolNames: string[] = [];
 	let defaultSelectedMCPToolNames: string[] = [];
-	let initialToolNames = [...requestedActiveToolNames];
+	let initialToolNames = [...initialRequestedActiveToolNames];
 	if (mcpDiscoveryEnabled) {
 		const restoredSelectedMCPToolNames = existingSession.selectedMCPToolNames.filter(name => toolRegistry.has(name));
 		defaultSelectedMCPToolNames = [
@@ -1311,7 +1324,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			: [...new Set([...restoredSelectedMCPToolNames, ...defaultSelectedMCPToolNames])];
 		initialToolNames = [
 			...new Set([
-				...requestedActiveToolNames.filter(name => !name.startsWith("mcp_")),
+				...initialRequestedActiveToolNames.filter(name => !name.startsWith("mcp_")),
 				...initialSelectedMCPToolNames,
 			]),
 		];
@@ -1320,7 +1333,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	// Custom tools and extension-registered tools are always included regardless of toolNames filter
 	const alwaysInclude: string[] = [
 		...(options.customTools?.map(t => (isCustomTool(t) ? t.name : t.name)) ?? []),
-		...registeredTools.map(t => t.definition.name),
+		...registeredTools.filter(t => !t.definition.defaultInactive).map(t => t.definition.name),
 	];
 	for (const name of alwaysInclude) {
 		if (mcpDiscoveryEnabled && name.startsWith("mcp_")) {
@@ -1417,6 +1430,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		openaiWebsocketSetting === "on" ? true : openaiWebsocketSetting === "off" ? false : undefined;
 	const serviceTierSetting = settings.get("serviceTier");
+	const initialServiceTier = hasServiceTierEntry
+		? existingSession.serviceTier
+		: serviceTierSetting === "none"
+			? undefined
+			: serviceTierSetting;
 	agent = new Agent({
 		initialState: {
 			systemPrompt,
@@ -1426,7 +1445,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		},
 		convertToLlm: convertToLlmFinal,
 		onPayload,
-		sessionId: sessionManager.getSessionId(),
+		sessionId: providerSessionId,
 		transformContext,
 		steeringMode: settings.get("steeringMode") ?? "one-at-a-time",
 		followUpMode: settings.get("followUpMode") ?? "one-at-a-time",
@@ -1438,14 +1457,14 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		minP: settings.get("minP") >= 0 ? settings.get("minP") : undefined,
 		presencePenalty: settings.get("presencePenalty") >= 0 ? settings.get("presencePenalty") : undefined,
 		repetitionPenalty: settings.get("repetitionPenalty") >= 0 ? settings.get("repetitionPenalty") : undefined,
-		serviceTier: serviceTierSetting === "none" ? undefined : serviceTierSetting,
+		serviceTier: initialServiceTier,
 		kimiApiFormat: settings.get("providers.kimiApiFormat") ?? "anthropic",
 		preferWebsockets: preferOpenAICodexWebsockets,
 		getToolContext: tc => toolContextStore.getContext(tc),
 		getApiKey: async provider => {
-			// Use the provider argument from the in-flight request;
-			// agent.state.model may already be switched mid-turn.
-			const key = await modelRegistry.getApiKeyForProvider(provider, sessionId);
+			// Use the provider-facing session id for sticky credential selection so cache keys
+			// and provider auth affinity stay aligned across fresh benchmark sessions.
+			const key = await modelRegistry.getApiKeyForProvider(provider, providerSessionId);
 			if (!key) {
 				throw new Error(`No API key found for provider "${provider}"`);
 			}
@@ -1476,9 +1495,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	// Restore messages if session has existing data
 	if (hasExistingSession) {
 		agent.replaceMessages(existingSession.messages);
-		if (!hasThinkingEntry) {
-			sessionManager.appendThinkingLevelChange(thinkingLevel);
-		}
 	} else {
 		// Save initial model and thinking level for new sessions so they can be restored on resume
 		if (model) {
@@ -1509,6 +1525,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		mcpDiscoveryEnabled,
 		initialSelectedMCPToolNames,
 		defaultSelectedMCPToolNames,
+		persistInitialMCPToolSelection: !hasExistingSession,
 		defaultSelectedMCPServerNames: [...discoveryDefaultServers],
 		ttsrManager,
 		obfuscator,
@@ -1519,8 +1536,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	if (model?.api === "openai-codex-responses") {
 		try {
 			await logger.timeAsync("prewarmCodexWebsocket", prewarmOpenAICodexResponses, model, {
-				apiKey: await modelRegistry.getApiKey(model, sessionId),
-				sessionId,
+				apiKey: await modelRegistry.getApiKey(model, providerSessionId),
+				sessionId: providerSessionId,
 				preferWebsockets: preferOpenAICodexWebsockets,
 				providerSessionState: session.providerSessionState,
 			});