@oh-my-pi/pi-coding-agent 13.14.0 → 13.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/CHANGELOG.md +140 -0
  2. package/package.json +10 -8
  3. package/src/autoresearch/command-initialize.md +34 -0
  4. package/src/autoresearch/command-resume.md +17 -0
  5. package/src/autoresearch/contract.ts +332 -0
  6. package/src/autoresearch/dashboard.ts +447 -0
  7. package/src/autoresearch/git.ts +243 -0
  8. package/src/autoresearch/helpers.ts +458 -0
  9. package/src/autoresearch/index.ts +693 -0
  10. package/src/autoresearch/prompt.md +227 -0
  11. package/src/autoresearch/resume-message.md +16 -0
  12. package/src/autoresearch/state.ts +386 -0
  13. package/src/autoresearch/tools/init-experiment.ts +310 -0
  14. package/src/autoresearch/tools/log-experiment.ts +833 -0
  15. package/src/autoresearch/tools/run-experiment.ts +640 -0
  16. package/src/autoresearch/types.ts +218 -0
  17. package/src/cli/args.ts +8 -2
  18. package/src/cli/initial-message.ts +58 -0
  19. package/src/config/keybindings.ts +417 -212
  20. package/src/config/model-registry.ts +1 -0
  21. package/src/config/model-resolver.ts +57 -9
  22. package/src/config/settings-schema.ts +38 -10
  23. package/src/config/settings.ts +1 -4
  24. package/src/exec/bash-executor.ts +7 -5
  25. package/src/export/html/template.css +43 -13
  26. package/src/export/html/template.generated.ts +1 -1
  27. package/src/export/html/template.html +1 -0
  28. package/src/export/html/template.js +107 -0
  29. package/src/extensibility/extensions/types.ts +31 -8
  30. package/src/internal-urls/docs-index.generated.ts +1 -1
  31. package/src/lsp/index.ts +1 -1
  32. package/src/main.ts +44 -44
  33. package/src/mcp/oauth-discovery.ts +1 -1
  34. package/src/modes/acp/acp-agent.ts +957 -0
  35. package/src/modes/acp/acp-event-mapper.ts +531 -0
  36. package/src/modes/acp/acp-mode.ts +13 -0
  37. package/src/modes/acp/index.ts +2 -0
  38. package/src/modes/components/agent-dashboard.ts +5 -4
  39. package/src/modes/components/bash-execution.ts +40 -11
  40. package/src/modes/components/custom-editor.ts +47 -47
  41. package/src/modes/components/extensions/extension-dashboard.ts +2 -1
  42. package/src/modes/components/history-search.ts +2 -1
  43. package/src/modes/components/hook-editor.ts +2 -1
  44. package/src/modes/components/hook-input.ts +8 -7
  45. package/src/modes/components/hook-selector.ts +15 -10
  46. package/src/modes/components/keybinding-hints.ts +9 -9
  47. package/src/modes/components/login-dialog.ts +3 -3
  48. package/src/modes/components/mcp-add-wizard.ts +2 -1
  49. package/src/modes/components/model-selector.ts +14 -3
  50. package/src/modes/components/oauth-selector.ts +2 -1
  51. package/src/modes/components/python-execution.ts +2 -3
  52. package/src/modes/components/session-selector.ts +2 -1
  53. package/src/modes/components/settings-selector.ts +2 -1
  54. package/src/modes/components/status-line-segment-editor.ts +2 -1
  55. package/src/modes/components/tool-execution.ts +4 -5
  56. package/src/modes/components/tree-selector.ts +3 -2
  57. package/src/modes/components/user-message-selector.ts +3 -8
  58. package/src/modes/components/user-message.ts +16 -0
  59. package/src/modes/controllers/command-controller.ts +0 -2
  60. package/src/modes/controllers/extension-ui-controller.ts +89 -4
  61. package/src/modes/controllers/input-controller.ts +29 -23
  62. package/src/modes/controllers/mcp-command-controller.ts +1 -1
  63. package/src/modes/index.ts +1 -0
  64. package/src/modes/interactive-mode.ts +17 -5
  65. package/src/modes/print-mode.ts +1 -1
  66. package/src/modes/prompt-action-autocomplete.ts +7 -7
  67. package/src/modes/rpc/rpc-mode.ts +7 -2
  68. package/src/modes/rpc/rpc-types.ts +1 -0
  69. package/src/modes/theme/theme.ts +53 -44
  70. package/src/modes/types.ts +9 -2
  71. package/src/modes/utils/hotkeys-markdown.ts +19 -19
  72. package/src/modes/utils/keybinding-matchers.ts +21 -0
  73. package/src/modes/utils/ui-helpers.ts +1 -1
  74. package/src/patch/hashline.ts +139 -127
  75. package/src/patch/index.ts +77 -59
  76. package/src/patch/shared.ts +19 -11
  77. package/src/prompts/tools/hashline.md +43 -116
  78. package/src/sdk.ts +34 -17
  79. package/src/session/agent-session.ts +123 -30
  80. package/src/session/session-manager.ts +32 -31
  81. package/src/session/streaming-output.ts +87 -37
  82. package/src/tools/ask.ts +56 -30
  83. package/src/tools/bash-interactive.ts +2 -6
  84. package/src/tools/bash-interceptor.ts +1 -39
  85. package/src/tools/bash-skill-urls.ts +1 -1
  86. package/src/tools/browser.ts +1 -1
  87. package/src/tools/gemini-image.ts +1 -1
  88. package/src/tools/python.ts +2 -2
  89. package/src/tools/resolve.ts +1 -1
  90. package/src/utils/child-process.ts +88 -0
@@ -157,20 +157,28 @@ function formatStreamingHashlineEdits(edits: Partial<HashlineToolEdit>[], uiThem
157
157
  return { srcLabel: "• (incomplete edit)", dst: "" };
158
158
  }
159
159
 
160
- const contentLines = Array.isArray(edit.lines) ? (edit.lines as string[]).join("\n") : "";
160
+ const contentLines = Array.isArray(edit.content) ? (edit.content as string[]).join("\n") : "";
161
+ const loc = edit.loc;
161
162
 
162
- const op = typeof edit.op === "string" ? edit.op : "?";
163
- const pos = typeof edit.pos === "string" ? edit.pos : undefined;
164
- const end = typeof edit.end === "string" ? edit.end : undefined;
165
-
166
- if (pos && end && pos !== end) {
167
- return { srcLabel: `• ${op} ${pos}…${end}`, dst: contentLines };
163
+ if (loc === "append" || loc === "prepend") {
164
+ return { srcLabel: `• ${loc} (file-level)`, dst: contentLines };
168
165
  }
169
- const anchor = pos ?? end;
170
- if (anchor) {
171
- return { srcLabel: `\u2022 ${op} ${anchor}`, dst: contentLines };
166
+ if (typeof loc === "object" && loc) {
167
+ if ("block" in loc && typeof loc.block === "object" && loc.block) {
168
+ const rb = loc.block as { pos?: string; end?: string };
169
+ return { srcLabel: `• block ${rb.pos ?? "?"}…${rb.end ?? "?"}`, dst: contentLines };
170
+ }
171
+ if ("line" in loc) {
172
+ return { srcLabel: `• line ${(loc as { line: string }).line}`, dst: contentLines };
173
+ }
174
+ if ("append" in loc) {
175
+ return { srcLabel: `• append ${(loc as { append: string }).append}`, dst: contentLines };
176
+ }
177
+ if ("prepend" in loc) {
178
+ return { srcLabel: `• prepend ${(loc as { prepend: string }).prepend}`, dst: contentLines };
179
+ }
172
180
  }
173
- return { srcLabel: `\u2022 ${op} (file-level)`, dst: contentLines };
181
+ return { srcLabel: "• (unknown edit)", dst: contentLines };
174
182
  }
175
183
  }
176
184
  function formatMetadataLine(lineCount: number | null, language: string | undefined, uiTheme: Theme): string {
@@ -1,25 +1,25 @@
1
- Applies precise, surgical file edits by referencing `LINE#ID` tags from `read` output. Each tag uniquely identifies a line, so edits remain stable even when lines shift.
1
+ Applies precise file edits using `LINE#ID` anchors from `read` output.
2
2
 
3
- Read the file first to get fresh tags. Submit one `edit` call per file with all operations batched — tags shift after each edit, so multiple calls require re-reading between them.
3
+ Read the file first. Copy anchors exactly from the latest `read` output. In one `edit` call, batch all edits for one file. After any successful edit, re-read before editing that file again.
4
+
5
+ This matters: your output is checked against the real file state. Invalid anchors, duplicated boundary lines, or semantically equivalent rewrites will fail.
4
6
 
5
7
  <operations>
6
- **`path`** — the path to the file to edit.
7
- **`move`** — if set, move the file to the given path.
8
- **`delete`** — if true, delete the file.
8
+ **Top level**
9
+ - `path` — file path
10
+ - `move` — optional rename target
11
+ - `delete` — optional whole-file delete
12
+ - `edits` — array of `{ loc, content }` entries
13
+
14
+ **Edit entry**: `{ loc, content }`
15
+ - `loc` — where to apply the edit (see below)
16
+ - `content` — replacement/inserted lines (array of strings preferred, `null` to delete)
9
17
 
10
- **`edits[n].pos`** — the anchor line. Meaning depends on `op`:
11
- - if `replace`: first line to rewrite
12
- - if `prepend`: line to insert new lines **before**; omit for beginning of file
13
- - if `append`: line to insert new lines **after**; omit for end of file
14
- **`edits[n].end`** — range replace only. The last line of the range (inclusive). Omit for single-line replace.
15
- **`edits[n].lines`** — the replacement content:
16
- - for `replace`: the exact lines that will replace `[pos, end??pos]` inclusively (or the single `pos` line when `end` is omitted)
17
- - for `prepend`/`append`: the new lines to insert
18
- - `[""]` — blank line
19
- - `null` or `[]` — delete if replace
20
- - If `lines` contains content that already exists after `end`, those lines **will be duplicated** in the output.
21
- - Keep `lines` to exactly what belongs inside the consumed range.
22
- - Ops are applied bottom-up. Tags **MUST** be referenced from the most recent `read` output.
18
+ **`loc` values**
19
+ - `"append"` / `"prepend"` — insert at end/start of file
20
+ - `{ append: "N#ID" }` / `{ prepend: "N#ID" }` — insert after/before anchored line
21
+ - `{ line: "N#ID" }` — replace exactly one anchored line
22
+ - `{ block: { pos: "N#ID", end: "N#ID" } }` — replace inclusive `pos..end`
23
23
  </operations>
24
24
 
25
25
  <examples>
@@ -45,58 +45,14 @@ All examples below reference the same file, `util.ts`:
45
45
  {{hlinefull 18 "}"}}
46
46
  ```
47
47
 
48
- <example name="single-line replace">
49
- Change the timeout from `5000` to `30_000`:
48
+ <example name="replace a block body">
49
+ Replace only the catch body. Do not target the shared boundary line `} catch (err) {`.
50
50
  ```
51
51
  {
52
52
  path: "util.ts",
53
53
  edits: [{
54
- op: "replace",
55
- pos: {{hlineref 2 "const timeout = 5000;"}},
56
- lines: ["const timeout = 30_000;"]
57
- }]
58
- }
59
- ```
60
- </example>
61
-
62
- <example name="delete lines">
63
- Single line — `lines: null` deletes entirely:
64
- ```
65
- {
66
- path: "util.ts",
67
- edits: [{
68
- op: "replace",
69
- pos: {{hlineref 1 "// @ts-ignore"}},
70
- lines: null
71
- }]
72
- }
73
- ```
74
- Range — remove the legacy block (lines 10–11):
75
- ```
76
- {
77
- path: "util.ts",
78
- edits: [{
79
- op: "replace",
80
- pos: {{hlineref 10 "\t// TODO: remove after migration"}},
81
- end: {{hlineref 11 "\tlegacy();"}},
82
- lines: null
83
- }]
84
- }
85
- ```
86
- </example>
87
-
88
- <example name="rewrite a block body — shape (a)">
89
- Replace the catch body with smarter error handling. Shape (a): `pos` is the first body line, `end` is the last body line. The catch header (line 14) and its closer (line 17) are outside the range and stay untouched.
90
-
91
- When changing body content, replace the **entire** body span — not just one line inside it. Patching one line leaves the rest of the body stale.
92
- ```
93
- {
94
- path: "util.ts",
95
- edits: [{
96
- op: "replace",
97
- pos: {{hlineref 15 "\t\tconsole.error(err);"}},
98
- end: {{hlineref 16 "\t\treturn null;"}},
99
- lines: [
54
+ loc: { block: { pos: {{hlineref 15 "\t\tconsole.error(err);"}}, end: {{hlineref 16 "\t\treturn null;"}} } },
55
+ content: [
100
56
  "\t\tif (isEnoent(err)) return null;",
101
57
  "\t\tthrow err;"
102
58
  ]
@@ -105,62 +61,38 @@ When changing body content, replace the **entire** body span — not just one li
105
61
  ```
106
62
  </example>
107
63
 
108
- <example name="replace whole block — shape (b)">
109
- Simplify `beta()` to a one-liner. Shape (b): `pos`=header, `end`=closer, re-emit all in `lines`.
110
-
111
- Bad — `end` stops at the inner `\t}` on line 17, so the outer `}` on line 18 survives. Result: two consecutive `}` lines.
64
+ <example name="replace one line">
112
65
  ```
113
66
  {
114
67
  path: "util.ts",
115
68
  edits: [{
116
- op: "replace",
117
- pos: {{hlineref 9 "function beta() {"}},
118
- end: {{hlineref 17 "\t}"}},
119
- lines: [
120
- "function beta() {",
121
- "\treturn parse(data);",
122
- "}"
123
- ]
69
+ loc: { line: {{hlineref 2 "const timeout = 5000;"}} },
70
+ content: ["const timeout = 30_000;"]
124
71
  }]
125
72
  }
126
73
  ```
127
- Good — `end` includes the function's own `}` on line 18, so the old closer is consumed:
74
+ </example>
75
+
76
+ <example name="delete a range">
128
77
  ```
129
78
  {
130
79
  path: "util.ts",
131
80
  edits: [{
132
- op: "replace",
133
- pos: {{hlineref 9 "function beta() {"}},
134
- end: {{hlineref 18 "}"}},
135
- lines: [
136
- "function beta() {",
137
- "\treturn parse(data);",
138
- "}"
139
- ]
81
+ loc: { block: { pos: {{hlineref 10 "\t// TODO: remove after migration"}}, end: {{hlineref 11 "\tlegacy();"}} } },
82
+ content: null
140
83
  }]
141
84
  }
142
85
  ```
143
86
  </example>
144
87
 
145
- <example name="avoid shared boundary lines">
146
- Do not anchor `replace` on a mixed boundary line such as `} catch (err) {`, `} else {`, `}),`, or `},{`. Those lines belong to two adjacent structures at once.
147
-
148
- Bad — if you need to change code on both sides of that line, replacing just the boundary span will usually leave one side's syntax behind.
149
-
150
- Good — choose one of two safe shapes instead:
151
- - move inward and replace only body-owned lines
152
- - expand outward and replace one whole owned block, consuming its real closer/separator too
153
- </example>
154
-
155
- <example name="insert between sibling declarations">
156
- Add a `gamma()` function between `alpha()` and `beta()`. Use `prepend` on the next declaration — not `append` on the previous block's closing brace — so the anchor is a stable declaration boundary.
88
+ <example name="insert before sibling">
89
+ When adding a sibling declaration, prefer `prepend` on the next declaration.
157
90
  ```
158
91
  {
159
92
  path: "util.ts",
160
93
  edits: [{
161
- op: "prepend",
162
- pos: {{hlineref 9 "function beta() {"}},
163
- lines: [
94
+ loc: { prepend: {{hlineref 9 "function beta() {"}} },
95
+ content: [
164
96
  "function gamma() {",
165
97
  "\tvalidate();",
166
98
  "}",
@@ -169,22 +101,17 @@ Add a `gamma()` function between `alpha()` and `beta()`. Use `prepend` on the ne
169
101
  }]
170
102
  }
171
103
  ```
172
- Use a trailing `""` to preserve the blank line between sibling declarations.
173
104
  </example>
174
105
  </examples>
175
106
 
176
107
  <critical>
177
- - You **MUST NOT** use this tool to reformat, reindent, or adjust whitespace — run the project's formatter instead.
178
- - Every tag **MUST** be copied exactly from your most recent `read` output as `N#ID`. Stale or mistyped tags cause mismatches.
179
- - Edit payload: `{ path, edits[] }`. Each entry: `op`, `lines`, optional `pos`/`end`. No extra keys.
180
- - For `append`/`prepend`, `lines` **MUST** contain only the newly introduced content. Do not re-emit surrounding content, or terminators that already exist.
181
- - When changing existing code near a block tail or closing delimiter, default to `replace` over the owned span instead of inserting around the boundary.
182
- - When adding a sibling declaration, default to `prepend` on the next sibling declaration instead of `append` on the previous block's closing brace.
183
- - **Block boundaries travel together.** For a block `{ header / body / closer }`, there are exactly two valid replace shapes: (a) replace only the body — `pos`=first body line, `end`=last body line, leave the header and closer untouched; or (b) replace the whole block — `pos`=header, `end`=closer, re-emit all three in `lines`. Never split them: do not set `end` to the closer while omitting it from `lines` (deletes it), and do not emit the closer in `lines` without including it in `end` (duplicates it). This applies to every block terminator: `}`, `continue`, `break`, `return`, `throw`.
184
- - **Never target shared boundary lines.** Do not use `replace` spans that start, end, or pivot on a line that closes one construct and opens/separates another, such as `},{`, `}),`, `} else {`, or `} catch (err) {`. Those lines are not owned by a single block. Move the range inward to body-only lines, or widen it to consume one whole owned construct including its true trailing delimiter.
185
- - **`lines` must not extend past `end`.** `lines` replaces exactly `pos..end`. Content after `end` survives. If you include lines in `lines` that exist after `end`, they will appear twice. Either extend `end` to cover all lines you are re-emitting, or remove the extra lines from `lines`.
186
- - `lines` entries **MUST** be literal file content with indentation copied exactly from the `read` output. If the file uses tabs, use a real tab character.
187
- - After any successful `edit` call on a file, the next change to that same file **MUST** start with a fresh `read`. Do not chain a second `edit` call off stale mental state, even if the intended range is nearby.
188
- - If you need a second change in the same local region, default to one wider `replace` over the whole owned block instead of a sequence of micro-edits on adjacent lines. Repeated small patches in a moving region are unstable.
189
- - If a local region is already malformed or a prior patch partially landed, stop nibbling at it. Re-read the file and replace the full owned block from a stable boundary; for a small file, prefer rewriting the file over stacking more tiny repairs.
108
+ - Make the minimum exact edit. Do not rewrite nearby code unless the consumed range requires it.
109
+ - Use anchors exactly as `N#ID` from the latest `read` output.
110
+ - `block` requires both `pos` and `end`. Other anchored ops require one anchor.
111
+ - Replace exactly the owned span. If `content` re-emits content beyond `end`, it will duplicate.
112
+ - **Boundary duplication trap**: when replacing a block, `end` must be the **last line of the block** (e.g. the closing `}`), not the last *content* line before it. Otherwise the closing delimiter survives and your replacement adds a second copy.
113
+ - Do not target shared boundary lines such as `} else {`, `} catch (…) {`, `}),`, or `},{`.
114
+ - For a block, either replace only the body or replace the whole block. Do not split block boundaries.
115
+ - `content` must be literal file content with matching indentation. If the file uses tabs, use real tabs.
116
+ - Do not use this tool to reformat or clean up unrelated code.
190
117
  </critical>
package/src/sdk.ts CHANGED
@@ -13,6 +13,7 @@ import type { Component } from "@oh-my-pi/pi-tui";
13
13
  import { $env, getAgentDbPath, getAgentDir, getProjectDir, logger, postmortem } from "@oh-my-pi/pi-utils";
14
14
  import chalk from "chalk";
15
15
  import { AsyncJobManager } from "./async";
16
+ import { createAutoresearchExtension } from "./autoresearch";
16
17
  import { loadCapability } from "./capability";
17
18
  import { type Rule, ruleCapability } from "./capability/rule";
18
19
  import { ModelRegistry } from "./config/model-registry";
@@ -143,6 +144,9 @@ export interface CreateAgentSessionOptions {
143
144
 
144
145
  /** System prompt. String replaces default, function receives default and returns final. */
145
146
  systemPrompt?: string | ((defaultPrompt: string) => string);
147
+ /** Optional provider-facing session identifier for prompt caches and sticky auth selection.
148
+ * Keeps persisted session files isolated while reusing provider-side caches. */
149
+ providerSessionId?: string;
146
150
 
147
151
  /** Custom tools to register (in addition to built-in tools). Accepts both CustomTool and ToolDefinition. */
148
152
  customTools?: (CustomTool | ToolDefinition)[];
@@ -666,7 +670,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
666
670
  logger.time("sessionManager", () =>
667
671
  SessionManager.create(cwd, SessionManager.getDefaultSessionDir(cwd, agentDir)),
668
672
  );
669
- const sessionId = sessionManager.getSessionId();
673
+ const providerSessionId = options.providerSessionId ?? sessionManager.getSessionId();
670
674
  const modelApiKeyAvailability = new Map<string, boolean>();
671
675
  const getModelAvailabilityKey = (candidate: Model): string =>
672
676
  `${candidate.provider}\u0000${candidate.baseUrl ?? ""}`;
@@ -677,15 +681,17 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
677
681
  return cached;
678
682
  }
679
683
 
680
- const hasKey = !!(await modelRegistry.getApiKey(candidate, sessionId));
684
+ const hasKey = !!(await modelRegistry.getApiKey(candidate, providerSessionId));
681
685
  modelApiKeyAvailability.set(availabilityKey, hasKey);
682
686
  return hasKey;
683
687
  };
684
688
 
685
689
  // Check if session has existing data to restore
686
690
  const existingSession = logger.time("loadSession", () => sessionManager.buildSessionContext());
687
- const hasExistingSession = existingSession.messages.length > 0;
688
- const hasThinkingEntry = sessionManager.getBranch().some(entry => entry.type === "thinking_level_change");
691
+ const existingBranch = sessionManager.getBranch();
692
+ const hasExistingSession = existingBranch.length > 0;
693
+ const hasThinkingEntry = existingBranch.some(entry => entry.type === "thinking_level_change");
694
+ const hasServiceTierEntry = existingBranch.some(entry => entry.type === "service_tier_change");
689
695
 
690
696
  const hasExplicitModel = options.model !== undefined || options.modelPattern !== undefined;
691
697
  const modelMatchPreferences = {
@@ -1010,6 +1016,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1010
1016
  }
1011
1017
 
1012
1018
  const inlineExtensions: ExtensionFactory[] = options.extensions ? [...options.extensions] : [];
1019
+ inlineExtensions.push(createAutoresearchExtension);
1013
1020
  if (customTools.length > 0) {
1014
1021
  inlineExtensions.push(createCustomToolsExtension(customTools));
1015
1022
  }
@@ -1283,9 +1290,15 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1283
1290
  const normalizedRequested = requestedToolNames.filter(name => toolRegistry.has(name));
1284
1291
  const includeExitPlanMode = requestedToolNames.includes("exit_plan_mode");
1285
1292
  const mcpDiscoveryEnabled = settings.get("mcp.discoveryMode") ?? false;
1293
+ const defaultInactiveToolNames = new Set(
1294
+ registeredTools.filter(tool => tool.definition.defaultInactive).map(tool => tool.definition.name),
1295
+ );
1286
1296
  const requestedActiveToolNames = includeExitPlanMode
1287
1297
  ? normalizedRequested
1288
1298
  : normalizedRequested.filter(name => name !== "exit_plan_mode");
1299
+ const initialRequestedActiveToolNames = options.toolNames
1300
+ ? requestedActiveToolNames
1301
+ : requestedActiveToolNames.filter(name => !defaultInactiveToolNames.has(name));
1289
1302
  const explicitlyRequestedMCPToolNames = options.toolNames
1290
1303
  ? requestedActiveToolNames.filter(name => name.startsWith("mcp_"))
1291
1304
  : [];
@@ -1300,7 +1313,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1300
1313
  : [];
1301
1314
  let initialSelectedMCPToolNames: string[] = [];
1302
1315
  let defaultSelectedMCPToolNames: string[] = [];
1303
- let initialToolNames = [...requestedActiveToolNames];
1316
+ let initialToolNames = [...initialRequestedActiveToolNames];
1304
1317
  if (mcpDiscoveryEnabled) {
1305
1318
  const restoredSelectedMCPToolNames = existingSession.selectedMCPToolNames.filter(name => toolRegistry.has(name));
1306
1319
  defaultSelectedMCPToolNames = [
@@ -1311,7 +1324,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1311
1324
  : [...new Set([...restoredSelectedMCPToolNames, ...defaultSelectedMCPToolNames])];
1312
1325
  initialToolNames = [
1313
1326
  ...new Set([
1314
- ...requestedActiveToolNames.filter(name => !name.startsWith("mcp_")),
1327
+ ...initialRequestedActiveToolNames.filter(name => !name.startsWith("mcp_")),
1315
1328
  ...initialSelectedMCPToolNames,
1316
1329
  ]),
1317
1330
  ];
@@ -1320,7 +1333,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1320
1333
  // Custom tools and extension-registered tools are always included regardless of toolNames filter
1321
1334
  const alwaysInclude: string[] = [
1322
1335
  ...(options.customTools?.map(t => (isCustomTool(t) ? t.name : t.name)) ?? []),
1323
- ...registeredTools.map(t => t.definition.name),
1336
+ ...registeredTools.filter(t => !t.definition.defaultInactive).map(t => t.definition.name),
1324
1337
  ];
1325
1338
  for (const name of alwaysInclude) {
1326
1339
  if (mcpDiscoveryEnabled && name.startsWith("mcp_")) {
@@ -1417,6 +1430,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1417
1430
  openaiWebsocketSetting === "on" ? true : openaiWebsocketSetting === "off" ? false : undefined;
1418
1431
  const serviceTierSetting = settings.get("serviceTier");
1419
1432
 
1433
+ const initialServiceTier = hasServiceTierEntry
1434
+ ? existingSession.serviceTier
1435
+ : serviceTierSetting === "none"
1436
+ ? undefined
1437
+ : serviceTierSetting;
1438
+
1420
1439
  agent = new Agent({
1421
1440
  initialState: {
1422
1441
  systemPrompt,
@@ -1426,7 +1445,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1426
1445
  },
1427
1446
  convertToLlm: convertToLlmFinal,
1428
1447
  onPayload,
1429
- sessionId: sessionManager.getSessionId(),
1448
+ sessionId: providerSessionId,
1430
1449
  transformContext,
1431
1450
  steeringMode: settings.get("steeringMode") ?? "one-at-a-time",
1432
1451
  followUpMode: settings.get("followUpMode") ?? "one-at-a-time",
@@ -1438,14 +1457,14 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1438
1457
  minP: settings.get("minP") >= 0 ? settings.get("minP") : undefined,
1439
1458
  presencePenalty: settings.get("presencePenalty") >= 0 ? settings.get("presencePenalty") : undefined,
1440
1459
  repetitionPenalty: settings.get("repetitionPenalty") >= 0 ? settings.get("repetitionPenalty") : undefined,
1441
- serviceTier: serviceTierSetting === "none" ? undefined : serviceTierSetting,
1460
+ serviceTier: initialServiceTier,
1442
1461
  kimiApiFormat: settings.get("providers.kimiApiFormat") ?? "anthropic",
1443
1462
  preferWebsockets: preferOpenAICodexWebsockets,
1444
1463
  getToolContext: tc => toolContextStore.getContext(tc),
1445
1464
  getApiKey: async provider => {
1446
- // Use the provider argument from the in-flight request;
1447
- // agent.state.model may already be switched mid-turn.
1448
- const key = await modelRegistry.getApiKeyForProvider(provider, sessionId);
1465
+ // Use the provider-facing session id for sticky credential selection so cache keys
1466
+ // and provider auth affinity stay aligned across fresh benchmark sessions.
1467
+ const key = await modelRegistry.getApiKeyForProvider(provider, providerSessionId);
1449
1468
  if (!key) {
1450
1469
  throw new Error(`No API key found for provider "${provider}"`);
1451
1470
  }
@@ -1476,9 +1495,6 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1476
1495
  // Restore messages if session has existing data
1477
1496
  if (hasExistingSession) {
1478
1497
  agent.replaceMessages(existingSession.messages);
1479
- if (!hasThinkingEntry) {
1480
- sessionManager.appendThinkingLevelChange(thinkingLevel);
1481
- }
1482
1498
  } else {
1483
1499
  // Save initial model and thinking level for new sessions so they can be restored on resume
1484
1500
  if (model) {
@@ -1509,6 +1525,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1509
1525
  mcpDiscoveryEnabled,
1510
1526
  initialSelectedMCPToolNames,
1511
1527
  defaultSelectedMCPToolNames,
1528
+ persistInitialMCPToolSelection: !hasExistingSession,
1512
1529
  defaultSelectedMCPServerNames: [...discoveryDefaultServers],
1513
1530
  ttsrManager,
1514
1531
  obfuscator,
@@ -1519,8 +1536,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1519
1536
  if (model?.api === "openai-codex-responses") {
1520
1537
  try {
1521
1538
  await logger.timeAsync("prewarmCodexWebsocket", prewarmOpenAICodexResponses, model, {
1522
- apiKey: await modelRegistry.getApiKey(model, sessionId),
1523
- sessionId,
1539
+ apiKey: await modelRegistry.getApiKey(model, providerSessionId),
1540
+ sessionId: providerSessionId,
1524
1541
  preferWebsockets: preferOpenAICodexWebsockets,
1525
1542
  providerSessionState: session.providerSessionState,
1526
1543
  });