@oh-my-pi/pi-coding-agent 15.10.1 → 15.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/CHANGELOG.md +67 -0
  2. package/dist/types/cli/startup-cwd.d.ts +2 -0
  3. package/dist/types/commands/launch.d.ts +3 -0
  4. package/dist/types/config/keybindings.d.ts +2 -2
  5. package/dist/types/config/model-provider-priority.d.ts +1 -0
  6. package/dist/types/config/model-resolver.d.ts +4 -1
  7. package/dist/types/config/settings.d.ts +7 -2
  8. package/dist/types/debug/report-bundle.d.ts +3 -0
  9. package/dist/types/edit/file-snapshot-store.d.ts +18 -10
  10. package/dist/types/eval/py/__tests__/prelude.test.d.ts +1 -0
  11. package/dist/types/extensibility/extensions/types.d.ts +4 -1
  12. package/dist/types/lsp/client.d.ts +10 -0
  13. package/dist/types/main.d.ts +3 -9
  14. package/dist/types/mcp/tool-bridge.d.ts +2 -0
  15. package/dist/types/modes/components/custom-editor.d.ts +1 -1
  16. package/dist/types/modes/components/status-line.d.ts +2 -0
  17. package/dist/types/modes/controllers/event-controller.d.ts +17 -0
  18. package/dist/types/modes/interactive-mode.d.ts +1 -0
  19. package/dist/types/modes/magic-keywords.d.ts +1 -1
  20. package/dist/types/modes/markdown-prose.d.ts +1 -1
  21. package/dist/types/modes/types.d.ts +3 -0
  22. package/dist/types/modes/workflow.d.ts +3 -3
  23. package/dist/types/session/auth-storage.d.ts +1 -1
  24. package/dist/types/session/session-manager.d.ts +5 -2
  25. package/dist/types/task/executor.d.ts +10 -0
  26. package/dist/types/tools/eval.d.ts +8 -0
  27. package/dist/types/tools/gh-cache-invalidation.d.ts +6 -0
  28. package/dist/types/tools/github-cache.d.ts +12 -0
  29. package/dist/types/tools/path-utils.d.ts +8 -0
  30. package/dist/types/tools/search.d.ts +2 -2
  31. package/dist/types/tools/yield.d.ts +8 -0
  32. package/package.json +9 -9
  33. package/src/cli/args.ts +3 -1
  34. package/src/cli/dry-balance-cli.ts +2 -4
  35. package/src/cli/startup-cwd.ts +68 -0
  36. package/src/commands/launch.ts +3 -0
  37. package/src/commit/model-selection.ts +3 -2
  38. package/src/config/model-provider-priority.ts +55 -0
  39. package/src/config/model-registry.ts +4 -22
  40. package/src/config/model-resolver.ts +39 -7
  41. package/src/config/settings.ts +86 -41
  42. package/src/debug/index.ts +8 -0
  43. package/src/debug/raw-sse-buffer.ts +7 -4
  44. package/src/debug/report-bundle.ts +9 -0
  45. package/src/edit/file-snapshot-store.ts +33 -1
  46. package/src/edit/hashline/filesystem.ts +2 -1
  47. package/src/eval/__tests__/llm-bridge.test.ts +20 -0
  48. package/src/eval/js/context-manager.ts +32 -15
  49. package/src/eval/llm-bridge.ts +14 -3
  50. package/src/eval/py/__tests__/prelude.test.ts +19 -0
  51. package/src/eval/py/executor.ts +23 -11
  52. package/src/eval/py/prelude.py +1 -1
  53. package/src/extensibility/extensions/types.ts +10 -1
  54. package/src/internal-urls/docs-index.generated.ts +3 -3
  55. package/src/lsp/client.ts +23 -11
  56. package/src/lsp/config.ts +11 -1
  57. package/src/lsp/index.ts +61 -9
  58. package/src/main.ts +91 -65
  59. package/src/mcp/tool-bridge.ts +2 -0
  60. package/src/memories/index.ts +2 -2
  61. package/src/modes/components/custom-editor.ts +143 -111
  62. package/src/modes/components/model-selector.ts +59 -13
  63. package/src/modes/components/oauth-selector.ts +33 -7
  64. package/src/modes/components/status-line.ts +19 -4
  65. package/src/modes/components/tips.txt +1 -1
  66. package/src/modes/components/user-message.ts +1 -1
  67. package/src/modes/controllers/event-controller.ts +26 -0
  68. package/src/modes/controllers/input-controller.ts +46 -7
  69. package/src/modes/interactive-mode.ts +107 -20
  70. package/src/modes/magic-keywords.ts +1 -1
  71. package/src/modes/markdown-prose.ts +1 -1
  72. package/src/modes/theme/shimmer.ts +20 -9
  73. package/src/modes/types.ts +3 -0
  74. package/src/modes/workflow.ts +10 -10
  75. package/src/prompts/system/workflow-notice.md +1 -1
  76. package/src/prompts/tools/bash.md +9 -0
  77. package/src/prompts/tools/browser.md +1 -1
  78. package/src/prompts/tools/eval.md +2 -1
  79. package/src/prompts/tools/read.md +2 -2
  80. package/src/sdk.ts +26 -9
  81. package/src/session/agent-session.ts +37 -12
  82. package/src/session/auth-storage.ts +2 -0
  83. package/src/session/session-manager.ts +96 -23
  84. package/src/task/executor.ts +71 -36
  85. package/src/task/render.ts +3 -4
  86. package/src/tools/bash.ts +7 -0
  87. package/src/tools/browser/tab-supervisor.ts +13 -1
  88. package/src/tools/browser/tab-worker.ts +33 -4
  89. package/src/tools/eval.ts +13 -2
  90. package/src/tools/find.ts +7 -0
  91. package/src/tools/gh-cache-invalidation.ts +200 -0
  92. package/src/tools/github-cache.ts +25 -0
  93. package/src/tools/inspect-image.ts +2 -2
  94. package/src/tools/path-utils.ts +28 -2
  95. package/src/tools/plan-mode-guard.ts +52 -7
  96. package/src/tools/read.ts +25 -12
  97. package/src/tools/search.ts +38 -3
  98. package/src/tools/write.ts +2 -2
  99. package/src/tools/yield.ts +10 -1
  100. package/src/utils/commit-message-generator.ts +2 -2
  101. package/src/utils/enhanced-paste.ts +30 -2
  102. package/src/web/search/providers/codex.ts +37 -8
@@ -109,6 +109,7 @@ import {
109
109
  extractExplicitThinkingSelector,
110
110
  formatModelSelectorValue,
111
111
  formatModelString,
112
+ getModelMatchPreferences,
112
113
  parseModelString,
113
114
  type ResolvedModelRoleValue,
114
115
  resolveModelRoleValue,
@@ -283,6 +284,11 @@ export type AgentSessionEventListener = (event: AgentSessionEvent) => void;
283
284
  export type AsyncJobSnapshotItem = Pick<AsyncJob, "id" | "type" | "status" | "label" | "startTime">;
284
285
 
285
286
  const EMPTY_STOP_MAX_RETRIES = 3;
287
+ const NON_WHITESPACE_RE = /\S/;
288
+
289
+ function hasNonWhitespace(value: string): boolean {
290
+ return NON_WHITESPACE_RE.test(value);
291
+ }
286
292
 
287
293
  export interface AsyncJobSnapshot {
288
294
  running: AsyncJobSnapshotItem[];
@@ -5445,7 +5451,7 @@ export class AgentSession {
5445
5451
 
5446
5452
  const currentModel = this.model;
5447
5453
  if (!currentModel) return undefined;
5448
- const matchPreferences = { usageOrder: this.settings.getStorage()?.getModelUsageOrder() };
5454
+ const matchPreferences = getModelMatchPreferences(this.settings);
5449
5455
  const models: ResolvedRoleModel[] = [];
5450
5456
 
5451
5457
  for (const role of roleOrder) {
@@ -6539,9 +6545,13 @@ export class AgentSession {
6539
6545
  this.#retryAttempt = 0;
6540
6546
  }
6541
6547
  this.#resolveRetry();
6548
+ // Tool-use orphans corrupt Anthropic message history (tool_result without
6549
+ // matching tool_use). Always remove them even when the retry cap is hit.
6550
+ if (assistantMessage.stopReason === "toolUse") {
6551
+ this.#removeEmptyStopFromActiveContext(assistantMessage);
6552
+ }
6542
6553
  return true;
6543
6554
  }
6544
-
6545
6555
  this.#removeEmptyStopFromActiveContext(assistantMessage);
6546
6556
  this.agent.appendMessage({
6547
6557
  role: "developer",
@@ -6554,12 +6564,26 @@ export class AgentSession {
6554
6564
  }
6555
6565
 
6556
6566
  #isEmptyAssistantStop(assistantMessage: AssistantMessage): boolean {
6557
- if (assistantMessage.stopReason !== "stop") return false;
6558
- return !assistantMessage.content.some(content => {
6559
- if (content.type === "text") return content.text.trim().length > 0;
6560
- if (content.type === "thinking") return content.thinking.trim().length > 0;
6561
- return content.type === "toolCall";
6562
- });
6567
+ switch (assistantMessage.stopReason) {
6568
+ case "stop":
6569
+ for (const content of assistantMessage.content) {
6570
+ if (content.type === "toolCall") return false;
6571
+ if (content.type === "text" && hasNonWhitespace(content.text)) return false;
6572
+ if (content.type === "thinking" && hasNonWhitespace(content.thinking)) return false;
6573
+ }
6574
+ return true;
6575
+ case "toolUse":
6576
+ // An orphaned toolUse stop (no tool_use block) corrupts Anthropic history:
6577
+ // a later tool_result has nothing to anchor to. Thinking alone cannot anchor
6578
+ // a tool_result, so it does not rescue a toolUse stop here.
6579
+ for (const content of assistantMessage.content) {
6580
+ if (content.type === "toolCall") return false;
6581
+ if (content.type === "text" && hasNonWhitespace(content.text)) return false;
6582
+ }
6583
+ return true;
6584
+ default:
6585
+ return false;
6586
+ }
6563
6587
  }
6564
6588
 
6565
6589
  #emptyStopRetryReminder(): string {
@@ -7143,7 +7167,7 @@ export class AgentSession {
7143
7167
 
7144
7168
  return resolveModelRoleValue(roleModelStr, availableModels, {
7145
7169
  settings: this.settings,
7146
- matchPreferences: { usageOrder: this.settings.getStorage()?.getModelUsageOrder() },
7170
+ matchPreferences: getModelMatchPreferences(this.settings),
7147
7171
  modelRegistry: this.#modelRegistry,
7148
7172
  });
7149
7173
  }
@@ -7874,11 +7898,12 @@ export class AgentSession {
7874
7898
  #isTransientTransportErrorMessage(errorMessage: string): boolean {
7875
7899
  // Match: overloaded_error, provider returned error, rate limit, 429, 500, 502, 503, 504,
7876
7900
  // service unavailable, provider-suggested retry, network/connection/socket errors, fetch failed,
7877
- // terminated, retry delay exceeded, Bun HTTP/2 stream resets (RST_STREAM / REFUSED_STREAM /
7878
- // ENHANCE_YOUR_CALM, surfaced verbatim from src/http/h2_client/dispatch.zig)
7901
+ // gateway upstream failures, terminated, retry delay exceeded, Bun HTTP/2 stream resets
7902
+ // (RST_STREAM / REFUSED_STREAM / ENHANCE_YOUR_CALM, surfaced verbatim from
7903
+ // src/http/h2_client/dispatch.zig)
7879
7904
  return (
7880
7905
  isUnexpectedSocketCloseMessage(errorMessage) ||
7881
- /overloaded|provider.?returned.?error|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|retry your request|network.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|socket hang up|timed? out|timeout|terminated|retry delay|stream stall|no error details in response|HTTP2(?:StreamReset|RefusedStream|EnhanceYourCalm)/i.test(
7906
+ /overloaded|provider.?returned.?error|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|retry your request|network.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|upstream.?request.?failed|reset before headers|socket hang up|timed? out|timeout|terminated|retry delay|stream stall|no error details in response|HTTP2(?:StreamReset|RefusedStream|EnhanceYourCalm)/i.test(
7882
7907
  errorMessage,
7883
7908
  )
7884
7909
  );
@@ -10,6 +10,8 @@ export type {
10
10
  AuthCredentialStore,
11
11
  AuthStorageData,
12
12
  AuthStorageOptions,
13
+ CredentialOrigin,
14
+ CredentialOriginKind,
13
15
  OAuthCredential,
14
16
  SerializedAuthStorage,
15
17
  SnapshotResponse,
@@ -845,11 +845,18 @@ function writeTerminalBreadcrumb(cwd: string, sessionFile: string): void {
845
845
  Bun.write(breadcrumbFile, content).catch(() => {});
846
846
  }
847
847
 
848
+ interface TerminalBreadcrumb {
849
+ cwd: string;
850
+ sessionFile: string;
851
+ }
852
+
848
853
  /**
849
- * Read the terminal breadcrumb for the current terminal, scoped to a cwd.
850
- * Returns the session file path if it exists and matches the cwd, null otherwise.
854
+ * Read the raw terminal breadcrumb for the current terminal.
855
+ * Returns the recorded cwd + session file (verified to exist) regardless of
856
+ * whether the recorded cwd still matches the current one. Callers decide how
857
+ * to interpret a cwd mismatch (e.g. a moved/renamed worktree).
851
858
  */
852
- async function readTerminalBreadcrumb(cwd: string): Promise<string | null> {
859
+ async function readTerminalBreadcrumbEntry(): Promise<TerminalBreadcrumb | null> {
853
860
  const terminalId = getTerminalId();
854
861
  if (!terminalId) return null;
855
862
 
@@ -862,12 +869,9 @@ async function readTerminalBreadcrumb(cwd: string): Promise<string | null> {
862
869
  const breadcrumbCwd = lines[0];
863
870
  const sessionFile = lines[1];
864
871
 
865
- // Only return if cwd matches (user might have cd'd)
866
- if (path.resolve(breadcrumbCwd) !== path.resolve(cwd)) return null;
867
-
868
872
  // Verify the session file still exists
869
873
  const stat = fs.statSync(sessionFile, { throwIfNoEntry: false });
870
- if (stat?.isFile()) return sessionFile;
874
+ if (stat?.isFile()) return { cwd: breadcrumbCwd, sessionFile };
871
875
  } catch (err) {
872
876
  if (!isEnoent(err)) logger.debug("Terminal breadcrumb read failed", { err });
873
877
  // Breadcrumb doesn't exist or is corrupt — fall through
@@ -1968,6 +1972,7 @@ export class SessionManager {
1968
1972
  #inMemoryArtifactCounter = 0;
1969
1973
  readonly #blobStore: BlobStore;
1970
1974
  #suppressBreadcrumb = false;
1975
+ #sessionNameChangedCallbacks = new Set<() => void>();
1971
1976
 
1972
1977
  private constructor(
1973
1978
  private cwd: string,
@@ -2163,19 +2168,24 @@ export class SessionManager {
2163
2168
  /**
2164
2169
  * Move the session to a new working directory.
2165
2170
  * Moves session files and artifacts on disk, updates all internal references,
2166
- * and rewrites the session header with the new cwd.
2171
+ * and rewrites the session header with the new cwd. When provided,
2172
+ * `targetSessionDir` is used instead of deriving the default directory for
2173
+ * the new cwd (for `--continue --session-dir` / `--resume --session-dir`).
2167
2174
  */
2168
- async moveTo(newCwd: string): Promise<void> {
2175
+ async moveTo(newCwd: string, targetSessionDir?: string): Promise<void> {
2169
2176
  const resolvedCwd = path.resolve(newCwd);
2170
- if (resolvedCwd === this.cwd) return;
2177
+ if (resolvedCwd === this.cwd && (!targetSessionDir || path.resolve(targetSessionDir) === this.sessionDir)) return;
2171
2178
 
2172
2179
  const managedSessionsRoot = resolveManagedSessionRoot(this.sessionDir, this.cwd);
2173
- const newSessionDir = managedSessionsRoot
2174
- ? computeDefaultSessionDir(resolvedCwd, this.storage, managedSessionsRoot)
2175
- : computeDefaultSessionDir(resolvedCwd, this.storage);
2180
+ const newSessionDir = targetSessionDir
2181
+ ? path.resolve(targetSessionDir)
2182
+ : managedSessionsRoot
2183
+ ? computeDefaultSessionDir(resolvedCwd, this.storage, managedSessionsRoot)
2184
+ : computeDefaultSessionDir(resolvedCwd, this.storage);
2176
2185
  let hadSessionFile = false;
2177
2186
 
2178
2187
  if (this.persist && this.#sessionFile) {
2188
+ this.storage.ensureDirSync(newSessionDir);
2179
2189
  // Close the persist writer before moving files
2180
2190
  await this.#closePersistWriter();
2181
2191
  this.#persistChain = Promise.resolve();
@@ -2186,25 +2196,29 @@ export class SessionManager {
2186
2196
  const newSessionFile = path.join(newSessionDir, path.basename(oldSessionFile));
2187
2197
  const oldArtifactDir = oldSessionFile.slice(0, -6); // strip .jsonl
2188
2198
  const newArtifactDir = newSessionFile.slice(0, -6);
2199
+ const sameSessionFile = path.resolve(oldSessionFile) === path.resolve(newSessionFile);
2200
+ const sameArtifactDir = path.resolve(oldArtifactDir) === path.resolve(newArtifactDir);
2189
2201
  hadSessionFile = this.storage.existsSync(oldSessionFile);
2190
2202
  let movedSessionFile = false;
2191
2203
  let movedArtifactDir = false;
2192
2204
 
2193
2205
  try {
2194
2206
  // Guard: session file may not exist yet (no assistant messages persisted)
2195
- if (hadSessionFile) {
2207
+ if (hadSessionFile && !sameSessionFile) {
2196
2208
  await fs.promises.rename(oldSessionFile, newSessionFile);
2197
2209
  movedSessionFile = true;
2198
2210
  }
2199
2211
 
2200
- try {
2201
- const stat = await fs.promises.stat(oldArtifactDir);
2202
- if (stat.isDirectory()) {
2203
- await fs.promises.rename(oldArtifactDir, newArtifactDir);
2204
- movedArtifactDir = true;
2212
+ if (!sameArtifactDir) {
2213
+ try {
2214
+ const stat = await fs.promises.stat(oldArtifactDir);
2215
+ if (stat.isDirectory()) {
2216
+ await fs.promises.rename(oldArtifactDir, newArtifactDir);
2217
+ movedArtifactDir = true;
2218
+ }
2219
+ } catch (err) {
2220
+ if (!isEnoent(err)) throw err;
2205
2221
  }
2206
- } catch (err) {
2207
- if (!isEnoent(err)) throw err;
2208
2222
  }
2209
2223
  } catch (err) {
2210
2224
  if (movedArtifactDir) {
@@ -2730,6 +2744,23 @@ export class SessionManager {
2730
2744
  return this.#sessionName;
2731
2745
  }
2732
2746
 
2747
+ onSessionNameChanged(cb: () => void): () => void {
2748
+ this.#sessionNameChangedCallbacks.add(cb);
2749
+ return () => {
2750
+ this.#sessionNameChangedCallbacks.delete(cb);
2751
+ };
2752
+ }
2753
+
2754
+ #fireSessionNameChanged(): void {
2755
+ for (const cb of [...this.#sessionNameChangedCallbacks]) {
2756
+ try {
2757
+ cb();
2758
+ } catch (err) {
2759
+ logger.warn("SessionManager: session name change hook failed", { error: String(err) });
2760
+ }
2761
+ }
2762
+ }
2763
+
2733
2764
  /** Strip C0/C1 control characters (includes ESC, so removes ANSI sequences) and collapse whitespace. */
2734
2765
  static #sanitizeName(name: string): string {
2735
2766
  return name
@@ -2765,6 +2796,7 @@ export class SessionManager {
2765
2796
  if (this.persist && sessionFile && this.storage.existsSync(sessionFile)) {
2766
2797
  await this.#rewriteFile();
2767
2798
  }
2799
+ this.#fireSessionNameChanged();
2768
2800
  return true;
2769
2801
  }
2770
2802
 
@@ -3491,8 +3523,49 @@ export class SessionManager {
3491
3523
  ): Promise<SessionManager> {
3492
3524
  const dir = sessionDir ?? SessionManager.getDefaultSessionDir(cwd, undefined, storage);
3493
3525
  // Prefer terminal-scoped breadcrumb (handles concurrent sessions correctly)
3494
- const terminalSession = await readTerminalBreadcrumb(cwd);
3495
- const mostRecent = terminalSession ?? (await findMostRecentSession(dir, storage));
3526
+ const breadcrumb = await readTerminalBreadcrumbEntry();
3527
+ const breadcrumbCwd = breadcrumb ? path.resolve(breadcrumb.cwd) : undefined;
3528
+ const resolvedCwd = path.resolve(cwd);
3529
+ let mostRecent: string | null | undefined;
3530
+ if (breadcrumb && breadcrumbCwd !== resolvedCwd) {
3531
+ // The terminal's last session was started in a different cwd. If that cwd no
3532
+ // longer exists (e.g. `git worktree move`/dir rename) and the new location has
3533
+ // no sessions of its own, re-root the session here instead of silently starting
3534
+ // fresh — otherwise the relocated session would be unreachable via --continue.
3535
+ // When an explicit sessionDir is reused across the move, the stale breadcrumb
3536
+ // file itself may be the most recent entry there; don't count it as a
3537
+ // current-directory session. If that shared dir also contains an older session
3538
+ // that already belongs to the current cwd, prefer that local session instead
3539
+ // of re-rooting the stale breadcrumb over it.
3540
+ const resolvedBreadcrumbCwd = path.resolve(breadcrumb.cwd);
3541
+ mostRecent = await findMostRecentSession(dir, storage);
3542
+ const sourceCwdGone = !fs.existsSync(resolvedBreadcrumbCwd);
3543
+ const breadcrumbSessionFile = path.resolve(breadcrumb.sessionFile);
3544
+ const mostRecentIsBreadcrumb =
3545
+ mostRecent !== null && mostRecent !== undefined && path.resolve(mostRecent) === breadcrumbSessionFile;
3546
+ let hasCurrentCwdSession = false;
3547
+ if (sourceCwdGone && mostRecentIsBreadcrumb) {
3548
+ const currentCwdSession = (await SessionManager.list(cwd, dir, storage)).find(
3549
+ session =>
3550
+ path.resolve(session.path) !== breadcrumbSessionFile &&
3551
+ session.cwd &&
3552
+ path.resolve(session.cwd) === resolvedCwd,
3553
+ );
3554
+ if (currentCwdSession) {
3555
+ mostRecent = currentCwdSession.path;
3556
+ hasCurrentCwdSession = true;
3557
+ }
3558
+ }
3559
+ const relocated = sourceCwdGone && (mostRecent === null || (mostRecentIsBreadcrumb && !hasCurrentCwdSession));
3560
+ if (relocated) {
3561
+ process.stderr.write(`Re-rooting moved session from ${resolvedBreadcrumbCwd} to ${resolvedCwd}.\n`);
3562
+ const manager = await SessionManager.open(breadcrumb.sessionFile, undefined, storage);
3563
+ await manager.moveTo(cwd, sessionDir);
3564
+ return manager;
3565
+ }
3566
+ }
3567
+ const terminalSession = breadcrumb && breadcrumbCwd === resolvedCwd ? breadcrumb.sessionFile : null;
3568
+ if (mostRecent === undefined) mostRecent = terminalSession ?? (await findMostRecentSession(dir, storage));
3496
3569
  const manager = new SessionManager(cwd, dir, true, storage);
3497
3570
  if (mostRecent) {
3498
3571
  await manager.#initSessionFile(mostRecent);
@@ -34,7 +34,11 @@ import { SessionManager } from "../session/session-manager";
34
34
  import { truncateTail } from "../session/streaming-output";
35
35
  import type { ContextFileEntry } from "../tools";
36
36
  import { normalizeSchema } from "../tools/jtd-to-json-schema";
37
- import { buildOutputValidator, summarizeValidationFailure } from "../tools/output-schema-validator";
37
+ import {
38
+ buildOutputValidator,
39
+ type OutputValidator,
40
+ summarizeValidationFailure,
41
+ } from "../tools/output-schema-validator";
38
42
 
39
43
  import { type ReportFindingDetails, toReviewFinding } from "../tools/review";
40
44
  import { ToolAbortError } from "../tools/tool-errors";
@@ -256,21 +260,40 @@ function extractCompletionData(parsed: unknown): unknown {
256
260
  return parsed;
257
261
  }
258
262
 
259
- function normalizeCompleteData(data: unknown, reportFindings?: ReviewFinding[]): unknown {
260
- let normalized = parseStringifiedJson(data ?? null);
263
+ /**
264
+ * Resolve the final yielded payload, optionally splicing collected
265
+ * `report_finding` entries into a top-level `findings` array.
266
+ *
267
+ * Injection is suppressed when an active validator would reject the augmented
268
+ * payload (e.g. a caller-supplied schema with `additionalProperties: false`
269
+ * that does not declare `findings`). That keeps the in-tool yield validator
270
+ * (which only sees the raw, pre-injection data) in lockstep with this
271
+ * post-mortem validator — honoring the "accepted in-tool ⇒ accepted
272
+ * post-mortem" guarantee documented in `output-schema-validator.ts`. The
273
+ * dropped findings are still preserved verbatim in the agent's progress
274
+ * stream and JSONL artifact, so no information is lost when injection is
275
+ * suppressed.
276
+ */
277
+ function normalizeCompleteData(
278
+ data: unknown,
279
+ reportFindings: ReviewFinding[] | undefined,
280
+ validator: OutputValidator | undefined,
281
+ ): unknown {
282
+ const normalized = parseStringifiedJson(data ?? null);
261
283
  if (
262
- Array.isArray(reportFindings) &&
263
- reportFindings.length > 0 &&
264
- normalized &&
265
- typeof normalized === "object" &&
266
- !Array.isArray(normalized)
284
+ !Array.isArray(reportFindings) ||
285
+ reportFindings.length === 0 ||
286
+ !normalized ||
287
+ typeof normalized !== "object" ||
288
+ Array.isArray(normalized)
267
289
  ) {
268
- const record = normalized as Record<string, unknown>;
269
- if (!("findings" in record)) {
270
- normalized = { ...record, findings: reportFindings };
271
- }
290
+ return normalized;
272
291
  }
273
- return normalized;
292
+ const record = normalized as Record<string, unknown>;
293
+ if ("findings" in record) return normalized;
294
+ const injected = { ...record, findings: reportFindings };
295
+ if (validator && !validator.validate(injected).success) return normalized;
296
+ return injected;
274
297
  }
275
298
 
276
299
  function resolveFallbackCompletion(rawOutput: string, outputSchema: unknown): { data: unknown } | null {
@@ -288,6 +311,15 @@ export interface YieldItem {
288
311
  data?: unknown;
289
312
  status?: "success" | "aborted";
290
313
  error?: string;
314
+ /**
315
+ * Set by the in-tool yield validator when it exhausted its retry budget
316
+ * (MAX_SCHEMA_RETRIES) and accepted a schema-invalid payload anyway.
317
+ * `finalizeSubprocessOutput` honors this by serializing the payload and
318
+ * surfacing a stderr warning, instead of re-emitting `schema_violation`
319
+ * — which would silently swap the subagent's "accepted" view for a
320
+ * different, opaque error blob in the parent's view of the result.
321
+ */
322
+ schemaOverridden?: boolean;
291
323
  }
292
324
 
293
325
  interface FinalizeSubprocessOutputArgs {
@@ -308,7 +340,8 @@ interface FinalizeSubprocessOutputResult {
308
340
  abortedViaYield: boolean;
309
341
  hasYield: boolean;
310
342
  }
311
-
343
+ export const SUBAGENT_WARNING_SCHEMA_OVERRIDDEN =
344
+ "SYSTEM WARNING: Subagent exhausted schema-retry budget; result was accepted despite failing the output schema.";
312
345
  export const SUBAGENT_WARNING_NULL_YIELD = "SYSTEM WARNING: Subagent called yield with null data.";
313
346
  export const SUBAGENT_WARNING_MISSING_YIELD =
314
347
  "SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.";
@@ -360,30 +393,32 @@ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): Fi
360
393
  if (submitData === null || submitData === undefined) {
361
394
  rawOutput = rawOutput ? `${SUBAGENT_WARNING_NULL_YIELD}\n\n${rawOutput}` : SUBAGENT_WARNING_NULL_YIELD;
362
395
  } else {
363
- const completeData = normalizeCompleteData(submitData, reportFindings);
364
396
  const { validator, error: schemaError } = buildOutputValidator(outputSchema);
365
- if (schemaError) {
366
- rawOutput = `{"error":"schema_violation","message":"invalid output schema: ${schemaError.replace(/"/g, '\\"')}"}`;
367
- stderr = `schema_violation: invalid output schema: ${schemaError}`;
368
- exitCode = 1;
397
+ const overridden = lastYield?.schemaOverridden === true;
398
+ const completeData = normalizeCompleteData(submitData, reportFindings, validator);
399
+ const result =
400
+ schemaError || overridden
401
+ ? { success: true as const }
402
+ : (validator?.validate(completeData) ?? { success: true as const });
403
+ if (!result.success) {
404
+ const summary = summarizeValidationFailure(result, completeData, validator?.requiredFields ?? []);
405
+ const outcome = buildSchemaViolationOutcome(summary, completeData);
406
+ rawOutput = outcome.rawOutput;
407
+ stderr = outcome.stderr;
408
+ exitCode = outcome.exitCode;
369
409
  } else {
370
- const result = validator?.validate(completeData) ?? { success: true as const };
371
- if (!result.success) {
372
- const summary = summarizeValidationFailure(result, completeData, validator?.requiredFields ?? []);
373
- const outcome = buildSchemaViolationOutcome(summary, completeData);
374
- rawOutput = outcome.rawOutput;
375
- stderr = outcome.stderr;
376
- exitCode = outcome.exitCode;
377
- } else {
378
- try {
379
- rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
380
- } catch (err) {
381
- const errorMessage = err instanceof Error ? err.message : String(err);
382
- rawOutput = `{"error":"Failed to serialize yield data: ${errorMessage}"}`;
383
- }
384
- exitCode = 0;
385
- stderr = "";
410
+ try {
411
+ rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
412
+ } catch (err) {
413
+ const errorMessage = err instanceof Error ? err.message : String(err);
414
+ rawOutput = `{"error":"Failed to serialize yield data: ${errorMessage}"}`;
386
415
  }
416
+ exitCode = 0;
417
+ stderr = overridden
418
+ ? SUBAGENT_WARNING_SCHEMA_OVERRIDDEN
419
+ : schemaError
420
+ ? `invalid output schema: ${schemaError}`
421
+ : "";
387
422
  }
388
423
  }
389
424
  }
@@ -393,8 +428,8 @@ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): Fi
393
428
  const hasOutputSchema = normalizedSchema !== undefined && !schemaError;
394
429
  const fallback = allowFallback ? resolveFallbackCompletion(rawOutput, outputSchema) : null;
395
430
  if (fallback) {
396
- const completeData = normalizeCompleteData(fallback.data, reportFindings);
397
431
  const { validator } = buildOutputValidator(outputSchema);
432
+ const completeData = normalizeCompleteData(fallback.data, reportFindings, validator);
398
433
  const result = validator?.validate(completeData) ?? { success: true as const };
399
434
  if (!result.success) {
400
435
  const summary = summarizeValidationFailure(result, completeData, validator?.requiredFields ?? []);
@@ -633,12 +633,11 @@ function renderAgentProgress(
633
633
  let statusLine: string;
634
634
  if (progress.status === "running") {
635
635
  const bullet = theme.fg("accent", "•");
636
- const name = shimmerEnabled()
637
- ? shimmerText(displayId, theme)
638
- : theme.fg("accent", description ? theme.bold(displayId) : displayId);
636
+ const name = theme.fg("accent", description ? theme.bold(displayId) : displayId);
639
637
  statusLine = `${indent}${bullet} ${name}`;
640
638
  if (description) {
641
- statusLine += theme.fg("accent", `: ${description}`);
639
+ const desc = shimmerEnabled() ? shimmerText(description, theme) : theme.fg("accent", description);
640
+ statusLine += `${theme.fg("accent", ":")} ${desc}`;
642
641
  }
643
642
  } else {
644
643
  statusLine = `${indent}${theme.fg(iconColor, icon)} ${theme.fg("accent", titlePart)}`;
package/src/tools/bash.ts CHANGED
@@ -29,6 +29,7 @@ import { type BashInteractiveResult, runInteractiveBashPty } from "./bash-intera
29
29
  import { checkBashInterception } from "./bash-interceptor";
30
30
  import { canUseInteractiveBashPty } from "./bash-pty-selection";
31
31
  import { expandInternalUrls, type InternalUrlExpansionOptions } from "./bash-skill-urls";
32
+ import { invalidateGithubCacheForBashCommand } from "./gh-cache-invalidation";
32
33
  import { formatStyledTruncationWarning, type OutputMeta, stripOutputNotice } from "./output-meta";
33
34
  import { resolveToCwd } from "./path-utils";
34
35
  import { capPreviewLines, formatToolWorkingDirectory, replaceTabs } from "./render-utils";
@@ -721,6 +722,12 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
721
722
  cwd = await expandInternalUrls(cwd, { ...internalUrlOptions, noEscape: true });
722
723
  }
723
724
 
725
+ // Best-effort cache invalidation: drop github-cache rows for any issue/PR
726
+ // number touched by a mutating `gh` subcommand inside this bash call so
727
+ // subsequent issue:// / pr:// reads pick up the post-mutation state
728
+ // instead of the cached pre-mutation snapshot.
729
+ invalidateGithubCacheForBashCommand(command);
730
+
724
731
  const commandCwd = cwd ? resolveToCwd(cwd, this.session.cwd) : this.session.cwd;
725
732
  let cwdStat: fs.Stats;
726
733
  try {
@@ -101,11 +101,23 @@ export async function acquireTab(
101
101
  if (opts.dialogs !== undefined && opts.dialogs !== existing.dialogPolicy) {
102
102
  await releaseTab(name, { kill: false });
103
103
  } else {
104
+ const reuseSteps: string[] = [];
105
+ if (opts.viewport) {
106
+ const dsf = opts.viewport.deviceScaleFactor;
107
+ reuseSteps.push(
108
+ `await page.setViewport({ width: ${opts.viewport.width}, height: ${opts.viewport.height}, deviceScaleFactor: ${dsf === undefined ? "undefined" : String(dsf)} });`,
109
+ );
110
+ }
104
111
  if (opts.url) {
112
+ reuseSteps.push(
113
+ `await tab.goto(${JSON.stringify(opts.url)}, { waitUntil: ${JSON.stringify(opts.waitUntil ?? "load")} });`,
114
+ );
115
+ }
116
+ if (reuseSteps.length) {
105
117
  await runInTabWithSnapshot(
106
118
  name,
107
119
  {
108
- code: `await tab.goto(${JSON.stringify(opts.url)}, { waitUntil: ${JSON.stringify(opts.waitUntil ?? "load")} });`,
120
+ code: reuseSteps.join("\n"),
109
121
  timeoutMs: opts.timeoutMs,
110
122
  signal: opts.signal,
111
123
  },
@@ -27,7 +27,7 @@ import {
27
27
  DEFAULT_VIEWPORT,
28
28
  loadPuppeteerInWorker,
29
29
  } from "./launch";
30
- import { extractReadableFromHtml, type ReadableFormat, type ReadableResult } from "./readable";
30
+ import { extractReadableFromHtml, type ReadableFormat } from "./readable";
31
31
  import type {
32
32
  Observation,
33
33
  ObservationEntry,
@@ -97,7 +97,7 @@ interface TabApi {
97
97
  ): Promise<void>;
98
98
  observe(opts?: { includeAll?: boolean; viewportOnly?: boolean }): Promise<Observation>;
99
99
  screenshot(opts?: ScreenshotOptions): Promise<ScreenshotResult>;
100
- extract(format?: ReadableFormat): Promise<ReadableResult | null>;
100
+ extract(format?: ReadableFormat): Promise<string>;
101
101
  click(selector: string): Promise<void>;
102
102
  type(selector: string, text: string): Promise<void>;
103
103
  fill(selector: string, value: string): Promise<void>;
@@ -167,6 +167,25 @@ function cloneSafe(value: unknown): unknown {
167
167
  return String(value);
168
168
  }
169
169
 
170
/**
 * Strip `user:pass@` userinfo from a URL before it is surfaced in tool
 * outputs / details, so Basic Auth credentials don't leak into transcripts.
 *
 * @param url - URL string to sanitize; may be empty or not a valid URL.
 * @returns The URL re-serialized with an empty username and password, or the
 *   original string verbatim when it is empty, contains no `@`, does not
 *   parse as a URL, or carries no credentials.
 */
function redactUrlCredentials(url: string): string {
  // Userinfo is always terminated by "@", so a string without one cannot
  // carry credentials. This keeps ordinary `scheme://host/...` URLs on the
  // fast path instead of parsing every one of them.
  if (!url || !url.includes("@")) return url;
  try {
    const parsed = new URL(url);
    // "@" may also appear in a path or query; only rewrite when the parser
    // actually found userinfo, so credential-free URLs come back untouched.
    if (!parsed.username && !parsed.password) return url;
    parsed.username = "";
    parsed.password = "";
    return parsed.toString();
  } catch {
    // Not a parseable URL: nothing to redact.
    return url;
  }
}
188
+
170
189
  function errorPayload(error: unknown): RunErrorPayload {
171
190
  if (error instanceof ToolAbortError) {
172
191
  return { name: error.name, message: error.message, stack: error.stack, isToolError: false, isAbort: true };
@@ -491,7 +510,7 @@ export class WorkerCore {
491
510
  const targetId = this.#targetId ?? (await targetIdForPage(page));
492
511
  this.#targetId = targetId;
493
512
  return {
494
- url: page.url(),
513
+ url: redactUrlCredentials(page.url()),
495
514
  title: await page.title().catch(() => undefined),
496
515
  viewport: page.viewport() ?? DEFAULT_VIEWPORT,
497
516
  targetId,
@@ -677,7 +696,17 @@ export class WorkerCore {
677
696
  screenshot: async opts => await this.#captureScreenshot(session, displays, screenshots, signal, opts),
678
697
  extract: async (format = "markdown") => {
679
698
  const html = (await untilAborted(signal, () => page.content())) as string;
680
- return extractReadableFromHtml(html, page.url(), format);
699
+ const result = await extractReadableFromHtml(html, page.url(), format);
700
+ if (!result) {
701
+ throw new ToolError(`tab.extract(${JSON.stringify(format)}) found no readable content on ${page.url()}`);
702
+ }
703
+ const content = format === "markdown" ? result.markdown : result.text;
704
+ if (!content) {
705
+ throw new ToolError(
706
+ `tab.extract(${JSON.stringify(format)}) produced empty ${format} content for ${page.url()}`,
707
+ );
708
+ }
709
+ return content;
681
710
  },
682
711
  click: async selector => {
683
712
  const resolved = normalizeSelector(selector);
package/src/tools/eval.ts CHANGED
@@ -88,12 +88,21 @@ function formatDisplayOutputsForText(outputs: EvalDisplayOutput[]): string {
88
88
  export interface EvalToolDescriptionOptions {
89
89
  py?: boolean;
90
90
  js?: boolean;
91
+ /**
92
+ * Whether `agent()` is allowed in this session. Driven by the parent's
93
+ * spawn policy (`getSessionSpawns`). Defaults to `true` for backward
94
+ * compatibility — when the session forbids spawning, the prelude doc
95
+ * omits the `agent()` entry so the model does not promise itself a
96
+ * helper that will only ever throw "spawns disabled".
97
+ */
98
+ spawns?: boolean;
91
99
  }
92
100
 
93
101
  export function getEvalToolDescription(options: EvalToolDescriptionOptions = {}): string {
94
102
  const py = options.py ?? true;
95
103
  const js = options.js ?? true;
96
- return prompt.render(evalDescription, { py, js });
104
+ const spawns = options.spawns ?? true;
105
+ return prompt.render(evalDescription, { py, js, spawns });
97
106
  }
98
107
 
99
108
  export interface EvalToolOptions {
@@ -169,7 +178,9 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
169
178
  get description(): string {
170
179
  if (!this.session) return getEvalToolDescription();
171
180
  const backends = resolveEvalBackends(this.session);
172
- return getEvalToolDescription({ py: backends.python, js: backends.js });
181
+ const sessionSpawns = this.session.getSessionSpawns?.() ?? "*";
182
+ const spawnsAllowed = sessionSpawns !== "" && sessionSpawns !== null;
183
+ return getEvalToolDescription({ py: backends.python, js: backends.js, spawns: spawnsAllowed });
173
184
  }
174
185
  readonly parameters = evalSchema;
175
186
  readonly concurrency = "exclusive";
package/src/tools/find.ts CHANGED
@@ -353,6 +353,13 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
353
353
  maxResults: effectiveLimit,
354
354
  sortByMtime: true,
355
355
  gitignore: useGitignore,
356
+ // parseFindPattern explicitly prepends "**/" when the user's
357
+ // pattern begins with a glob (so `*.ts` becomes `**/*.ts`).
358
+ // Anything that arrives here without "**/" was scoped to a
359
+ // single directory by the user (e.g. `dir/*`); disable the
360
+ // native auto-recursion so `dir/*` does not silently match
361
+ // `dir/sub/nested.ts`.
362
+ recursive: false,
356
363
  signal: combinedSignal,
357
364
  },
358
365
  onMatch,