@oh-my-pi/pi-coding-agent 15.10.1 → 15.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/CHANGELOG.md +67 -0
  2. package/dist/types/cli/startup-cwd.d.ts +2 -0
  3. package/dist/types/commands/launch.d.ts +3 -0
  4. package/dist/types/config/keybindings.d.ts +2 -2
  5. package/dist/types/config/model-provider-priority.d.ts +1 -0
  6. package/dist/types/config/model-resolver.d.ts +4 -1
  7. package/dist/types/config/settings.d.ts +7 -2
  8. package/dist/types/debug/report-bundle.d.ts +3 -0
  9. package/dist/types/edit/file-snapshot-store.d.ts +18 -10
  10. package/dist/types/eval/py/__tests__/prelude.test.d.ts +1 -0
  11. package/dist/types/extensibility/extensions/types.d.ts +4 -1
  12. package/dist/types/lsp/client.d.ts +10 -0
  13. package/dist/types/main.d.ts +3 -9
  14. package/dist/types/mcp/tool-bridge.d.ts +2 -0
  15. package/dist/types/modes/components/custom-editor.d.ts +1 -1
  16. package/dist/types/modes/components/status-line.d.ts +2 -0
  17. package/dist/types/modes/controllers/event-controller.d.ts +17 -0
  18. package/dist/types/modes/interactive-mode.d.ts +1 -0
  19. package/dist/types/modes/magic-keywords.d.ts +1 -1
  20. package/dist/types/modes/markdown-prose.d.ts +1 -1
  21. package/dist/types/modes/types.d.ts +3 -0
  22. package/dist/types/modes/workflow.d.ts +3 -3
  23. package/dist/types/session/auth-storage.d.ts +1 -1
  24. package/dist/types/session/session-manager.d.ts +5 -2
  25. package/dist/types/task/executor.d.ts +10 -0
  26. package/dist/types/tools/eval.d.ts +8 -0
  27. package/dist/types/tools/gh-cache-invalidation.d.ts +6 -0
  28. package/dist/types/tools/github-cache.d.ts +12 -0
  29. package/dist/types/tools/path-utils.d.ts +8 -0
  30. package/dist/types/tools/search.d.ts +2 -2
  31. package/dist/types/tools/yield.d.ts +8 -0
  32. package/package.json +9 -9
  33. package/src/cli/args.ts +3 -1
  34. package/src/cli/dry-balance-cli.ts +2 -4
  35. package/src/cli/startup-cwd.ts +68 -0
  36. package/src/commands/launch.ts +3 -0
  37. package/src/commit/model-selection.ts +3 -2
  38. package/src/config/model-provider-priority.ts +55 -0
  39. package/src/config/model-registry.ts +4 -22
  40. package/src/config/model-resolver.ts +39 -7
  41. package/src/config/settings.ts +86 -41
  42. package/src/debug/index.ts +8 -0
  43. package/src/debug/raw-sse-buffer.ts +7 -4
  44. package/src/debug/report-bundle.ts +9 -0
  45. package/src/edit/file-snapshot-store.ts +33 -1
  46. package/src/edit/hashline/filesystem.ts +2 -1
  47. package/src/eval/__tests__/llm-bridge.test.ts +20 -0
  48. package/src/eval/js/context-manager.ts +32 -15
  49. package/src/eval/llm-bridge.ts +14 -3
  50. package/src/eval/py/__tests__/prelude.test.ts +19 -0
  51. package/src/eval/py/executor.ts +23 -11
  52. package/src/eval/py/prelude.py +1 -1
  53. package/src/extensibility/extensions/types.ts +10 -1
  54. package/src/internal-urls/docs-index.generated.ts +3 -3
  55. package/src/lsp/client.ts +23 -11
  56. package/src/lsp/config.ts +11 -1
  57. package/src/lsp/index.ts +61 -9
  58. package/src/main.ts +91 -65
  59. package/src/mcp/tool-bridge.ts +2 -0
  60. package/src/memories/index.ts +2 -2
  61. package/src/modes/components/custom-editor.ts +143 -111
  62. package/src/modes/components/model-selector.ts +59 -13
  63. package/src/modes/components/oauth-selector.ts +33 -7
  64. package/src/modes/components/status-line.ts +19 -4
  65. package/src/modes/components/tips.txt +1 -1
  66. package/src/modes/components/user-message.ts +1 -1
  67. package/src/modes/controllers/event-controller.ts +26 -0
  68. package/src/modes/controllers/input-controller.ts +46 -7
  69. package/src/modes/interactive-mode.ts +107 -20
  70. package/src/modes/magic-keywords.ts +1 -1
  71. package/src/modes/markdown-prose.ts +1 -1
  72. package/src/modes/theme/shimmer.ts +20 -9
  73. package/src/modes/types.ts +3 -0
  74. package/src/modes/workflow.ts +10 -10
  75. package/src/prompts/system/workflow-notice.md +1 -1
  76. package/src/prompts/tools/bash.md +9 -0
  77. package/src/prompts/tools/browser.md +1 -1
  78. package/src/prompts/tools/eval.md +2 -1
  79. package/src/prompts/tools/read.md +2 -2
  80. package/src/sdk.ts +26 -9
  81. package/src/session/agent-session.ts +37 -12
  82. package/src/session/auth-storage.ts +2 -0
  83. package/src/session/session-manager.ts +96 -23
  84. package/src/task/executor.ts +71 -36
  85. package/src/task/render.ts +3 -4
  86. package/src/tools/bash.ts +7 -0
  87. package/src/tools/browser/tab-supervisor.ts +13 -1
  88. package/src/tools/browser/tab-worker.ts +33 -4
  89. package/src/tools/eval.ts +13 -2
  90. package/src/tools/find.ts +7 -0
  91. package/src/tools/gh-cache-invalidation.ts +200 -0
  92. package/src/tools/github-cache.ts +25 -0
  93. package/src/tools/inspect-image.ts +2 -2
  94. package/src/tools/path-utils.ts +28 -2
  95. package/src/tools/plan-mode-guard.ts +52 -7
  96. package/src/tools/read.ts +25 -12
  97. package/src/tools/search.ts +38 -3
  98. package/src/tools/write.ts +2 -2
  99. package/src/tools/yield.ts +10 -1
  100. package/src/utils/commit-message-generator.ts +2 -2
  101. package/src/utils/enhanced-paste.ts +30 -2
  102. package/src/web/search/providers/codex.ts +37 -8
package/src/edit/file-snapshot-store.ts
@@ -8,6 +8,8 @@
8
8
  * from `@oh-my-pi/hashline`; the only coding-agent-specific concern here
9
9
  * is wiring it onto the per-session owner object.
10
10
  */
11
+ import * as fs from "node:fs";
12
+ import * as path from "node:path";
11
13
  import { InMemorySnapshotStore } from "@oh-my-pi/hashline";
12
14
  import { normalizeToLF } from "./normalize";
13
15
 
@@ -33,6 +35,36 @@ export function getFileSnapshotStore(session: FileSnapshotStoreOwner): InMemoryS
33
35
  return session.fileSnapshotStore;
34
36
  }
35
37
 
38
+ /**
39
+ * Canonicalize an absolute path into the stable key the snapshot store uses.
40
+ *
41
+ * Different code paths reach the snapshot store via different path forms:
42
+ * `read local://foo.md` records under the file's `fs.realpath` (the local
43
+ * protocol handler resolves symlinks); a subsequent `edit` may address the
44
+ * same artifact via `local://foo.md`, whose resolver does NOT realpath, or
45
+ * via the absolute path returned in the `[path#tag]` header. macOS adds the
46
+ * same hazard at the working-tree level (`/tmp/...` vs `/private/tmp/...`).
47
+ * Collapsing every key through `realpath` makes those forms fuse onto one
48
+ * snapshot entry, so a freshly-minted tag is never rejected as stale just
49
+ * because the lookup spelled the same file differently.
50
+ *
51
+ * Non-existent paths (new-file writes) fall back to a realpath of the parent
52
+ * directory + basename, then to the input. This keeps creates and updates on
53
+ * the same canonical key.
54
+ */
55
+ export function canonicalSnapshotKey(absolutePath: string): string {
56
+ try {
57
+ return fs.realpathSync.native(absolutePath);
58
+ } catch {
59
+ try {
60
+ const parent = fs.realpathSync.native(path.dirname(absolutePath));
61
+ return path.join(parent, path.basename(absolutePath));
62
+ } catch {
63
+ return absolutePath;
64
+ }
65
+ }
66
+ }
67
+
36
68
  /**
37
69
  * Read the full text of `absolutePath` (within {@link SNAPSHOT_MAX_BYTES}),
38
70
  * record it as a version snapshot, and return its content-hash tag. Returns
@@ -52,7 +84,7 @@ export async function recordFileSnapshot(
52
84
  const file = Bun.file(absolutePath);
53
85
  if (file.size > SNAPSHOT_MAX_BYTES) return undefined;
54
86
  const normalized = normalizeToLF(await file.text());
55
- return getFileSnapshotStore(session).record(absolutePath, normalized);
87
+ return getFileSnapshotStore(session).record(canonicalSnapshotKey(absolutePath), normalized);
56
88
  } catch {
57
89
  return undefined;
58
90
  }
package/src/edit/hashline/filesystem.ts
@@ -23,6 +23,7 @@ import type { ToolSession } from "../../tools";
23
23
  import { assertEditableFileContent } from "../../tools/auto-generated-guard";
24
24
  import { invalidateFsScanAfterWrite } from "../../tools/fs-cache-invalidation";
25
25
  import { enforcePlanModeWrite, resolvePlanPath } from "../../tools/plan-mode-guard";
26
+ import { canonicalSnapshotKey } from "../file-snapshot-store";
26
27
  import { readEditFileText, serializeEditFileText } from "../read-file";
27
28
  import type { LspBatchRequest } from "../renderer";
28
29
 
@@ -81,7 +82,7 @@ export class HashlineFilesystem extends Filesystem {
81
82
  }
82
83
 
83
84
  canonicalPath(relativePath: string): string {
84
- return this.resolveAbsolute(relativePath);
85
+ return canonicalSnapshotKey(this.resolveAbsolute(relativePath));
85
86
  }
86
87
 
87
88
  async readText(relativePath: string): Promise<string> {
package/src/eval/__tests__/llm-bridge.test.ts
@@ -206,6 +206,26 @@ describe("runEvalLlm", () => {
206
206
  expect(result.details).toEqual({ model: "p/smol", tier: "smol", structured: false });
207
207
  });
208
208
 
209
+ it("supplies a non-empty systemPrompt when system is omitted (codex 'Instructions are required' guard)", async () => {
210
+ // The openai-codex Responses transformer drops `instructions` when no
211
+ // system prompt is provided, and the remote endpoint then 400s with
212
+ // "Instructions are required". runEvalLlm must always carry a non-empty
213
+ // systemPrompt so `llm("…")` without a `system` argument works.
214
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
215
+ await runEvalLlm({ prompt: "q", model: "smol" }, { session: makeSession() });
216
+ const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
217
+ expect(ctx.systemPrompt).toBeDefined();
218
+ expect(ctx.systemPrompt?.length).toBeGreaterThan(0);
219
+ expect(ctx.systemPrompt?.[0]).toMatch(/.+/);
220
+ });
221
+
222
+ it("honors an explicit system prompt instead of overriding it", async () => {
223
+ const spy = vi.spyOn(ai, "completeSimple").mockResolvedValue(assistant({ text: "ok" }));
224
+ await runEvalLlm({ prompt: "q", model: "smol", system: "Be terse." }, { session: makeSession() });
225
+ const ctx = spy.mock.calls[0]?.[1] as { systemPrompt?: string[] };
226
+ expect(ctx.systemPrompt).toEqual(["Be terse."]);
227
+ });
228
+
209
229
  it("forces a respond tool call and returns its arguments in structured mode", async () => {
210
230
  const spy = vi
211
231
  .spyOn(ai, "completeSimple")
package/src/eval/js/context-manager.ts
@@ -52,8 +52,14 @@ interface JsSession {
52
52
 
53
53
  const sessions = new Map<string, JsSession>();
54
54
  const startingSessions = new Map<string, Promise<JsSession>>();
55
- const resettingSessions = new Set<string>();
56
- const READY_TIMEOUT_MS_DEFAULT = 5_000;
55
+ const resettingSessions = new Map<string, Promise<void>>();
56
+ // Worker startup (module-graph import + WorkerCore construction) is infrastructure
57
+ // cost, not user compute. Floor it independently of Bun's 5s default per-test timeout
58
+ // so a slow cold-start under load isn't aborted mid-init — terminating a still-
59
+ // initializing Bun worker triggers the same kind of terminate-race that motivates
60
+ // avoiding `vm.runInContext` (see shared/indirect-eval.ts), here surfacing as a
61
+ // SIGILL/SIGSEGV. Callers that pass a larger per-cell budget still dominate.
62
+ const WORKER_INIT_TIMEOUT_MS = 15_000;
57
63
 
58
64
  export async function executeInVmContext(options: {
59
65
  sessionKey: string;
@@ -67,17 +73,28 @@ export async function executeInVmContext(options: {
67
73
  runState: VmRunState;
68
74
  }): Promise<{ value: unknown }> {
69
75
  if (options.reset) {
70
- if (resettingSessions.has(options.sessionKey)) {
71
- throw new ToolError("JS context reset already in progress");
72
- }
73
- resettingSessions.add(options.sessionKey);
74
- try {
75
- await resetVmContext(options.sessionKey);
76
- } finally {
77
- resettingSessions.delete(options.sessionKey);
76
+ // Coalesce concurrent resets: an existing in-flight reset already
77
+ // produces a fresh context, so a follow-up `reset: true` cell should
78
+ // just wait for it rather than failing the user-visible call.
79
+ const inFlight = resettingSessions.get(options.sessionKey);
80
+ if (inFlight) await inFlight.catch(() => undefined);
81
+ else {
82
+ const resetPromise = resetVmContext(options.sessionKey);
83
+ resettingSessions.set(
84
+ options.sessionKey,
85
+ resetPromise.then(() => undefined),
86
+ );
87
+ try {
88
+ await resetPromise;
89
+ } finally {
90
+ resettingSessions.delete(options.sessionKey);
91
+ }
78
92
  }
79
- } else if (resettingSessions.has(options.sessionKey)) {
80
- throw new ToolError("JS context reset in progress");
93
+ } else {
94
+ // Internal coordination: wait for any in-flight reset to settle and
95
+ // then run on the freshly-rebuilt context.
96
+ const inFlight = resettingSessions.get(options.sessionKey);
97
+ if (inFlight) await inFlight.catch(() => undefined);
81
98
  }
82
99
  const session = await acquireSession(
83
100
  options.sessionKey,
@@ -191,9 +208,9 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
191
208
  handleSessionMessage(session, msg);
192
209
  });
193
210
  try {
194
- // Cold-start can exceed 5s on slow hosts. Let the caller's per-cell timeout dominate so
195
- // users can grant more headroom when they raise `timeout` on a cell.
196
- const readyTimeoutMs = Math.max(READY_TIMEOUT_MS_DEFAULT, timeoutMs ?? 0);
211
+ // Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
212
+ // dominates when larger so users can grant more by raising `timeout` on a cell.
213
+ const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
197
214
  await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
198
215
  worker.send({ type: "init", snapshot });
199
216
  sessions.set(sessionKey, session);
package/src/eval/llm-bridge.ts
@@ -16,7 +16,12 @@ import { type Api, Effort, getSupportedEfforts, type Model, type Tool } from "@o
16
16
  import * as z from "zod/v4";
17
17
  import { extractTextContent, extractToolCall, parseJsonPayload } from "../commit/utils";
18
18
 
19
- import { expandRoleAlias, formatModelString, resolveModelFromString } from "../config/model-resolver";
19
+ import {
20
+ expandRoleAlias,
21
+ formatModelString,
22
+ getModelMatchPreferences,
23
+ resolveModelFromString,
24
+ } from "../config/model-resolver";
20
25
  import type { ToolSession } from "../tools";
21
26
  import { ToolError } from "../tools/tool-errors";
22
27
  import { withBridgeTimeoutPause } from "./bridge-timeout";
@@ -65,7 +70,7 @@ function resolveTierModel(tier: LlmTier, session: ToolSession): Model<Api> | und
65
70
  const available = modelRegistry.getAvailable();
66
71
  if (available.length === 0) return undefined;
67
72
 
68
- const matchPreferences = { usageOrder: session.settings.getStorage()?.getModelUsageOrder() };
73
+ const matchPreferences = getModelMatchPreferences(session.settings);
69
74
  const resolve = (pattern: string | undefined): Model<Api> | undefined => {
70
75
  if (!pattern) return undefined;
71
76
  const expanded = expandRoleAlias(pattern, session.settings);
@@ -134,13 +139,19 @@ export async function runEvalLlm(args: unknown, options: EvalLlmBridgeOptions):
134
139
 
135
140
  const telemetry = resolveTelemetry(options.session.getTelemetry?.(), options.session.getSessionId?.() ?? undefined);
136
141
 
142
+ // Some providers (notably openai-codex) require a non-empty `instructions`
143
+ // field on every Responses request and 400 with "Instructions are required"
144
+ // when it is missing. Fall back to a minimal default so `llm(prompt)` works
145
+ // without forcing every caller to pass a `system` prompt.
146
+ const systemPrompt = system ? [system] : ["You are a helpful assistant."];
147
+
137
148
  // Suspend eval timeout accounting while the model request owns control. The
138
149
  // timeout clock restarts once the bridge returns to the cell runtime.
139
150
  const response = await withBridgeTimeoutPause(options.emitStatus, () =>
140
151
  instrumentedCompleteSimple(
141
152
  model,
142
153
  {
143
- systemPrompt: system ? [system] : undefined,
154
+ systemPrompt,
144
155
  messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
145
156
  tools,
146
157
  },
package/src/eval/py/__tests__/prelude.test.ts
@@ -0,0 +1,19 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import { PYTHON_PRELUDE } from "../prelude";
3
+
4
+ describe("python prelude", () => {
5
+ it("exposes read(path, offset?, limit?) with positional optional args", () => {
6
+ // The eval docs advertise `read(path, offset?=1, limit?=None)`. A
7
+ // keyword-only signature (`def read(path, *, offset=1, limit=None)`)
8
+ // makes `read("file", 10)` raise `TypeError: read() takes 1 positional
9
+ // argument but 2 were given`, which agents in the wild repeatedly hit.
10
+ // Lock the contract so the helper accepts both positional and keyword
11
+ // forms.
12
+ const match = PYTHON_PRELUDE.match(/def\s+read\(([^)]+)\)/);
13
+ expect(match).not.toBeNull();
14
+ const signature = match?.[1] ?? "";
15
+ expect(signature).not.toContain("*,");
16
+ expect(signature).toContain("offset");
17
+ expect(signature).toContain("limit");
18
+ });
19
+ });
package/src/eval/py/executor.ts
@@ -126,7 +126,7 @@ interface PythonSession {
126
126
 
127
127
  const sessions = new Map<string, PythonSession>();
128
128
  const startingSessions = new Map<string, Promise<PythonSession>>();
129
- const resettingSessions = new Set<string>();
129
+ const resettingSessions = new Map<string, Promise<void>>();
130
130
 
131
131
  function normalizeSessionCwd(cwd: string): string {
132
132
  return path.resolve(cwd);
@@ -611,17 +611,29 @@ async function executeOnSession(code: string, cwd: string, options: PythonExecut
611
611
  options.bridgeSessionId = sessionId;
612
612
  }
613
613
  if (options.reset) {
614
- if (resettingSessions.has(sessionKey)) {
615
- throw new Error("Python kernel reset already in progress");
616
- }
617
- resettingSessions.add(sessionKey);
618
- try {
619
- await resetSession(sessionKey);
620
- } finally {
621
- resettingSessions.delete(sessionKey);
614
+ // Coalesce concurrent resets: if another reset is in flight for this
615
+ // session, await it instead of throwing — the caller's intent ("start
616
+ // from a clean kernel") is satisfied once that reset settles.
617
+ const inFlight = resettingSessions.get(sessionKey);
618
+ if (inFlight) await inFlight.catch(() => undefined);
619
+ else {
620
+ const resetPromise = resetSession(sessionKey);
621
+ resettingSessions.set(
622
+ sessionKey,
623
+ resetPromise.then(() => undefined),
624
+ );
625
+ try {
626
+ await resetPromise;
627
+ } finally {
628
+ resettingSessions.delete(sessionKey);
629
+ }
622
630
  }
623
- } else if (resettingSessions.has(sessionKey)) {
624
- throw new Error("Python kernel reset in progress");
631
+ } else {
632
+ // A reset already in progress is an internal coordination state, not a
633
+ // user-visible failure. Wait for it to clear, then proceed with the
634
+ // requested execution on the freshly-restarted kernel.
635
+ const inFlight = resettingSessions.get(sessionKey);
636
+ if (inFlight) await inFlight.catch(() => undefined);
625
637
  }
626
638
  const session = await acquireSession(sessionKey, sessionId, cwd, options);
627
639
  if (options.signal?.aborted) {
package/src/eval/py/prelude.py
@@ -53,7 +53,7 @@ if "__omp_prelude_loaded__" not in globals():
53
53
  _emit_status("env", key=key, value=val, action="get")
54
54
  return val
55
55
 
56
- def read(path: str | Path, *, offset: int = 1, limit: int | None = None) -> str:
56
+ def read(path: str | Path, offset: int = 1, limit: int | None = None) -> str:
57
57
  """Read file contents. offset/limit are 1-indexed line numbers."""
58
58
  p = Path(path)
59
59
  data = p.read_text(encoding="utf-8")
package/src/extensibility/extensions/types.ts
@@ -7,7 +7,13 @@
7
7
  * - Register commands, keyboard shortcuts, and CLI flags
8
8
  * - Interact with the user via UI primitives
9
9
  */
10
- import type { AgentMessage, AgentToolResult, AgentToolUpdateCallback, ThinkingLevel } from "@oh-my-pi/pi-agent-core";
10
+ import type {
11
+ AgentMessage,
12
+ AgentToolResult,
13
+ AgentToolUpdateCallback,
14
+ ThinkingLevel,
15
+ ToolApproval,
16
+ } from "@oh-my-pi/pi-agent-core";
11
17
  import type { CompactionResult } from "@oh-my-pi/pi-agent-core/compaction";
12
18
  import type {
13
19
  Api,
@@ -392,6 +398,9 @@ export interface ToolDefinition<TParams extends TSchema = TSchema, TDetails = un
392
398
  defaultInactive?: boolean;
393
399
  /** If true, tool may stage deferred changes that require explicit resolve/discard. */
394
400
  deferrable?: boolean;
401
+ /** Tool approval tier. Defaults to `"exec"` when omitted.
402
+ * `"read"`: read-only operations. `"write"`: mutations. `"exec"`: code execution. */
403
+ approval?: ToolApproval;
395
404
  /** MCP server name for discovery/search metadata when this tool fronts an MCP server. */
396
405
  mcpServerName?: string;
397
406
  /** Original MCP tool name for discovery/search metadata. */