npm - @pi-ohm/subagents - Versions diffs - 0.6.4-dev.22169815567.1.cdde4e8 → 0.6.4-dev.22204560961.1.746486e - Mend

@pi-ohm/subagents 0.6.4-dev.22169815567.1.cdde4e8 → 0.6.4-dev.22204560961.1.746486e

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +105 -12
package/package.json +3 -3
package/src/extension.test.ts +36 -0
package/src/extension.ts +39 -1
package/src/policy.test.ts +1 -0
package/src/runtime/backend.test.ts +273 -3
package/src/runtime/backend.ts +267 -41
package/src/runtime/config-models.test.ts +120 -0
package/src/runtime/tasks.test.ts +40 -0
package/src/runtime/tasks.ts +55 -1
package/src/runtime/ui.test.ts +52 -0
package/src/runtime/ui.ts +64 -3
package/src/tools/primary.test.ts +1 -0
package/src/tools/primary.ts +5 -9
package/src/tools/task.test.ts +349 -258
package/src/tools/task.ts +222 -117

package/README.md CHANGED

@@ -27,19 +27,19 @@ from profile IDs with deterministic collision handling.
 - direct-tool execution and task-routed execution share the same runtime/result envelope
 The orchestration tool name is **`task`**. Async orchestration lifecycle
-operations (`start/status/wait/send/cancel`) are exposed through this tool, but
-default execution is sync (`async:false`). Use `async:true` only for background/long tasks.
+operations (`start/status/wait/send/cancel`) are exposed through this tool.
+Subagent starts are synchronous/blocking. `async:true` start requests are rejected.
 ## Task tool (current)
 Current behavior:
-- supports `op: "start"` for a single task payload (sync + `async:true`)
+- supports `op: "start"` for a single task payload (sync)
 - supports batched `op: "start"` payloads via `tasks[]` with optional `parallel:true`
 - supports lifecycle operations: `status`, `wait`, `send`, `cancel`
 - input normalization: `status`/`wait` accept `id` or `ids`; `op:"result"` is normalized to `status`
 - non-debug result text renders Amp-style inline message trees (prompt -> tool calls -> result)
-- running background updates use minimal inline progress lines
+- running updates stream inline tool rows in-place from SDK events
 - returns `task_id`, status, and deterministic task details
 - includes explicit wait/cancel ergonomics fields:
   - `wait_status` (`completed|timeout|aborted`)
@@ -77,13 +77,32 @@ Current behavior:
 Runtime backend is selected from `subagentBackend` config:
-- `interactive-shell` (default): executes a real nested `pi` run for subagent prompts
-  using built-in tools (`read,bash,edit,write,grep,find,ls`)
-- `interactive-sdk` (opt-in): executes subagent prompts through in-process Pi SDK
+- `interactive-sdk` (default): executes subagent prompts through in-process Pi SDK
   sessions with in-memory session/settings managers
+- `interactive-shell` (fallback): executes a real nested `pi` run for subagent prompts
+  using built-in tools (`read,bash,edit,write,grep,find,ls`)
 - `none`: uses deterministic scaffold backend (echo-style debug output)
 - `custom-plugin`: currently returns `unsupported_subagent_backend`
+Per-subagent model override is supported via `ohm.json`:
+```jsonc
+{
+  "subagents": {
+    "finder": { "model": "openai/gpt-4o" },
+    "oracle": { "model": "anthropic/claude-sonnet-4-5" },
+    "librarian": { "model": "openai/gpt-5:high" },
+  },
+}
+```
+- format is required: `<provider>/<model>`
+- optional thinking suffix: `<provider>/<model>:<thinking>`
+- valid thinking values: `off|minimal|low|medium|high|xhigh`
+- provider is normalized to lowercase
+- SDK backend validates against Pi model registry (built-ins + custom `models.json`)
+- interactive-shell backend forwards the same `--model` pattern to nested `pi`
 Optional safety fallback:
 - set `OHM_SUBAGENTS_SDK_FALLBACK_TO_CLI=true` to fallback from `interactive-sdk` to
@@ -96,6 +115,73 @@ Nested interactive-shell outputs are sanitized to strip runtime metadata lines (
 For unknown tasks/expired tasks, error categorization is explicit: `error_category: "not_found"`.
+## Operator cookbook
+### 1) Execution mode policy
+| scenario                                     | recommended mode              | why                                                     |
+| -------------------------------------------- | ----------------------------- | ------------------------------------------------------- |
+| quick lookup, single task, result needed now | `start` (sync blocking)       | simplest UX; one call, one terminal result              |
+| fan-out independent tasks                    | `start tasks[] parallel:true` | deterministic ordered aggregation + bounded concurrency |
+| follow-up on an existing active task         | `send`                        | preserves task history + follow-up prompts              |
+`async:true` start requests are rejected (`task_async_disabled`).
+### 2) Backend tradeoff matrix
+| backend                        | strengths                                                                    | tradeoffs                                        | when to pick                   |
+| ------------------------------ | ---------------------------------------------------------------------------- | ------------------------------------------------ | ------------------------------ |
+| `interactive-sdk` (default)    | structured tool/assistant events, event-derived rows, better inline fidelity | newer path                                       | default                        |
+| `interactive-shell` (fallback) | mature nested CLI behavior; straightforward rollback                         | text-capture based transcript fidelity           | explicit rollback / fallback   |
+| `none`                         | deterministic scaffold output                                                | no real execution                                | testing/demo/debug wiring only |
+| `custom-plugin`                | reserved hook                                                                | not implemented (`unsupported_subagent_backend`) | none currently                 |
+Fallback policy:
+- enable `OHM_SUBAGENTS_SDK_FALLBACK_TO_CLI=true` to downgrade only recoverable SDK bootstrap failures (`task_backend_execution_failed`) from SDK -> CLI path.
+### 3) Recommended smoke matrix
+```bash
+# default backend visibility
+printf '/ohm-subagents\n' | pi -e ./packages/subagents/extension.ts
+# explicit sdk backend visibility
+mkdir -p /tmp/pi-ohm-sdk-smoke
+cat >/tmp/pi-ohm-sdk-smoke/ohm.json <<'EOF'
+{ "subagentBackend": "interactive-sdk" }
+EOF
+printf '/ohm-subagents\n' | PI_CONFIG_DIR=/tmp/pi-ohm-sdk-smoke pi -e ./packages/subagents/extension.ts
+```
+Task lifecycle smoke checklist:
+1. sync single `start`
+2. async guard (`start async:true` returns `task_async_disabled`)
+3. batch partial acceptance (`tasks[]` mixed validity)
+4. timeout path (`wait timeout_ms`)
+5. follow-up `send` on running task
+### 4) Troubleshooting quick map
+| symptom                                 | likely cause                                                   | check/fix                                                                      |
+| --------------------------------------- | -------------------------------------------------------------- | ------------------------------------------------------------------------------ |
+| output looks scaffolded/echoed          | backend is `none`                                              | set `subagentBackend` to `interactive-shell` or `interactive-sdk`              |
+| sdk selected but execution drops to cli | fallback env enabled and sdk hit recoverable bootstrap failure | inspect `OHM_SUBAGENTS_SDK_FALLBACK_TO_CLI`; disable to keep hard sdk failures |
+| `task_wait_timeout`                     | task still non-terminal at timeout                             | increase `timeout_ms`, poll with `status`, or reduce batch size                |
+| `task_wait_aborted`                     | caller signal cancelled wait                                   | retry wait with active signal                                                  |
+| `task_expired` on old IDs               | retention/capacity eviction                                    | increase retention/cap env knobs; treat task IDs as ephemeral                  |
+| too many inline progress updates        | high-frequency non-terminal emissions                          | increase `OHM_SUBAGENTS_ONUPDATE_THROTTLE_MS`                                  |
+### 5) Guardrail env knobs
+- `OHM_SUBAGENTS_TASK_RETENTION_MS` — terminal task retention window
+- `OHM_SUBAGENTS_TASK_MAX_EVENTS` — per-task structured event cap
+- `OHM_SUBAGENTS_TASK_MAX_ENTRIES` — in-memory task registry cap
+- `OHM_SUBAGENTS_TASK_MAX_EXPIRED_ENTRIES` — expired-task reason cache cap
+- `OHM_SUBAGENTS_ONUPDATE_THROTTLE_MS` — non-terminal onUpdate emission throttle
+- `OHM_SUBAGENTS_OUTPUT_MAX_CHARS` — terminal output payload cap
 ### Output truncation policy
 Task output returned in tool payloads is capped to prevent oversized context injection.
@@ -123,8 +209,6 @@ Batch execution notes:
 - aggregate item order is deterministic (input order)
 - bounded parallelism is enforced by `subagents.taskMaxConcurrency` (default `3`)
 - task failures are isolated; one failed batch item does not abort siblings
-- async mixed-validity batch starts no longer collapse to top-level failure when tasks were accepted;
-  use acceptance counters + `batch_status` to decide polling behavior
 ## Task permission policy
@@ -160,7 +244,15 @@ Persistence details:
 - default snapshot path: `${PI_CONFIG_DIR|PI_CODING_AGENT_DIR|PI_AGENT_DIR|~/.pi/agent}/ohm.subagents.tasks.json`
 - retention window is configurable via `OHM_SUBAGENTS_TASK_RETENTION_MS` (positive integer ms)
+- per-task structured event timeline cap is configurable via `OHM_SUBAGENTS_TASK_MAX_EVENTS`
+  (default `120`)
+- in-memory task registry capacity is configurable via `OHM_SUBAGENTS_TASK_MAX_ENTRIES`
+  (default `200`); oldest terminal tasks are evicted first once cap is exceeded
+- expired-task reason cache is configurable via `OHM_SUBAGENTS_TASK_MAX_EXPIRED_ENTRIES`
+  (default `500`)
 - corrupt snapshot files are auto-recovered to `*.corrupt-<epoch>` and runtime falls back to empty state
+- inline `onUpdate` emission is throttled via `OHM_SUBAGENTS_ONUPDATE_THROTTLE_MS`
+  (default `120ms`) with duplicate-frame suppression to avoid async wait/update spam
 ## Migration notes
@@ -260,16 +352,17 @@ For profiles marked `primary:true`, direct tool input schema is subagent-specifi
 - `librarian`
   - required: `query`
-  - optional: `context`, `async`, `description`
+  - optional: `context`, `description`
 - `oracle`
   - required: `task`
-  - optional: `context`, `files[]`, `async`, `description`
+  - optional: `context`, `files[]`, `description`
 - `finder`
   - required: `query`
-  - optional: `async`, `description`
+  - optional: `description`
 Normalization behavior:
 - `context` is forwarded in a dedicated prompt section (`Context:`)
 - oracle `files[]` is forwarded in a dedicated prompt block (`Files:` + bullet paths)
+- `async:true` inputs are rejected by task lifecycle policy (`task_async_disabled`)
 - task lifecycle/result payload remains the same shape after primary normalization

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@pi-ohm/subagents",
-  "version": "0.6.4-dev.22169815567.1.cdde4e8",
+  "version": "0.6.4-dev.22204560961.1.746486e",
   "homepage": "https://github.com/pi-ohm/pi-ohm/tree/dev/packages/subagents#readme",
   "repository": {
     "type": "git",
@@ -20,8 +20,8 @@
   },
   "dependencies": {
     "@mariozechner/pi-coding-agent": "catalog:pi",
-    "@pi-ohm/config": "0.6.4-dev.22169815567.1.cdde4e8",
-    "@pi-ohm/tui": "0.6.4-dev.22169815567.1.cdde4e8",
+    "@pi-ohm/config": "0.6.4-dev.22204560961.1.746486e",
+    "@pi-ohm/tui": "0.6.4-dev.22204560961.1.746486e",
     "better-result": "catalog:",
     "zod": "catalog:"
   },

package/src/extension.test.ts CHANGED

@@ -50,6 +50,7 @@ const configFixture: OhmRuntimeConfig = {
       subagents: {},
       allowInternalRouting: false,
     },
+    profiles: {},
   },
 };
@@ -134,10 +135,45 @@ defineTest("buildSubagentDetailText preserves detailed subagent view", () => {
   });
   assert.match(text, /Subagent: Librarian/);
+  assert.match(text, /model: runtime default/);
+  assert.match(text, /thinking: runtime default/);
   assert.match(text, /When to use:/);
   assert.match(text, /Scaffold prompt:/);
 });
+defineTest("buildSubagentDetailText shows configured model + thinking override", () => {
+  const librarian = getSubagentById("librarian");
+  assert.notEqual(librarian, undefined);
+  if (!librarian) {
+    assert.fail("Expected librarian profile");
+  }
+  const subagents = configFixture.subagents;
+  assert.notEqual(subagents, undefined);
+  if (!subagents) {
+    assert.fail("Expected subagents config");
+  }
+  const text = buildSubagentDetailText({
+    config: {
+      ...configFixture,
+      subagents: {
+        ...subagents,
+        profiles: {
+          librarian: {
+            model: "openai-codex/gpt-5.2-codex:xhigh",
+          },
+        },
+      },
+    },
+    subagent: librarian,
+  });
+  assert.match(text, /model: openai-codex\/gpt-5.2-codex/);
+  assert.match(text, /thinking: xhigh/);
+  assert.match(text, /modelPattern: openai-codex\/gpt-5.2-codex:xhigh/);
+});
 defineTest("resolveSubagentsLiveUiModeCommand sets requested mode", () => {
   setTaskLiveUiMode("compact");

package/src/extension.ts CHANGED

@@ -1,5 +1,10 @@
 import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
-import { loadOhmRuntimeConfig, registerOhmSettings, type OhmRuntimeConfig } from "@pi-ohm/config";
+import {
+  getSubagentConfiguredModel,
+  loadOhmRuntimeConfig,
+  registerOhmSettings,
+  type OhmRuntimeConfig,
+} from "@pi-ohm/config";
 import { getSubagentById, OHM_SUBAGENT_CATALOG } from "./catalog";
 import { isSubagentVisibleInTaskRoster } from "./policy";
 import {
@@ -95,12 +100,21 @@ export function buildSubagentDetailText(input: {
   readonly subagent: (typeof OHM_SUBAGENT_CATALOG)[number];
 }): string {
   const isAvailable = input.subagent.id !== "painter" || input.config.features.painterImagegen;
+  const configuredModelPattern = getSubagentConfiguredModel(input.config, input.subagent.id);
+  const configuredThinking = parseConfiguredSubagentThinking(configuredModelPattern);
+  const resolvedModel =
+    configuredThinking !== undefined && configuredModelPattern
+      ? configuredModelPattern.slice(0, configuredModelPattern.lastIndexOf(":"))
+      : configuredModelPattern;
   return [
     `Subagent: ${input.subagent.name}`,
     `id: ${input.subagent.id}`,
     `available: ${isAvailable ? "yes" : "no"}`,
     `invocation: ${getSubagentInvocationMode(input.subagent.primary)}`,
+    `model: ${resolvedModel ?? "runtime default"}`,
+    `thinking: ${configuredThinking ?? "runtime default"}`,
+    `modelPattern: ${configuredModelPattern ?? "runtime default"}`,
     input.subagent.requiresPackage
       ? `requiresPackage: ${input.subagent.requiresPackage}`
       : "requiresPackage: none",
@@ -113,6 +127,30 @@ export function buildSubagentDetailText(input: {
   ].join("\n");
 }
+function parseConfiguredSubagentThinking(modelPattern: string | undefined): string | undefined {
+  if (!modelPattern) return undefined;
+  const suffixIndex = modelPattern.lastIndexOf(":");
+  if (suffixIndex <= 0 || suffixIndex >= modelPattern.length - 1) return undefined;
+  const candidate = modelPattern
+    .slice(suffixIndex + 1)
+    .trim()
+    .toLowerCase();
+  if (
+    candidate !== "off" &&
+    candidate !== "minimal" &&
+    candidate !== "low" &&
+    candidate !== "medium" &&
+    candidate !== "high" &&
+    candidate !== "xhigh"
+  ) {
+    return undefined;
+  }
+  return candidate;
+}
 export interface ResolveSubagentsLiveUiModeResult {
   readonly ok: boolean;
   readonly mode: TaskLiveUiMode;

package/src/policy.test.ts CHANGED

@@ -22,6 +22,7 @@ const baseSubagentRuntimeConfig = {
     subagents: {},
     allowInternalRouting: false,
   },
+  profiles: {},
 } as const;
 const baseConfig: OhmRuntimeConfig = {

package/src/runtime/backend.test.ts CHANGED

@@ -11,6 +11,7 @@ import {
   finalizePiSdkStreamCapture,
   PiCliTaskExecutionBackend,
   PiSdkTaskExecutionBackend,
+  parseSubagentModelSelection,
   ScaffoldTaskExecutionBackend,
   type PiCliRunner,
   type PiSdkRunner,
@@ -21,7 +22,10 @@ function defineTest(name: string, run: () => void | Promise<void>): void {
   void test(name, run);
 }
-function makeConfig(subagentBackend: OhmSubagentBackend): OhmRuntimeConfig {
+function makeConfig(
+  subagentBackend: OhmSubagentBackend,
+  profiles: Record<string, { model: string }> = {},
+): OhmRuntimeConfig {
   return {
     defaultMode: "smart",
     subagentBackend,
@@ -56,6 +60,7 @@ function makeConfig(subagentBackend: OhmSubagentBackend): OhmRuntimeConfig {
         subagents: {},
         allowInternalRouting: false,
       },
+      profiles,
     },
   };
 }
@@ -68,6 +73,63 @@ const subagentFixture: OhmSubagentDefinition = {
   scaffoldPrompt: "search prompt",
 };
+defineTest("parseSubagentModelSelection parses provider/model", () => {
+  const parsed = parseSubagentModelSelection({
+    modelPattern: "OpenAI/gpt-4o",
+    hasModel: (provider, modelId) => provider === "openai" && modelId === "gpt-4o",
+  });
+  assert.equal(parsed.ok, true);
+  if (!parsed.ok) {
+    assert.fail("Expected model selection parse to succeed");
+  }
+  assert.equal(parsed.value.provider, "openai");
+  assert.equal(parsed.value.modelId, "gpt-4o");
+  assert.equal(parsed.value.thinkingLevel, undefined);
+});
+defineTest("parseSubagentModelSelection parses optional :thinking suffix", () => {
+  const parsed = parseSubagentModelSelection({
+    modelPattern: "openai/gpt-5:high",
+    hasModel: (provider, modelId) => provider === "openai" && modelId === "gpt-5",
+  });
+  assert.equal(parsed.ok, true);
+  if (!parsed.ok) {
+    assert.fail("Expected model+thinking parse to succeed");
+  }
+  assert.equal(parsed.value.provider, "openai");
+  assert.equal(parsed.value.modelId, "gpt-5");
+  assert.equal(parsed.value.thinkingLevel, "high");
+});
+defineTest("parseSubagentModelSelection prefers full model IDs containing colons", () => {
+  const parsed = parseSubagentModelSelection({
+    modelPattern: "openrouter/vendor/model:exacto",
+    hasModel: (provider, modelId) => provider === "openrouter" && modelId === "vendor/model:exacto",
+  });
+  assert.equal(parsed.ok, true);
+  if (!parsed.ok) {
+    assert.fail("Expected full model id parse to succeed");
+  }
+  assert.equal(parsed.value.modelId, "vendor/model:exacto");
+  assert.equal(parsed.value.thinkingLevel, undefined);
+});
+defineTest("parseSubagentModelSelection rejects invalid thinking suffix", () => {
+  const parsed = parseSubagentModelSelection({
+    modelPattern: "openai/gpt-5:mega",
+    hasModel: (provider, modelId) => provider === "openai" && modelId === "gpt-5",
+  });
+  assert.equal(parsed.ok, false);
+  if (parsed.ok) {
+    assert.fail("Expected invalid thinking parse failure");
+  }
+  assert.equal(parsed.reason, "invalid_thinking_level");
+});
 defineTest("ScaffoldTaskExecutionBackend returns deterministic summary/output", async () => {
   const backend = new ScaffoldTaskExecutionBackend();
@@ -212,6 +274,77 @@ defineTest(
   },
 );
+defineTest("PiCliTaskExecutionBackend forwards configured subagent model pattern", async () => {
+  const requestedModels: string[] = [];
+  const runner: PiCliRunner = async (input) => {
+    if (input.modelPattern) {
+      requestedModels.push(input.modelPattern);
+    }
+    return {
+      exitCode: 0,
+      stdout: "finder online",
+      stderr: "",
+      timedOut: false,
+      aborted: false,
+    };
+  };
+  const backend = new PiCliTaskExecutionBackend(runner, 1_000);
+  const result = await backend.executeStart({
+    taskId: "task_4_model",
+    subagent: subagentFixture,
+    description: "Auth flow scan",
+    prompt: "Find auth validation path and refresh flow",
+    cwd: "/tmp/project",
+    config: makeConfig("interactive-shell", {
+      finder: { model: "openai/gpt-4o" },
+    }),
+    signal: undefined,
+  });
+  assert.equal(Result.isOk(result), true);
+  assert.deepEqual(requestedModels, ["openai/gpt-4o"]);
+});
+defineTest(
+  "PiCliTaskExecutionBackend forwards configured subagent model pattern with thinking suffix",
+  async () => {
+    const requestedModels: string[] = [];
+    const runner: PiCliRunner = async (input) => {
+      if (input.modelPattern) {
+        requestedModels.push(input.modelPattern);
+      }
+      return {
+        exitCode: 0,
+        stdout: "finder online",
+        stderr: "",
+        timedOut: false,
+        aborted: false,
+      };
+    };
+    const backend = new PiCliTaskExecutionBackend(runner, 1_000);
+    const result = await backend.executeStart({
+      taskId: "task_4_model_thinking",
+      subagent: subagentFixture,
+      description: "Auth flow scan",
+      prompt: "Find auth validation path and refresh flow",
+      cwd: "/tmp/project",
+      config: makeConfig("interactive-shell", {
+        finder: { model: "openai/gpt-5:high" },
+      }),
+      signal: undefined,
+    });
+    assert.equal(Result.isOk(result), true);
+    assert.deepEqual(requestedModels, ["openai/gpt-5:high"]);
+  },
+);
 defineTest(
   "PiCliTaskExecutionBackend falls back to scaffold mode when backend is none",
   async () => {
@@ -336,6 +469,143 @@ defineTest("PiSdkTaskExecutionBackend executes sdk runner for interactive-sdk",
   assert.equal(result.value.route, "interactive-sdk");
 });
+defineTest("PiSdkTaskExecutionBackend forwards streamed events to caller", async () => {
+  const backend = new PiSdkTaskExecutionBackend(async (input) => {
+    input.onEvent?.({
+      type: "tool_start",
+      toolCallId: "tool_1",
+      toolName: "read",
+      argsText: '{"path":"src/index.ts"}',
+      atEpochMs: 1001,
+    });
+    input.onEvent?.({
+      type: "tool_end",
+      toolCallId: "tool_1",
+      toolName: "read",
+      resultText: '{"ok":true}',
+      status: "success",
+      atEpochMs: 1002,
+    });
+    return {
+      output: "sdk output",
+      events: [
+        {
+          type: "tool_start",
+          toolCallId: "tool_1",
+          toolName: "read",
+          argsText: '{"path":"src/index.ts"}',
+          atEpochMs: 1001,
+        },
+        {
+          type: "tool_end",
+          toolCallId: "tool_1",
+          toolName: "read",
+          resultText: '{"ok":true}',
+          status: "success",
+          atEpochMs: 1002,
+        },
+      ],
+      timedOut: false,
+      aborted: false,
+    };
+  });
+  const streamed: string[] = [];
+  const result = await backend.executeStart({
+    taskId: "task_sdk_streamed_events",
+    subagent: subagentFixture,
+    description: "stream events",
+    prompt: "stream events",
+    cwd: "/tmp/project",
+    config: makeConfig("interactive-sdk"),
+    signal: undefined,
+    onEvent: (event) => {
+      if (event.type === "tool_start" || event.type === "tool_end") {
+        streamed.push(`${event.type}:${event.toolName}`);
+      }
+    },
+  });
+  assert.equal(Result.isOk(result), true);
+  assert.deepEqual(streamed, ["tool_start:read", "tool_end:read"]);
+});
+defineTest("PiSdkTaskExecutionBackend forwards configured subagent model pattern", async () => {
+  const requestedModels: string[] = [];
+  const runner: PiSdkRunner = async (input) => {
+    if (input.modelPattern) {
+      requestedModels.push(input.modelPattern);
+    }
+    return {
+      output: "sdk online",
+      events: [],
+      provider: "sdk-provider",
+      model: "sdk-model",
+      runtime: "pi-sdk",
+      timedOut: false,
+      aborted: false,
+    };
+  };
+  const backend = new PiSdkTaskExecutionBackend(runner, 1_000);
+  const result = await backend.executeStart({
+    taskId: "task_sdk_model",
+    subagent: subagentFixture,
+    description: "Auth flow scan",
+    prompt: "Trace auth validation path",
+    cwd: "/tmp/project",
+    config: makeConfig("interactive-sdk", {
+      finder: { model: "anthropic/claude-sonnet-4-5" },
+    }),
+    signal: undefined,
+  });
+  assert.equal(Result.isOk(result), true);
+  assert.deepEqual(requestedModels, ["anthropic/claude-sonnet-4-5"]);
+});
+defineTest(
+  "PiSdkTaskExecutionBackend forwards configured subagent model pattern with thinking suffix",
+  async () => {
+    const requestedModels: string[] = [];
+    const runner: PiSdkRunner = async (input) => {
+      if (input.modelPattern) {
+        requestedModels.push(input.modelPattern);
+      }
+      return {
+        output: "sdk online",
+        events: [],
+        provider: "sdk-provider",
+        model: "sdk-model",
+        runtime: "pi-sdk",
+        timedOut: false,
+        aborted: false,
+      };
+    };
+    const backend = new PiSdkTaskExecutionBackend(runner, 1_000);
+    const result = await backend.executeStart({
+      taskId: "task_sdk_model_thinking",
+      subagent: subagentFixture,
+      description: "Auth flow scan",
+      prompt: "Trace auth validation path",
+      cwd: "/tmp/project",
+      config: makeConfig("interactive-sdk", {
+        finder: { model: "openai/gpt-5:high" },
+      }),
+      signal: undefined,
+    });
+    assert.equal(Result.isOk(result), true);
+    assert.deepEqual(requestedModels, ["openai/gpt-5:high"]);
+  },
+);
 defineTest("Pi SDK stream capture records tool lifecycle and assistant deltas", () => {
   const capture = createPiSdkStreamCaptureState();
@@ -804,7 +1074,7 @@ defineTest("PiCliTaskExecutionBackend resolves backend IDs from runtime config",
   assert.equal(backend.resolveBackendId(makeConfig("custom-plugin")), "custom-plugin");
 });
-defineTest("createDefaultTaskExecutionBackend defaults to interactive-shell backend", () => {
+defineTest("createDefaultTaskExecutionBackend defaults to interactive-sdk backend", () => {
   const backend = createDefaultTaskExecutionBackend();
-  assert.equal(backend.id, "interactive-shell");
+  assert.equal(backend.id, "interactive-sdk");
 });