@pi-ohm/subagents 0.6.4-dev.22169815567.1.cdde4e8 → 0.6.4-dev.22204560961.1.746486e

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,19 +27,19 @@ from profile IDs with deterministic collision handling.
27
27
  - direct-tool execution and task-routed execution share the same runtime/result envelope
28
28
 
29
29
  The orchestration tool name is **`task`**. Async orchestration lifecycle
30
- operations (`start/status/wait/send/cancel`) are exposed through this tool, but
31
- default execution is sync (`async:false`). Use `async:true` only for background/long tasks.
30
+ operations (`start/status/wait/send/cancel`) are exposed through this tool.
31
+ Subagent starts are synchronous/blocking. `async:true` start requests are rejected.
32
32
 
33
33
  ## Task tool (current)
34
34
 
35
35
  Current behavior:
36
36
 
37
- - supports `op: "start"` for a single task payload (sync + `async:true`)
37
+ - supports `op: "start"` for a single task payload (sync)
38
38
  - supports batched `op: "start"` payloads via `tasks[]` with optional `parallel:true`
39
39
  - supports lifecycle operations: `status`, `wait`, `send`, `cancel`
40
40
  - input normalization: `status`/`wait` accept `id` or `ids`; `op:"result"` is normalized to `status`
41
41
  - non-debug result text renders Amp-style inline message trees (prompt -> tool calls -> result)
42
- - running background updates use minimal inline progress lines
42
+ - running updates stream inline tool rows in-place from SDK events
43
43
  - returns `task_id`, status, and deterministic task details
44
44
  - includes explicit wait/cancel ergonomics fields:
45
45
  - `wait_status` (`completed|timeout|aborted`)
@@ -77,13 +77,32 @@ Current behavior:
77
77
 
78
78
  Runtime backend is selected from `subagentBackend` config:
79
79
 
80
- - `interactive-shell` (default): executes a real nested `pi` run for subagent prompts
81
- using built-in tools (`read,bash,edit,write,grep,find,ls`)
82
- - `interactive-sdk` (opt-in): executes subagent prompts through in-process Pi SDK
80
+ - `interactive-sdk` (default): executes subagent prompts through in-process Pi SDK
83
81
  sessions with in-memory session/settings managers
82
+ - `interactive-shell` (fallback): executes a real nested `pi` run for subagent prompts
83
+ using built-in tools (`read,bash,edit,write,grep,find,ls`)
84
84
  - `none`: uses deterministic scaffold backend (echo-style debug output)
85
85
  - `custom-plugin`: currently returns `unsupported_subagent_backend`
86
86
 
87
+ Per-subagent model override is supported via `ohm.json`:
88
+
89
+ ```jsonc
90
+ {
91
+ "subagents": {
92
+ "finder": { "model": "openai/gpt-4o" },
93
+ "oracle": { "model": "anthropic/claude-sonnet-4-5" },
94
+ "librarian": { "model": "openai/gpt-5:high" },
95
+ },
96
+ }
97
+ ```
98
+
99
+ - required format: `<provider>/<model>`
100
+ - optional thinking suffix: `<provider>/<model>:<thinking>`
101
+ - valid thinking values: `off|minimal|low|medium|high|xhigh`
102
+ - provider is normalized to lowercase
103
+ - SDK backend validates against Pi model registry (built-ins + custom `models.json`)
104
+ - interactive-shell backend forwards the same `--model` pattern to nested `pi`
105
+
87
106
  Optional safety fallback:
88
107
 
89
108
  - set `OHM_SUBAGENTS_SDK_FALLBACK_TO_CLI=true` to fall back from `interactive-sdk` to
@@ -96,6 +115,73 @@ Nested interactive-shell outputs are sanitized to strip runtime metadata lines (
96
115
 
97
116
  For unknown tasks/expired tasks, error categorization is explicit: `error_category: "not_found"`.
98
117
 
118
+ ## Operator cookbook
119
+
120
+ ### 1) Execution mode policy
121
+
122
+ | scenario | recommended mode | why |
123
+ | -------------------------------------------- | ----------------------------- | ------------------------------------------------------- |
124
+ | quick lookup, single task, result needed now | `start` (sync blocking) | simplest UX; one call, one terminal result |
125
+ | fan-out independent tasks | `start tasks[] parallel:true` | deterministic ordered aggregation + bounded concurrency |
126
+ | follow-up on an existing active task | `send` | preserves task history + follow-up prompts |
127
+
128
+ `async:true` start requests are rejected (`task_async_disabled`).
129
+
130
+ ### 2) Backend tradeoff matrix
131
+
132
+ | backend | strengths | tradeoffs | when to pick |
133
+ | ------------------------------ | ---------------------------------------------------------------------------- | ------------------------------------------------ | ------------------------------ |
134
+ | `interactive-sdk` (default) | structured tool/assistant events, event-derived rows, better inline fidelity | newer path | default |
135
+ | `interactive-shell` (fallback) | mature nested CLI behavior; straightforward rollback | text-capture based transcript fidelity | explicit rollback / fallback |
136
+ | `none` | deterministic scaffold output | no real execution | testing/demo/debug wiring only |
137
+ | `custom-plugin` | reserved hook | not implemented (`unsupported_subagent_backend`) | none currently |
138
+
139
+ Fallback policy:
140
+
141
+ - enable `OHM_SUBAGENTS_SDK_FALLBACK_TO_CLI=true` to fall back from the SDK path to the CLI path, but only for recoverable SDK bootstrap failures (`task_backend_execution_failed`).
142
+
143
+ ### 3) Recommended smoke matrix
144
+
145
+ ```bash
146
+ # default backend visibility
147
+ printf '/ohm-subagents\n' | pi -e ./packages/subagents/extension.ts
148
+
149
+ # explicit sdk backend visibility
150
+ mkdir -p /tmp/pi-ohm-sdk-smoke
151
+ cat >/tmp/pi-ohm-sdk-smoke/ohm.json <<'EOF'
152
+ { "subagentBackend": "interactive-sdk" }
153
+ EOF
154
+ printf '/ohm-subagents\n' | PI_CONFIG_DIR=/tmp/pi-ohm-sdk-smoke pi -e ./packages/subagents/extension.ts
155
+ ```
156
+
157
+ Task lifecycle smoke checklist:
158
+
159
+ 1. sync single `start`
160
+ 2. async guard (`start async:true` returns `task_async_disabled`)
161
+ 3. batch partial acceptance (`tasks[]` mixed validity)
162
+ 4. timeout path (`wait timeout_ms`)
163
+ 5. follow-up `send` on running task
164
+
165
+ ### 4) Troubleshooting quick map
166
+
167
+ | symptom | likely cause | check/fix |
168
+ | --------------------------------------- | -------------------------------------------------------------- | ------------------------------------------------------------------------------ |
169
+ | output looks scaffolded/echoed | backend is `none` | set `subagentBackend` to `interactive-shell` or `interactive-sdk` |
170
+ | sdk selected but execution drops to cli | fallback env enabled and sdk hit recoverable bootstrap failure | inspect `OHM_SUBAGENTS_SDK_FALLBACK_TO_CLI`; disable to keep hard sdk failures |
171
+ | `task_wait_timeout` | task still non-terminal at timeout | increase `timeout_ms`, poll with `status`, or reduce batch size |
172
+ | `task_wait_aborted` | caller signal cancelled wait | retry wait with active signal |
173
+ | `task_expired` on old IDs | retention/capacity eviction | increase retention/cap env knobs; treat task IDs as ephemeral |
174
+ | too many inline progress updates | high-frequency non-terminal emissions | increase `OHM_SUBAGENTS_ONUPDATE_THROTTLE_MS` |
175
+
176
+ ### 5) Guardrail env knobs
177
+
178
+ - `OHM_SUBAGENTS_TASK_RETENTION_MS` — terminal task retention window
179
+ - `OHM_SUBAGENTS_TASK_MAX_EVENTS` — per-task structured event cap
180
+ - `OHM_SUBAGENTS_TASK_MAX_ENTRIES` — in-memory task registry cap
181
+ - `OHM_SUBAGENTS_TASK_MAX_EXPIRED_ENTRIES` — expired-task reason cache cap
182
+ - `OHM_SUBAGENTS_ONUPDATE_THROTTLE_MS` — non-terminal onUpdate emission throttle
183
+ - `OHM_SUBAGENTS_OUTPUT_MAX_CHARS` — terminal output payload cap
184
+
99
185
  ### Output truncation policy
100
186
 
101
187
  Task output returned in tool payloads is capped to prevent oversized context injection.
@@ -123,8 +209,6 @@ Batch execution notes:
123
209
  - aggregate item order is deterministic (input order)
124
210
  - bounded parallelism is enforced by `subagents.taskMaxConcurrency` (default `3`)
125
211
  - task failures are isolated; one failed batch item does not abort siblings
126
- - async mixed-validity batch starts no longer collapse to top-level failure when tasks were accepted;
127
- use acceptance counters + `batch_status` to decide polling behavior
128
212
 
129
213
  ## Task permission policy
130
214
 
@@ -160,7 +244,15 @@ Persistence details:
160
244
 
161
245
  - default snapshot path: `${PI_CONFIG_DIR|PI_CODING_AGENT_DIR|PI_AGENT_DIR|~/.pi/agent}/ohm.subagents.tasks.json`
162
246
  - retention window is configurable via `OHM_SUBAGENTS_TASK_RETENTION_MS` (positive integer ms)
247
+ - per-task structured event timeline cap is configurable via `OHM_SUBAGENTS_TASK_MAX_EVENTS`
248
+ (default `120`)
249
+ - in-memory task registry capacity is configurable via `OHM_SUBAGENTS_TASK_MAX_ENTRIES`
250
+ (default `200`); oldest terminal tasks are evicted first once cap is exceeded
251
+ - expired-task reason cache is configurable via `OHM_SUBAGENTS_TASK_MAX_EXPIRED_ENTRIES`
252
+ (default `500`)
163
253
  - corrupt snapshot files are auto-recovered to `*.corrupt-<epoch>` and runtime falls back to empty state
254
+ - inline `onUpdate` emission is throttled via `OHM_SUBAGENTS_ONUPDATE_THROTTLE_MS`
255
+ (default `120ms`) with duplicate-frame suppression to avoid async wait/update spam
164
256
 
165
257
  ## Migration notes
166
258
 
@@ -260,16 +352,17 @@ For profiles marked `primary:true`, direct tool input schema is subagent-specifi
260
352
 
261
353
  - `librarian`
262
354
  - required: `query`
263
- - optional: `context`, `async`, `description`
355
+ - optional: `context`, `description`
264
356
  - `oracle`
265
357
  - required: `task`
266
- - optional: `context`, `files[]`, `async`, `description`
358
+ - optional: `context`, `files[]`, `description`
267
359
  - `finder`
268
360
  - required: `query`
269
- - optional: `async`, `description`
361
+ - optional: `description`
270
362
 
271
363
  Normalization behavior:
272
364
 
273
365
  - `context` is forwarded in a dedicated prompt section (`Context:`)
274
366
  - oracle `files[]` is forwarded in a dedicated prompt block (`Files:` + bullet paths)
367
+ - `async:true` inputs are rejected by task lifecycle policy (`task_async_disabled`)
275
368
  - task lifecycle/result payload remains the same shape after primary normalization
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pi-ohm/subagents",
3
- "version": "0.6.4-dev.22169815567.1.cdde4e8",
3
+ "version": "0.6.4-dev.22204560961.1.746486e",
4
4
  "homepage": "https://github.com/pi-ohm/pi-ohm/tree/dev/packages/subagents#readme",
5
5
  "repository": {
6
6
  "type": "git",
@@ -20,8 +20,8 @@
20
20
  },
21
21
  "dependencies": {
22
22
  "@mariozechner/pi-coding-agent": "catalog:pi",
23
- "@pi-ohm/config": "0.6.4-dev.22169815567.1.cdde4e8",
24
- "@pi-ohm/tui": "0.6.4-dev.22169815567.1.cdde4e8",
23
+ "@pi-ohm/config": "0.6.4-dev.22204560961.1.746486e",
24
+ "@pi-ohm/tui": "0.6.4-dev.22204560961.1.746486e",
25
25
  "better-result": "catalog:",
26
26
  "zod": "catalog:"
27
27
  },
@@ -50,6 +50,7 @@ const configFixture: OhmRuntimeConfig = {
50
50
  subagents: {},
51
51
  allowInternalRouting: false,
52
52
  },
53
+ profiles: {},
53
54
  },
54
55
  };
55
56
 
@@ -134,10 +135,45 @@ defineTest("buildSubagentDetailText preserves detailed subagent view", () => {
134
135
  });
135
136
 
136
137
  assert.match(text, /Subagent: Librarian/);
138
+ assert.match(text, /model: runtime default/);
139
+ assert.match(text, /thinking: runtime default/);
137
140
  assert.match(text, /When to use:/);
138
141
  assert.match(text, /Scaffold prompt:/);
139
142
  });
140
143
 
144
+ defineTest("buildSubagentDetailText shows configured model + thinking override", () => {
145
+ const librarian = getSubagentById("librarian");
146
+ assert.notEqual(librarian, undefined);
147
+ if (!librarian) {
148
+ assert.fail("Expected librarian profile");
149
+ }
150
+
151
+ const subagents = configFixture.subagents;
152
+ assert.notEqual(subagents, undefined);
153
+ if (!subagents) {
154
+ assert.fail("Expected subagents config");
155
+ }
156
+
157
+ const text = buildSubagentDetailText({
158
+ config: {
159
+ ...configFixture,
160
+ subagents: {
161
+ ...subagents,
162
+ profiles: {
163
+ librarian: {
164
+ model: "openai-codex/gpt-5.2-codex:xhigh",
165
+ },
166
+ },
167
+ },
168
+ },
169
+ subagent: librarian,
170
+ });
171
+
172
+ assert.match(text, /model: openai-codex\/gpt-5.2-codex/);
173
+ assert.match(text, /thinking: xhigh/);
174
+ assert.match(text, /modelPattern: openai-codex\/gpt-5.2-codex:xhigh/);
175
+ });
176
+
141
177
  defineTest("resolveSubagentsLiveUiModeCommand sets requested mode", () => {
142
178
  setTaskLiveUiMode("compact");
143
179
 
package/src/extension.ts CHANGED
@@ -1,5 +1,10 @@
1
1
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
- import { loadOhmRuntimeConfig, registerOhmSettings, type OhmRuntimeConfig } from "@pi-ohm/config";
2
+ import {
3
+ getSubagentConfiguredModel,
4
+ loadOhmRuntimeConfig,
5
+ registerOhmSettings,
6
+ type OhmRuntimeConfig,
7
+ } from "@pi-ohm/config";
3
8
  import { getSubagentById, OHM_SUBAGENT_CATALOG } from "./catalog";
4
9
  import { isSubagentVisibleInTaskRoster } from "./policy";
5
10
  import {
@@ -95,12 +100,21 @@ export function buildSubagentDetailText(input: {
95
100
  readonly subagent: (typeof OHM_SUBAGENT_CATALOG)[number];
96
101
  }): string {
97
102
  const isAvailable = input.subagent.id !== "painter" || input.config.features.painterImagegen;
103
+ const configuredModelPattern = getSubagentConfiguredModel(input.config, input.subagent.id);
104
+ const configuredThinking = parseConfiguredSubagentThinking(configuredModelPattern);
105
+ const resolvedModel =
106
+ configuredThinking !== undefined && configuredModelPattern
107
+ ? configuredModelPattern.slice(0, configuredModelPattern.lastIndexOf(":"))
108
+ : configuredModelPattern;
98
109
 
99
110
  return [
100
111
  `Subagent: ${input.subagent.name}`,
101
112
  `id: ${input.subagent.id}`,
102
113
  `available: ${isAvailable ? "yes" : "no"}`,
103
114
  `invocation: ${getSubagentInvocationMode(input.subagent.primary)}`,
115
+ `model: ${resolvedModel ?? "runtime default"}`,
116
+ `thinking: ${configuredThinking ?? "runtime default"}`,
117
+ `modelPattern: ${configuredModelPattern ?? "runtime default"}`,
104
118
  input.subagent.requiresPackage
105
119
  ? `requiresPackage: ${input.subagent.requiresPackage}`
106
120
  : "requiresPackage: none",
@@ -113,6 +127,30 @@ export function buildSubagentDetailText(input: {
113
127
  ].join("\n");
114
128
  }
115
129
 
130
+ function parseConfiguredSubagentThinking(modelPattern: string | undefined): string | undefined {
131
+ if (!modelPattern) return undefined;
132
+
133
+ const suffixIndex = modelPattern.lastIndexOf(":");
134
+ if (suffixIndex <= 0 || suffixIndex >= modelPattern.length - 1) return undefined;
135
+
136
+ const candidate = modelPattern
137
+ .slice(suffixIndex + 1)
138
+ .trim()
139
+ .toLowerCase();
140
+ if (
141
+ candidate !== "off" &&
142
+ candidate !== "minimal" &&
143
+ candidate !== "low" &&
144
+ candidate !== "medium" &&
145
+ candidate !== "high" &&
146
+ candidate !== "xhigh"
147
+ ) {
148
+ return undefined;
149
+ }
150
+
151
+ return candidate;
152
+ }
153
+
116
154
  export interface ResolveSubagentsLiveUiModeResult {
117
155
  readonly ok: boolean;
118
156
  readonly mode: TaskLiveUiMode;
@@ -22,6 +22,7 @@ const baseSubagentRuntimeConfig = {
22
22
  subagents: {},
23
23
  allowInternalRouting: false,
24
24
  },
25
+ profiles: {},
25
26
  } as const;
26
27
 
27
28
  const baseConfig: OhmRuntimeConfig = {
@@ -11,6 +11,7 @@ import {
11
11
  finalizePiSdkStreamCapture,
12
12
  PiCliTaskExecutionBackend,
13
13
  PiSdkTaskExecutionBackend,
14
+ parseSubagentModelSelection,
14
15
  ScaffoldTaskExecutionBackend,
15
16
  type PiCliRunner,
16
17
  type PiSdkRunner,
@@ -21,7 +22,10 @@ function defineTest(name: string, run: () => void | Promise<void>): void {
21
22
  void test(name, run);
22
23
  }
23
24
 
24
- function makeConfig(subagentBackend: OhmSubagentBackend): OhmRuntimeConfig {
25
+ function makeConfig(
26
+ subagentBackend: OhmSubagentBackend,
27
+ profiles: Record<string, { model: string }> = {},
28
+ ): OhmRuntimeConfig {
25
29
  return {
26
30
  defaultMode: "smart",
27
31
  subagentBackend,
@@ -56,6 +60,7 @@ function makeConfig(subagentBackend: OhmSubagentBackend): OhmRuntimeConfig {
56
60
  subagents: {},
57
61
  allowInternalRouting: false,
58
62
  },
63
+ profiles,
59
64
  },
60
65
  };
61
66
  }
@@ -68,6 +73,63 @@ const subagentFixture: OhmSubagentDefinition = {
68
73
  scaffoldPrompt: "search prompt",
69
74
  };
70
75
 
76
+ defineTest("parseSubagentModelSelection parses provider/model", () => {
77
+ const parsed = parseSubagentModelSelection({
78
+ modelPattern: "OpenAI/gpt-4o",
79
+ hasModel: (provider, modelId) => provider === "openai" && modelId === "gpt-4o",
80
+ });
81
+
82
+ assert.equal(parsed.ok, true);
83
+ if (!parsed.ok) {
84
+ assert.fail("Expected model selection parse to succeed");
85
+ }
86
+ assert.equal(parsed.value.provider, "openai");
87
+ assert.equal(parsed.value.modelId, "gpt-4o");
88
+ assert.equal(parsed.value.thinkingLevel, undefined);
89
+ });
90
+
91
+ defineTest("parseSubagentModelSelection parses optional :thinking suffix", () => {
92
+ const parsed = parseSubagentModelSelection({
93
+ modelPattern: "openai/gpt-5:high",
94
+ hasModel: (provider, modelId) => provider === "openai" && modelId === "gpt-5",
95
+ });
96
+
97
+ assert.equal(parsed.ok, true);
98
+ if (!parsed.ok) {
99
+ assert.fail("Expected model+thinking parse to succeed");
100
+ }
101
+ assert.equal(parsed.value.provider, "openai");
102
+ assert.equal(parsed.value.modelId, "gpt-5");
103
+ assert.equal(parsed.value.thinkingLevel, "high");
104
+ });
105
+
106
+ defineTest("parseSubagentModelSelection prefers full model IDs containing colons", () => {
107
+ const parsed = parseSubagentModelSelection({
108
+ modelPattern: "openrouter/vendor/model:exacto",
109
+ hasModel: (provider, modelId) => provider === "openrouter" && modelId === "vendor/model:exacto",
110
+ });
111
+
112
+ assert.equal(parsed.ok, true);
113
+ if (!parsed.ok) {
114
+ assert.fail("Expected full model id parse to succeed");
115
+ }
116
+ assert.equal(parsed.value.modelId, "vendor/model:exacto");
117
+ assert.equal(parsed.value.thinkingLevel, undefined);
118
+ });
119
+
120
+ defineTest("parseSubagentModelSelection rejects invalid thinking suffix", () => {
121
+ const parsed = parseSubagentModelSelection({
122
+ modelPattern: "openai/gpt-5:mega",
123
+ hasModel: (provider, modelId) => provider === "openai" && modelId === "gpt-5",
124
+ });
125
+
126
+ assert.equal(parsed.ok, false);
127
+ if (parsed.ok) {
128
+ assert.fail("Expected invalid thinking parse failure");
129
+ }
130
+ assert.equal(parsed.reason, "invalid_thinking_level");
131
+ });
132
+
71
133
  defineTest("ScaffoldTaskExecutionBackend returns deterministic summary/output", async () => {
72
134
  const backend = new ScaffoldTaskExecutionBackend();
73
135
 
@@ -212,6 +274,77 @@ defineTest(
212
274
  },
213
275
  );
214
276
 
277
+ defineTest("PiCliTaskExecutionBackend forwards configured subagent model pattern", async () => {
278
+ const requestedModels: string[] = [];
279
+
280
+ const runner: PiCliRunner = async (input) => {
281
+ if (input.modelPattern) {
282
+ requestedModels.push(input.modelPattern);
283
+ }
284
+
285
+ return {
286
+ exitCode: 0,
287
+ stdout: "finder online",
288
+ stderr: "",
289
+ timedOut: false,
290
+ aborted: false,
291
+ };
292
+ };
293
+
294
+ const backend = new PiCliTaskExecutionBackend(runner, 1_000);
295
+ const result = await backend.executeStart({
296
+ taskId: "task_4_model",
297
+ subagent: subagentFixture,
298
+ description: "Auth flow scan",
299
+ prompt: "Find auth validation path and refresh flow",
300
+ cwd: "/tmp/project",
301
+ config: makeConfig("interactive-shell", {
302
+ finder: { model: "openai/gpt-4o" },
303
+ }),
304
+ signal: undefined,
305
+ });
306
+
307
+ assert.equal(Result.isOk(result), true);
308
+ assert.deepEqual(requestedModels, ["openai/gpt-4o"]);
309
+ });
310
+
311
+ defineTest(
312
+ "PiCliTaskExecutionBackend forwards configured subagent model pattern with thinking suffix",
313
+ async () => {
314
+ const requestedModels: string[] = [];
315
+
316
+ const runner: PiCliRunner = async (input) => {
317
+ if (input.modelPattern) {
318
+ requestedModels.push(input.modelPattern);
319
+ }
320
+
321
+ return {
322
+ exitCode: 0,
323
+ stdout: "finder online",
324
+ stderr: "",
325
+ timedOut: false,
326
+ aborted: false,
327
+ };
328
+ };
329
+
330
+ const backend = new PiCliTaskExecutionBackend(runner, 1_000);
331
+ const result = await backend.executeStart({
332
+ taskId: "task_4_model_thinking",
333
+ subagent: subagentFixture,
334
+ description: "Auth flow scan",
335
+ prompt: "Find auth validation path and refresh flow",
336
+ cwd: "/tmp/project",
337
+ config: makeConfig("interactive-shell", {
338
+ finder: { model: "openai/gpt-5:high" },
339
+ }),
340
+ signal: undefined,
341
+ });
342
+
343
+ assert.equal(Result.isOk(result), true);
344
+ assert.deepEqual(requestedModels, ["openai/gpt-5:high"]);
345
+ },
346
+ );
347
+
215
348
  defineTest(
216
349
  "PiCliTaskExecutionBackend falls back to scaffold mode when backend is none",
217
350
  async () => {
@@ -336,6 +469,143 @@ defineTest("PiSdkTaskExecutionBackend executes sdk runner for interactive-sdk",
336
469
  assert.equal(result.value.route, "interactive-sdk");
337
470
  });
338
471
 
472
+ defineTest("PiSdkTaskExecutionBackend forwards streamed events to caller", async () => {
473
+ const backend = new PiSdkTaskExecutionBackend(async (input) => {
474
+ input.onEvent?.({
475
+ type: "tool_start",
476
+ toolCallId: "tool_1",
477
+ toolName: "read",
478
+ argsText: '{"path":"src/index.ts"}',
479
+ atEpochMs: 1001,
480
+ });
481
+ input.onEvent?.({
482
+ type: "tool_end",
483
+ toolCallId: "tool_1",
484
+ toolName: "read",
485
+ resultText: '{"ok":true}',
486
+ status: "success",
487
+ atEpochMs: 1002,
488
+ });
489
+
490
+ return {
491
+ output: "sdk output",
492
+ events: [
493
+ {
494
+ type: "tool_start",
495
+ toolCallId: "tool_1",
496
+ toolName: "read",
497
+ argsText: '{"path":"src/index.ts"}',
498
+ atEpochMs: 1001,
499
+ },
500
+ {
501
+ type: "tool_end",
502
+ toolCallId: "tool_1",
503
+ toolName: "read",
504
+ resultText: '{"ok":true}',
505
+ status: "success",
506
+ atEpochMs: 1002,
507
+ },
508
+ ],
509
+ timedOut: false,
510
+ aborted: false,
511
+ };
512
+ });
513
+
514
+ const streamed: string[] = [];
515
+ const result = await backend.executeStart({
516
+ taskId: "task_sdk_streamed_events",
517
+ subagent: subagentFixture,
518
+ description: "stream events",
519
+ prompt: "stream events",
520
+ cwd: "/tmp/project",
521
+ config: makeConfig("interactive-sdk"),
522
+ signal: undefined,
523
+ onEvent: (event) => {
524
+ if (event.type === "tool_start" || event.type === "tool_end") {
525
+ streamed.push(`${event.type}:${event.toolName}`);
526
+ }
527
+ },
528
+ });
529
+
530
+ assert.equal(Result.isOk(result), true);
531
+ assert.deepEqual(streamed, ["tool_start:read", "tool_end:read"]);
532
+ });
533
+
534
+ defineTest("PiSdkTaskExecutionBackend forwards configured subagent model pattern", async () => {
535
+ const requestedModels: string[] = [];
536
+
537
+ const runner: PiSdkRunner = async (input) => {
538
+ if (input.modelPattern) {
539
+ requestedModels.push(input.modelPattern);
540
+ }
541
+
542
+ return {
543
+ output: "sdk online",
544
+ events: [],
545
+ provider: "sdk-provider",
546
+ model: "sdk-model",
547
+ runtime: "pi-sdk",
548
+ timedOut: false,
549
+ aborted: false,
550
+ };
551
+ };
552
+
553
+ const backend = new PiSdkTaskExecutionBackend(runner, 1_000);
554
+ const result = await backend.executeStart({
555
+ taskId: "task_sdk_model",
556
+ subagent: subagentFixture,
557
+ description: "Auth flow scan",
558
+ prompt: "Trace auth validation path",
559
+ cwd: "/tmp/project",
560
+ config: makeConfig("interactive-sdk", {
561
+ finder: { model: "anthropic/claude-sonnet-4-5" },
562
+ }),
563
+ signal: undefined,
564
+ });
565
+
566
+ assert.equal(Result.isOk(result), true);
567
+ assert.deepEqual(requestedModels, ["anthropic/claude-sonnet-4-5"]);
568
+ });
569
+
570
+ defineTest(
571
+ "PiSdkTaskExecutionBackend forwards configured subagent model pattern with thinking suffix",
572
+ async () => {
573
+ const requestedModels: string[] = [];
574
+
575
+ const runner: PiSdkRunner = async (input) => {
576
+ if (input.modelPattern) {
577
+ requestedModels.push(input.modelPattern);
578
+ }
579
+
580
+ return {
581
+ output: "sdk online",
582
+ events: [],
583
+ provider: "sdk-provider",
584
+ model: "sdk-model",
585
+ runtime: "pi-sdk",
586
+ timedOut: false,
587
+ aborted: false,
588
+ };
589
+ };
590
+
591
+ const backend = new PiSdkTaskExecutionBackend(runner, 1_000);
592
+ const result = await backend.executeStart({
593
+ taskId: "task_sdk_model_thinking",
594
+ subagent: subagentFixture,
595
+ description: "Auth flow scan",
596
+ prompt: "Trace auth validation path",
597
+ cwd: "/tmp/project",
598
+ config: makeConfig("interactive-sdk", {
599
+ finder: { model: "openai/gpt-5:high" },
600
+ }),
601
+ signal: undefined,
602
+ });
603
+
604
+ assert.equal(Result.isOk(result), true);
605
+ assert.deepEqual(requestedModels, ["openai/gpt-5:high"]);
606
+ },
607
+ );
608
+
339
609
  defineTest("Pi SDK stream capture records tool lifecycle and assistant deltas", () => {
340
610
  const capture = createPiSdkStreamCaptureState();
341
611
 
@@ -804,7 +1074,7 @@ defineTest("PiCliTaskExecutionBackend resolves backend IDs from runtime config",
804
1074
  assert.equal(backend.resolveBackendId(makeConfig("custom-plugin")), "custom-plugin");
805
1075
  });
806
1076
 
807
- defineTest("createDefaultTaskExecutionBackend defaults to interactive-shell backend", () => {
1077
+ defineTest("createDefaultTaskExecutionBackend defaults to interactive-sdk backend", () => {
808
1078
  const backend = createDefaultTaskExecutionBackend();
809
- assert.equal(backend.id, "interactive-shell");
1079
+ assert.equal(backend.id, "interactive-sdk");
810
1080
  });