reasonix 0.5.13 → 0.5.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -69,6 +69,14 @@ interface ChatMessage {
69
69
  name?: string;
70
70
  tool_call_id?: string;
71
71
  tool_calls?: ToolCall[];
72
+ /**
73
+ * R1 `reasoning_content` captured from the assistant's thinking turn.
74
+ * DeepSeek's thinking mode 400s with "reasoning_content in the
75
+ * thinking mode must be passed back" when a tool-loop continuation
76
+ * omits it from the preceding assistant message. Round-tripped for
77
+ * deepseek-reasoner turns with tool_calls; absent for deepseek-chat.
78
+ */
79
+ reasoning_content?: string | null;
72
80
  }
73
81
  interface RawUsage {
74
82
  prompt_tokens?: number;
@@ -89,6 +97,21 @@ interface ChatRequestOptions {
89
97
  responseFormat?: {
90
98
  type: "json_object" | "text";
91
99
  };
100
+ /**
101
+ * Explicitly toggle V4 thinking mode. Serialized as
102
+ * `extra_body.thinking.type = enabled|disabled`. Omit to let the
103
+ * server default apply (thinking enabled). Mainly used so the loop
104
+ * can pin the mode per model: `deepseek-chat` → disabled (legacy
105
+ * non-thinking compat), everything else → enabled.
106
+ */
107
+ thinking?: "enabled" | "disabled";
108
+ /**
109
+ * Per-request reasoning-effort cap. Serialized as the top-level
110
+ * `reasoning_effort` field. DeepSeek accepts `high` (standard) or
111
+ * `max` (Agent-class, auto-applied to Claude-Code-style flows per
112
+ * the V4 docs). Reasonix pins `max` because every turn is agent-like.
113
+ */
114
+ reasoningEffort?: "high" | "max";
92
115
  }
93
116
 
94
117
  declare class Usage {
@@ -790,6 +813,19 @@ interface LoopEvent {
790
813
  toolArgs?: string;
791
814
  /** Cumulative arguments-string length for `role === "tool_call_delta"`. */
792
815
  toolCallArgsChars?: number;
816
+ /**
817
+ * Zero-based index of the tool call this delta belongs to. Surfaces
818
+ * multi-tool turns: on a response emitting 4 write_file calls the UI
819
+ * can show "building call 3/?" instead of a context-free spinner.
820
+ */
821
+ toolCallIndex?: number;
822
+ /**
823
+ * Count of prior tool calls (this turn) whose arguments have finished
824
+ * streaming into valid JSON. Not all ready calls have been dispatched
825
+ * yet — dispatch still happens post-stream — but the user gets "2
826
+ * ready" progress feedback while later calls keep streaming.
827
+ */
828
+ toolCallReadyCount?: number;
793
829
  stats?: TurnStats;
794
830
  planState?: TypedPlanState;
795
831
  repair?: RepairReport;
@@ -827,6 +863,11 @@ interface CacheFirstLoopOptions {
827
863
  * since the default selector scores samples by plan-state uncertainty.
828
864
  */
829
865
  branch?: number | BranchOptions;
866
+ /**
867
+ * Reasoning-effort cap. See {@link ReconfigurableOptions} — default
868
+ * `max` for Reasonix (agent-class use per DeepSeek V4 docs).
869
+ */
870
+ reasoningEffort?: "high" | "max";
830
871
  /**
831
872
  * Session name. When set, the loop pre-loads the session's prior messages
832
873
  * into its log on construction, and appends every new log entry to
@@ -863,6 +904,14 @@ interface ReconfigurableOptions {
863
904
  harvest?: boolean | HarvestOptions;
864
905
  branch?: number | BranchOptions;
865
906
  stream?: boolean;
907
+ /**
908
+ * Reasoning-effort cap sent per turn (V4 thinking mode only;
909
+ * deepseek-chat ignores it). Reasonix pins `max` by default because
910
+ * DeepSeek's V4 docs flag Claude-Code-style agent loops as the
911
+ * canonical `max` use case. `/effort high` lets a user step down
912
+ * mid-session for cheaper, faster turns on simple tasks.
913
+ */
914
+ reasoningEffort?: "high" | "max";
866
915
  }
867
916
  declare class CacheFirstLoop {
868
917
  readonly client: DeepSeekClient;
@@ -879,6 +928,8 @@ declare class CacheFirstLoop {
879
928
  harvestOptions: HarvestOptions;
880
929
  branchEnabled: boolean;
881
930
  branchOptions: BranchOptions;
931
+ /** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
932
+ reasoningEffort: "high" | "max";
882
933
  sessionName: string | null;
883
934
  /**
884
935
  * Hook list, mutable so `/hooks reload` can swap it without
@@ -922,7 +973,7 @@ declare class CacheFirstLoop {
922
973
  tokensSaved: number;
923
974
  charsSaved: number;
924
975
  };
925
- private appendAndPersist;
976
+ appendAndPersist(message: ChatMessage): void;
926
977
  /**
927
978
  * Start a fresh conversation WITHOUT exiting. Drops every message
928
979
  * in the in-memory log AND rewrites the session file to empty so
@@ -970,6 +1021,15 @@ declare class CacheFirstLoop {
970
1021
  private forceSummaryAfterIterLimit;
971
1022
  run(userInput: string, onEvent?: (ev: LoopEvent) => void): Promise<string>;
972
1023
  private assistantMessage;
1024
+ /**
1025
+ * Build a synthetic assistant message we insert into the log without
1026
+ * a real API round trip (abort notices, future system injections).
1027
+ * Reasoner models reject follow-up requests whose assistant history
1028
+ * is missing `reasoning_content`, so we stamp an empty-string
1029
+ * placeholder on reasoner sessions to satisfy the validator. V3
1030
+ * doesn't care — field stays absent there.
1031
+ */
1032
+ private syntheticAssistantMessage;
973
1033
  }
974
1034
  /**
975
1035
  * R1 occasionally hallucinates tool-call markup as plain text when the
@@ -1550,6 +1610,10 @@ interface SubagentEvent {
1550
1610
  kind: "start" | "progress" | "end";
1551
1611
  /** First ~30 chars of the task prompt — used for the TUI status row. */
1552
1612
  task: string;
1613
+ /** Skill that spawned this subagent, when applicable. Stamped on every event so the TUI/logger can attribute without extra plumbing. */
1614
+ skillName?: string;
1615
+ /** Model id the child loop ran on. Stamped alongside skillName. */
1616
+ model?: string;
1553
1617
  /** Iteration count inside the child loop (number of tool results so far). */
1554
1618
  iter?: number;
1555
1619
  /** Wall-clock ms since the subagent started. */
@@ -1560,6 +1624,10 @@ interface SubagentEvent {
1560
1624
  error?: string;
1561
1625
  /** Total turns the subagent took. Set on `end`. */
1562
1626
  turns?: number;
1627
+ /** Total USD spent inside the child loop. Set on `end`. */
1628
+ costUsd?: number;
1629
+ /** Aggregated child-loop Usage (sum across turns). Set on `end`. */
1630
+ usage?: Usage;
1563
1631
  }
1564
1632
  /**
1565
1633
  * Mutable ref the registration writes through. The TUI sets `.current`
@@ -1582,7 +1650,7 @@ interface SubagentToolOptions {
1582
1650
  defaultSystem?: string;
1583
1651
  /** Project root for `applyProjectMemory` lookup. Omit in chat mode. */
1584
1652
  projectRoot?: string;
1585
- /** Default model. `deepseek-chat` (V3) by default. */
1653
+ /** Default model. `deepseek-v4-pro` by default. */
1586
1654
  defaultModel?: string;
1587
1655
  /** Iteration ceiling. Lower than the parent (16 by default). */
1588
1656
  maxToolIters?: number;
@@ -3066,6 +3134,22 @@ interface UsageRecord {
3066
3134
  costUsd: number;
3067
3135
  /** What the same turn would have cost at Claude Sonnet 4.6 rates. */
3068
3136
  claudeEquivUsd: number;
3137
+ /**
3138
+ * Distinguishes ordinary parent-loop turns from subagent summary rows.
3139
+ * Absent on pre-0.5.14 records — treat as "turn" when missing.
3140
+ */
3141
+ kind?: "turn" | "subagent";
3142
+ /** Present when `kind === "subagent"`. Attribution metadata for the /stats roll-up. */
3143
+ subagent?: {
3144
+ /** Skill that spawned it, when the spawn came from a `runAs: subagent` skill. */
3145
+ skillName?: string;
3146
+ /** First ~60 chars of the task prompt — enough context to recognize a run, never the full text. */
3147
+ taskPreview: string;
3148
+ /** Tool calls the child loop dispatched before returning. */
3149
+ toolIters: number;
3150
+ /** Wall-clock ms. */
3151
+ durationMs: number;
3152
+ };
3069
3153
  }
3070
3154
  /** Where the log lives. Tests override via `opts.path`. */
3071
3155
  declare function defaultUsageLogPath(homeDirOverride?: string): string;
@@ -3077,6 +3161,9 @@ interface AppendUsageInput {
3077
3161
  now?: number;
3078
3162
  /** Override the log path (tests). */
3079
3163
  path?: string;
3164
+ /** When appending a subagent summary row, set `kind: "subagent"` and populate `subagent`. */
3165
+ kind?: "turn" | "subagent";
3166
+ subagent?: UsageRecord["subagent"];
3080
3167
  }
3081
3168
  /**
3082
3169
  * Append one record and return it. Swallows disk errors — the TUI
@@ -3131,6 +3218,25 @@ interface UsageAggregate {
3131
3218
  firstSeen: number | null;
3132
3219
  /** Latest record's ts, or `null` when the log is empty. */
3133
3220
  lastSeen: number | null;
3221
+ /**
3222
+ * Subagent-specific rollup. Undefined when no subagent records exist
3223
+ * in the log so consumers can cheaply skip the section. Counts reflect
3224
+ * subagent SPAWNS (not internal child-loop turns) — one row per run.
3225
+ */
3226
+ subagents?: SubagentAggregate;
3227
+ }
3228
+ /** Rolled-up view of all `kind: "subagent"` records. */
3229
+ interface SubagentAggregate {
3230
+ total: number;
3231
+ costUsd: number;
3232
+ totalDurationMs: number;
3233
+ /** Per-skill breakdown. Records without `skillName` (raw spawn_subagent calls) group under `"(adhoc)"`. */
3234
+ bySkill: Array<{
3235
+ skillName: string;
3236
+ count: number;
3237
+ costUsd: number;
3238
+ durationMs: number;
3239
+ }>;
3134
3240
  }
3135
3241
  /**
3136
3242
  * Fold a flat record list into the dashboard shape — rolling windows
package/dist/index.js CHANGED
@@ -131,6 +131,12 @@ var DeepSeekClient = class {
131
131
  if (opts.temperature !== void 0) payload.temperature = opts.temperature;
132
132
  if (opts.maxTokens !== void 0) payload.max_tokens = opts.maxTokens;
133
133
  if (opts.responseFormat) payload.response_format = opts.responseFormat;
134
+ if (opts.thinking) {
135
+ payload.extra_body = { thinking: { type: opts.thinking } };
136
+ }
137
+ if (opts.reasoningEffort) {
138
+ payload.reasoning_effort = opts.reasoningEffort;
139
+ }
134
140
  return payload;
135
141
  }
136
142
  /**
@@ -345,6 +351,13 @@ async function harvest(reasoningContent, client, options = {}, signal) {
345
351
  responseFormat: { type: "json_object" },
346
352
  temperature: 0,
347
353
  maxTokens: 600,
354
+ // Pin mode + effort so a future default-model swap (e.g. someone
355
+ // sets `options.model = "deepseek-v4-pro"`) can't accidentally
356
+ // turn this micro-extraction into a multi-thousand-reasoning-
357
+ // token call. DeepSeek ignores these on non-thinking models, so
358
+ // the request stays valid regardless of the chosen model.
359
+ thinking: "disabled",
360
+ reasoningEffort: "high",
348
361
  signal
349
362
  });
350
363
  return parsePlanState(resp.content, maxItems, maxItemLen);
@@ -1563,6 +1576,11 @@ function deleteSession(name) {
1563
1576
  const path = sessionPath(name);
1564
1577
  try {
1565
1578
  unlinkSync(path);
1579
+ const sidecar = path.replace(/\.jsonl$/, ".pending.json");
1580
+ try {
1581
+ unlinkSync(sidecar);
1582
+ } catch {
1583
+ }
1566
1584
  return true;
1567
1585
  } catch {
1568
1586
  return false;
@@ -1590,13 +1608,18 @@ function countLines(path) {
1590
1608
 
1591
1609
  // src/telemetry.ts
1592
1610
  var DEEPSEEK_PRICING = {
1593
- "deepseek-chat": { inputCacheHit: 0.028, inputCacheMiss: 0.28, output: 0.42 },
1594
- "deepseek-reasoner": { inputCacheHit: 0.028, inputCacheMiss: 0.28, output: 0.42 }
1611
+ "deepseek-v4-flash": { inputCacheHit: 0.028, inputCacheMiss: 0.139, output: 0.278 },
1612
+ "deepseek-v4-pro": { inputCacheHit: 0.139, inputCacheMiss: 1.667, output: 3.333 },
1613
+ // Compat aliases — priced as v4-flash per the deprecation notice.
1614
+ "deepseek-chat": { inputCacheHit: 0.028, inputCacheMiss: 0.139, output: 0.278 },
1615
+ "deepseek-reasoner": { inputCacheHit: 0.028, inputCacheMiss: 0.139, output: 0.278 }
1595
1616
  };
1596
1617
  var CLAUDE_SONNET_PRICING = { input: 3, output: 15 };
1597
1618
  var DEEPSEEK_CONTEXT_TOKENS = {
1598
- "deepseek-chat": 131072,
1599
- "deepseek-reasoner": 131072
1619
+ "deepseek-v4-flash": 1e6,
1620
+ "deepseek-v4-pro": 1e6,
1621
+ "deepseek-chat": 1e6,
1622
+ "deepseek-reasoner": 1e6
1600
1623
  };
1601
1624
  var DEFAULT_CONTEXT_TOKENS = 131072;
1602
1625
  function costUsd(model, usage) {
@@ -1694,6 +1717,8 @@ var CacheFirstLoop = class {
1694
1717
  harvestOptions;
1695
1718
  branchEnabled;
1696
1719
  branchOptions;
1720
+ /** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
1721
+ reasoningEffort;
1697
1722
  sessionName;
1698
1723
  /**
1699
1724
  * Hook list, mutable so `/hooks reload` can swap it without
@@ -1719,7 +1744,8 @@ var CacheFirstLoop = class {
1719
1744
  this.client = opts.client;
1720
1745
  this.prefix = opts.prefix;
1721
1746
  this.tools = opts.tools ?? new ToolRegistry();
1722
- this.model = opts.model ?? "deepseek-chat";
1747
+ this.model = opts.model ?? "deepseek-v4-pro";
1748
+ this.reasoningEffort = opts.reasoningEffort ?? "max";
1723
1749
  this.maxToolIters = opts.maxToolIters ?? 64;
1724
1750
  this.hooks = opts.hooks ?? [];
1725
1751
  this.hookCwd = opts.hookCwd ?? process.cwd();
@@ -1835,6 +1861,7 @@ var CacheFirstLoop = class {
1835
1861
  configure(opts) {
1836
1862
  if (opts.model !== void 0) this.model = opts.model;
1837
1863
  if (opts.stream !== void 0) this._streamPreference = opts.stream;
1864
+ if (opts.reasoningEffort !== void 0) this.reasoningEffort = opts.reasoningEffort;
1838
1865
  if (opts.branch !== void 0) {
1839
1866
  if (typeof opts.branch === "number") {
1840
1867
  this.branchOptions = { budget: opts.branch };
@@ -1924,7 +1951,7 @@ var CacheFirstLoop = class {
1924
1951
  content: `aborted at iter ${iter}/${this.maxToolIters} \u2014 stopped without producing a summary (press \u2191 + Enter or /retry to resume)`
1925
1952
  };
1926
1953
  const stoppedMsg = "[aborted by user (Esc) \u2014 no summary produced. Ask again or /retry when ready; prior tool output is still in the log.]";
1927
- this.appendAndPersist({ role: "assistant", content: stoppedMsg });
1954
+ this.appendAndPersist(this.syntheticAssistantMessage(stoppedMsg));
1928
1955
  yield {
1929
1956
  turn: this._turn,
1930
1957
  role: "assistant_final",
@@ -2013,7 +2040,9 @@ var CacheFirstLoop = class {
2013
2040
  model: this.model,
2014
2041
  messages,
2015
2042
  tools: toolSpecs.length ? toolSpecs : void 0,
2016
- signal
2043
+ signal,
2044
+ thinking: thinkingModeForModel(this.model),
2045
+ reasoningEffort: this.reasoningEffort
2017
2046
  },
2018
2047
  {
2019
2048
  ...this.branchOptions,
@@ -2060,11 +2089,14 @@ var CacheFirstLoop = class {
2060
2089
  };
2061
2090
  } else if (this.stream) {
2062
2091
  const callBuf = /* @__PURE__ */ new Map();
2092
+ const readyIndices = /* @__PURE__ */ new Set();
2063
2093
  for await (const chunk of this.client.stream({
2064
2094
  model: this.model,
2065
2095
  messages,
2066
2096
  tools: toolSpecs.length ? toolSpecs : void 0,
2067
- signal
2097
+ signal,
2098
+ thinking: thinkingModeForModel(this.model),
2099
+ reasoningEffort: this.reasoningEffort
2068
2100
  })) {
2069
2101
  if (chunk.contentDelta) {
2070
2102
  assistantContent += chunk.contentDelta;
@@ -2095,13 +2127,18 @@ var CacheFirstLoop = class {
2095
2127
  if (d.argumentsDelta)
2096
2128
  cur.function.arguments = (cur.function.arguments ?? "") + d.argumentsDelta;
2097
2129
  callBuf.set(d.index, cur);
2130
+ if (!readyIndices.has(d.index) && cur.function.name && looksLikeCompleteJson(cur.function.arguments ?? "")) {
2131
+ readyIndices.add(d.index);
2132
+ }
2098
2133
  if (cur.function.name) {
2099
2134
  yield {
2100
2135
  turn: this._turn,
2101
2136
  role: "tool_call_delta",
2102
2137
  content: "",
2103
2138
  toolName: cur.function.name,
2104
- toolCallArgsChars: (cur.function.arguments ?? "").length
2139
+ toolCallArgsChars: (cur.function.arguments ?? "").length,
2140
+ toolCallIndex: d.index,
2141
+ toolCallReadyCount: readyIndices.size
2105
2142
  };
2106
2143
  }
2107
2144
  }
@@ -2113,7 +2150,9 @@ var CacheFirstLoop = class {
2113
2150
  model: this.model,
2114
2151
  messages,
2115
2152
  tools: toolSpecs.length ? toolSpecs : void 0,
2116
- signal
2153
+ signal,
2154
+ thinking: thinkingModeForModel(this.model),
2155
+ reasoningEffort: this.reasoningEffort
2117
2156
  });
2118
2157
  assistantContent = resp.content;
2119
2158
  reasoningContent = resp.reasoningContent ?? "";
@@ -2152,7 +2191,9 @@ var CacheFirstLoop = class {
2152
2191
  reasoningContent || null,
2153
2192
  assistantContent || null
2154
2193
  );
2155
- this.appendAndPersist(this.assistantMessage(assistantContent, repairedCalls));
2194
+ this.appendAndPersist(
2195
+ this.assistantMessage(assistantContent, repairedCalls, reasoningContent)
2196
+ );
2156
2197
  yield {
2157
2198
  turn: this._turn,
2158
2199
  role: "assistant_final",
@@ -2304,7 +2345,9 @@ ${reason}`;
2304
2345
  model: this.model,
2305
2346
  messages,
2306
2347
  // no tools → model is forced to answer in text
2307
- signal: this._turnAbort.signal
2348
+ signal: this._turnAbort.signal,
2349
+ thinking: thinkingModeForModel(this.model),
2350
+ reasoningEffort: this.reasoningEffort
2308
2351
  });
2309
2352
  const rawContent = resp.content?.trim() ?? "";
2310
2353
  const cleaned = stripHallucinatedToolMarkup(rawContent);
@@ -2314,7 +2357,7 @@ ${reason}`;
2314
2357
 
2315
2358
  ${summary}`;
2316
2359
  const summaryStats = this.stats.record(this._turn, this.model, resp.usage ?? new Usage());
2317
- this.appendAndPersist({ role: "assistant", content: summary });
2360
+ this.appendAndPersist(this.assistantMessage(summary, [], resp.reasoningContent ?? void 0));
2318
2361
  yield {
2319
2362
  turn: this._turn,
2320
2363
  role: "assistant_final",
@@ -2343,12 +2386,41 @@ ${summary}`;
2343
2386
  }
2344
2387
  return final;
2345
2388
  }
2346
- assistantMessage(content, toolCalls) {
2389
+ assistantMessage(content, toolCalls, reasoningContent) {
2347
2390
  const msg = { role: "assistant", content };
2348
2391
  if (toolCalls.length > 0) msg.tool_calls = toolCalls;
2392
+ if (reasoningContent && reasoningContent.length > 0) {
2393
+ msg.reasoning_content = reasoningContent;
2394
+ }
2395
+ return msg;
2396
+ }
2397
+ /**
2398
+ * Build a synthetic assistant message we insert into the log without
2399
+ * a real API round trip (abort notices, future system injections).
2400
+ * Reasoner models reject follow-up requests whose assistant history
2401
+ * is missing `reasoning_content`, so we stamp an empty-string
2402
+ * placeholder on reasoner sessions to satisfy the validator. V3
2403
+ * doesn't care — field stays absent there.
2404
+ */
2405
+ syntheticAssistantMessage(content) {
2406
+ const msg = { role: "assistant", content };
2407
+ if (isThinkingModeModel(this.model)) {
2408
+ msg.reasoning_content = "";
2409
+ }
2349
2410
  return msg;
2350
2411
  }
2351
2412
  };
2413
+ function isThinkingModeModel(model) {
2414
+ if (model.includes("reasoner")) return true;
2415
+ if (model === "deepseek-v4-flash" || model === "deepseek-v4-pro") return true;
2416
+ return false;
2417
+ }
2418
+ function thinkingModeForModel(model) {
2419
+ if (model === "deepseek-chat") return "disabled";
2420
+ if (model.includes("reasoner")) return "enabled";
2421
+ if (model === "deepseek-v4-flash" || model === "deepseek-v4-pro") return "enabled";
2422
+ return void 0;
2423
+ }
2352
2424
  function stripHallucinatedToolMarkup(s) {
2353
2425
  let out = s;
2354
2426
  out = out.replace(/<|DSML|function_calls>[\s\S]*?<\/?|DSML|function_calls>/g, "");
@@ -2364,6 +2436,15 @@ function safeParseToolArgs(raw) {
2364
2436
  return raw;
2365
2437
  }
2366
2438
  }
2439
+ function looksLikeCompleteJson(s) {
2440
+ if (!s || !s.trim()) return false;
2441
+ try {
2442
+ JSON.parse(s);
2443
+ return true;
2444
+ } catch {
2445
+ return false;
2446
+ }
2447
+ }
2367
2448
  function* hookWarnings(outcomes, turn) {
2368
2449
  for (const o of outcomes) {
2369
2450
  if (o.decision === "pass") continue;
@@ -3953,7 +4034,7 @@ Formatting rules (the parent renders your reply in a TUI with a real markdown re
3953
4034
  - For flow charts and diagrams: use a markdown bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
3954
4035
  var DEFAULT_MAX_RESULT_CHARS2 = 8e3;
3955
4036
  var DEFAULT_MAX_ITERS = 16;
3956
- var DEFAULT_SUBAGENT_MODEL = "deepseek-chat";
4037
+ var DEFAULT_SUBAGENT_MODEL = "deepseek-v4-pro";
3957
4038
  var SUBAGENT_TOOL_NAME = "spawn_subagent";
3958
4039
  var NEVER_INHERITED_TOOLS = /* @__PURE__ */ new Set([SUBAGENT_TOOL_NAME, "submit_plan"]);
3959
4040
  async function spawnSubagent(opts) {
@@ -3961,11 +4042,14 @@ async function spawnSubagent(opts) {
3961
4042
  const maxToolIters = opts.maxToolIters ?? DEFAULT_MAX_ITERS;
3962
4043
  const maxResultChars = opts.maxResultChars ?? DEFAULT_MAX_RESULT_CHARS2;
3963
4044
  const sink = opts.sink;
4045
+ const skillName = opts.skillName;
3964
4046
  const startedAt = Date.now();
3965
4047
  const taskPreview = opts.task.length > 30 ? `${opts.task.slice(0, 30)}\u2026` : opts.task;
3966
4048
  sink?.current?.({
3967
4049
  kind: "start",
3968
4050
  task: taskPreview,
4051
+ skillName,
4052
+ model,
3969
4053
  iter: 0,
3970
4054
  elapsedMs: 0
3971
4055
  });
@@ -3995,6 +4079,8 @@ async function spawnSubagent(opts) {
3995
4079
  sink?.current?.({
3996
4080
  kind: "progress",
3997
4081
  task: taskPreview,
4082
+ skillName,
4083
+ model,
3998
4084
  iter: toolIter,
3999
4085
  elapsedMs: Date.now() - startedAt
4000
4086
  });
@@ -4017,17 +4103,22 @@ async function spawnSubagent(opts) {
4017
4103
  const elapsedMs = Date.now() - startedAt;
4018
4104
  const turns = childLoop.stats.turns.length;
4019
4105
  const costUsd2 = childLoop.stats.totalCost;
4106
+ const usage = aggregateChildUsage(childLoop);
4020
4107
  const truncated = final.length > maxResultChars ? `${final.slice(0, maxResultChars)}
4021
4108
 
4022
4109
  [\u2026truncated ${final.length - maxResultChars} chars; ask the subagent for a tighter summary if you need more.]` : final;
4023
4110
  sink?.current?.({
4024
4111
  kind: "end",
4025
4112
  task: taskPreview,
4113
+ skillName,
4114
+ model,
4026
4115
  iter: toolIter,
4027
4116
  elapsedMs,
4028
4117
  summary: errorMessage ? void 0 : truncated.slice(0, 120),
4029
4118
  error: errorMessage,
4030
- turns
4119
+ turns,
4120
+ costUsd: costUsd2,
4121
+ usage
4031
4122
  });
4032
4123
  return {
4033
4124
  success: !errorMessage,
@@ -4036,9 +4127,23 @@ async function spawnSubagent(opts) {
4036
4127
  turns,
4037
4128
  toolIters: toolIter,
4038
4129
  elapsedMs,
4039
- costUsd: costUsd2
4130
+ costUsd: costUsd2,
4131
+ model,
4132
+ skillName,
4133
+ usage
4040
4134
  };
4041
4135
  }
4136
+ function aggregateChildUsage(loop) {
4137
+ const agg = new Usage();
4138
+ for (const t of loop.stats.turns) {
4139
+ agg.promptTokens += t.usage.promptTokens;
4140
+ agg.completionTokens += t.usage.completionTokens;
4141
+ agg.totalTokens += t.usage.totalTokens;
4142
+ agg.promptCacheHitTokens += t.usage.promptCacheHitTokens;
4143
+ agg.promptCacheMissTokens += t.usage.promptCacheMissTokens;
4144
+ }
4145
+ return agg;
4146
+ }
4042
4147
  function formatSubagentResult(r) {
4043
4148
  if (!r.success) {
4044
4149
  return JSON.stringify({
@@ -4081,8 +4186,8 @@ function registerSubagentTool(parentRegistry, opts) {
4081
4186
  },
4082
4187
  model: {
4083
4188
  type: "string",
4084
- enum: ["deepseek-chat", "deepseek-reasoner"],
4085
- description: "Which DeepSeek model the subagent runs on. 'deepseek-chat' (V3) is the default \u2014 fast and cheap. Use 'deepseek-reasoner' (R1) only when the subtask genuinely needs planning or multi-step reasoning; it is roughly 5-10x more expensive."
4189
+ enum: ["deepseek-v4-flash", "deepseek-v4-pro", "deepseek-chat", "deepseek-reasoner"],
4190
+ description: "Which DeepSeek model the subagent runs on. Default is 'deepseek-v4-pro' \u2014 the strongest model, best for complex subtasks. Override to 'deepseek-v4-flash' (or the legacy 'deepseek-chat' / 'deepseek-reasoner' aliases, which route to flash non-thinking / thinking modes) when the subtask is simple enough that flash's quality suffices \u2014 flash is roughly 12\xD7 cheaper."
4086
4191
  }
4087
4192
  },
4088
4193
  required: ["task"]
@@ -6245,6 +6350,8 @@ function appendUsage(input) {
6245
6350
  costUsd: costUsd(input.model, input.usage),
6246
6351
  claudeEquivUsd: claudeEquivalentCost(input.usage)
6247
6352
  };
6353
+ if (input.kind === "subagent") record.kind = "subagent";
6354
+ if (input.subagent) record.subagent = input.subagent;
6248
6355
  const path = input.path ?? defaultUsageLogPath();
6249
6356
  try {
6250
6357
  mkdirSync6(dirname7(path), { recursive: true });
@@ -6318,6 +6425,10 @@ function aggregateUsage(records, opts = {}) {
6318
6425
  const sessionCounts = /* @__PURE__ */ new Map();
6319
6426
  let firstSeen = null;
6320
6427
  let lastSeen = null;
6428
+ const skillCounts = /* @__PURE__ */ new Map();
6429
+ let subagentTotal = 0;
6430
+ let subagentCost = 0;
6431
+ let subagentDuration = 0;
6321
6432
  for (const r of records) {
6322
6433
  addToBucket(all, r);
6323
6434
  if (r.ts >= today.since) addToBucket(today, r);
@@ -6328,15 +6439,34 @@ function aggregateUsage(records, opts = {}) {
6328
6439
  sessionCounts.set(sessKey, (sessionCounts.get(sessKey) ?? 0) + 1);
6329
6440
  if (firstSeen === null || r.ts < firstSeen) firstSeen = r.ts;
6330
6441
  if (lastSeen === null || r.ts > lastSeen) lastSeen = r.ts;
6442
+ if (r.kind === "subagent") {
6443
+ subagentTotal += 1;
6444
+ subagentCost += r.costUsd;
6445
+ const dur = r.subagent?.durationMs ?? 0;
6446
+ subagentDuration += dur;
6447
+ const key = r.subagent?.skillName?.trim() || "(adhoc)";
6448
+ const prev = skillCounts.get(key) ?? { count: 0, costUsd: 0, durationMs: 0 };
6449
+ prev.count += 1;
6450
+ prev.costUsd += r.costUsd;
6451
+ prev.durationMs += dur;
6452
+ skillCounts.set(key, prev);
6453
+ }
6331
6454
  }
6332
6455
  const byModel = Array.from(modelCounts.entries()).map(([model, turns]) => ({ model, turns })).sort((a, b) => b.turns - a.turns);
6333
6456
  const bySession = Array.from(sessionCounts.entries()).map(([session, turns]) => ({ session, turns })).sort((a, b) => b.turns - a.turns);
6457
+ const subagents = subagentTotal > 0 ? {
6458
+ total: subagentTotal,
6459
+ costUsd: subagentCost,
6460
+ totalDurationMs: subagentDuration,
6461
+ bySkill: Array.from(skillCounts.entries()).map(([skillName, v]) => ({ skillName, ...v })).sort((a, b) => b.count - a.count)
6462
+ } : void 0;
6334
6463
  return {
6335
6464
  buckets: [today, week, month, all],
6336
6465
  byModel,
6337
6466
  bySession,
6338
6467
  firstSeen,
6339
- lastSeen
6468
+ lastSeen,
6469
+ subagents
6340
6470
  };
6341
6471
  }
6342
6472
  function formatLogSize(path = defaultUsageLogPath()) {