@llmist/cli 15.11.0 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -51,7 +51,11 @@ var OPTION_FLAGS = {
   maxRetries: "--max-retries <count>",
   retryMinTimeout: "--retry-min-timeout <ms>",
   retryMaxTimeout: "--retry-max-timeout <ms>",
-  noRetry: "--no-retry"
+  noRetry: "--no-retry",
+  // Reasoning options
+  reasoning: "--reasoning [effort]",
+  noReasoning: "--no-reasoning",
+  reasoningBudget: "--reasoning-budget <tokens>"
 };
 var OPTION_DESCRIPTIONS = {
   model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -88,7 +92,11 @@ var OPTION_DESCRIPTIONS = {
   maxRetries: "Maximum retry attempts for failed API calls.",
   retryMinTimeout: "Initial retry delay in milliseconds.",
   retryMaxTimeout: "Maximum retry delay in milliseconds.",
-  noRetry: "Disable retry logic for API calls."
+  noRetry: "Disable retry logic for API calls.",
+  // Reasoning descriptions
+  reasoning: "Enable reasoning mode. Optional effort: none, low, medium, high, maximum (default: medium).",
+  noReasoning: "Disable auto-enabled reasoning for reasoning-capable models.",
+  reasoningBudget: "Explicit reasoning token budget (Anthropic/Gemini 2.5). Overrides effort level."
 };
 var SUMMARY_PREFIX = "[llmist]";

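Note on parsing: given Commander's conventions for an optional value ([effort]) and a paired --no- flag, plus the checks the new code performs downstream (options.reasoning === false, typeof options.reasoning === "string"), the flags should surface roughly as follows. A sketch, not output from the package:

    // Assumed Commander parse results for the new flags:
    // llmist complete --reasoning              -> options.reasoning === true   (effort defaults to medium)
    // llmist complete --reasoning high         -> options.reasoning === "high"
    // llmist complete --no-reasoning           -> options.reasoning === false
    // llmist complete --reasoning-budget 2048  -> options.reasoningBudget === 2048
    //   (budget validated by createNumericParser({ integer: true, min: 1 }))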
@@ -98,7 +106,7 @@ import { Command, InvalidArgumentError as InvalidArgumentError2 } from "commande
 // package.json
 var package_default = {
   name: "@llmist/cli",
-  version: "15.11.0",
+  version: "15.13.0",
   description: "CLI for llmist - run LLM agents from the command line",
   type: "module",
   main: "dist/cli.js",
@@ -154,7 +162,7 @@ var package_default = {
     node: ">=22.0.0"
   },
   dependencies: {
-    llmist: "^15.11.0",
+    llmist: "^15.13.0",
     "@unblessed/node": "^1.0.0-alpha.23",
     chalk: "^5.6.2",
     commander: "^12.1.0",
@@ -168,7 +176,7 @@ var package_default = {
     zod: "^4.1.12"
   },
   devDependencies: {
-    "@llmist/testing": "^15.11.0",
+    "@llmist/testing": "^15.13.0",
     "@types/diff": "^8.0.0",
     "@types/js-yaml": "^4.0.9",
     "@types/marked-terminal": "^6.1.1",
@@ -180,7 +188,7 @@ var package_default = {
 };

 // src/agent-command.ts
-import { AgentBuilder, GadgetRegistry, isAbortError, text } from "llmist";
+import { AgentBuilder, GadgetRegistry, HookPresets, isAbortError, text } from "llmist";

 // src/builtin-gadgets.ts
 import { createGadget, HumanInputRequiredException, TaskCompletionSignal } from "llmist";
@@ -420,6 +428,7 @@ var COMPLETE_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "log-llm-requests",
   "rate-limits",
   "retry",
+  "reasoning",
   "type"
   // Allowed for inheritance compatibility, ignored for built-in commands
 ]);
@@ -452,6 +461,7 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "log-llm-requests",
   "rate-limits",
   "retry",
+  "reasoning",
   "type"
   // Allowed for inheritance compatibility, ignored for built-in commands
 ]);
@@ -684,6 +694,39 @@ function validateRetryConfig(value, section) {
   }
   return result;
 }
+var REASONING_CONFIG_KEYS = /* @__PURE__ */ new Set(["enabled", "effort", "budget-tokens"]);
+var VALID_REASONING_EFFORTS = /* @__PURE__ */ new Set(["none", "low", "medium", "high", "maximum"]);
+function validateReasoningConfig(value, section) {
+  if (typeof value !== "object" || value === null || Array.isArray(value)) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const raw = value;
+  const result = {};
+  for (const [key, val] of Object.entries(raw)) {
+    if (!REASONING_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}] has unknown key: ${key}`);
+    }
+    switch (key) {
+      case "enabled":
+        result.enabled = validateBoolean(val, key, section);
+        break;
+      case "effort": {
+        const effort = validateString(val, key, section);
+        if (!VALID_REASONING_EFFORTS.has(effort)) {
+          throw new ConfigError(
+            `[${section}].effort must be one of: none, low, medium, high, maximum (got "${effort}")`
+          );
+        }
+        result.effort = effort;
+        break;
+      }
+      case "budget-tokens":
+        result["budget-tokens"] = validateNumber(val, key, section, { integer: true, min: 1 });
+        break;
+    }
+  }
+  return result;
+}
 function validateGlobalSubagentConfig(value, section) {
   if (typeof value !== "object" || value === null || Array.isArray(value)) {
     throw new ConfigError(`[${section}] must be a table`);
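For reference, the validated shape this function returns, and thus what a profile's reasoning table may contain, is fully determined by the code above. A sketch of a maximal value:

    // Shape accepted by validateReasoningConfig (all keys optional):
    const reasoningConfig = {
      enabled: true,          // boolean; false later maps to withoutReasoning()
      effort: "high",         // "none" | "low" | "medium" | "high" | "maximum"
      "budget-tokens": 2048   // integer >= 1; overrides effort where supported
    };

Any other key, a non-table value, or an out-of-range number raises ConfigError.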
@@ -850,6 +893,9 @@ function validateCompleteConfig(raw, section) {
   if ("retry" in rawObj) {
     result.retry = validateRetryConfig(rawObj.retry, `${section}.retry`);
   }
+  if ("reasoning" in rawObj) {
+    result.reasoning = validateReasoningConfig(rawObj.reasoning, `${section}.reasoning`);
+  }
   return result;
 }
 function validateAgentConfig(raw, section) {
@@ -947,6 +993,9 @@ function validateAgentConfig(raw, section) {
   if ("retry" in rawObj) {
     result.retry = validateRetryConfig(rawObj.retry, `${section}.retry`);
   }
+  if ("reasoning" in rawObj) {
+    result.reasoning = validateReasoningConfig(rawObj.reasoning, `${section}.reasoning`);
+  }
   return result;
 }
 function validateImageConfig(raw, section) {
@@ -2800,27 +2849,6 @@ async function loadGadgets(specifiers, cwd, importer = createTypeScriptImporter(
   return gadgets;
 }

-// src/llm-logging.ts
-import { mkdir, writeFile as writeFile2 } from "fs/promises";
-import { join as join2 } from "path";
-import { extractMessageText } from "llmist";
-function formatLlmRequest(messages) {
-  const lines = [];
-  for (const msg of messages) {
-    lines.push(`=== ${msg.role.toUpperCase()} ===`);
-    lines.push(msg.content ? extractMessageText(msg.content) : "");
-    lines.push("");
-  }
-  return lines.join("\n");
-}
-async function writeLogFile(dir, filename, content) {
-  await mkdir(dir, { recursive: true });
-  await writeFile2(join2(dir, filename), content, "utf-8");
-}
-function formatCallNumber(n) {
-  return n.toString().padStart(4, "0");
-}
-
 // src/utils.ts
 import chalk2 from "chalk";
 import { InvalidArgumentError } from "commander";
@@ -3185,6 +3213,8 @@ var StreamProgress = class {
   // Cache token tracking for live cost estimation during streaming
   callCachedInputTokens = 0;
   callCacheCreationInputTokens = 0;
+  // Reasoning token tracking for live cost estimation during streaming
+  callReasoningTokens = 0;
   // Cumulative stats (cumulative mode)
   totalStartTime = Date.now();
   totalTokens = 0;
@@ -3303,6 +3333,7 @@ var StreamProgress = class {
     if (info.cachedInputTokens !== void 0) agent.cachedInputTokens = info.cachedInputTokens;
     if (info.cacheCreationInputTokens !== void 0)
       agent.cacheCreationInputTokens = info.cacheCreationInputTokens;
+    if (info.reasoningTokens !== void 0) agent.reasoningTokens = info.reasoningTokens;
     if (info.finishReason !== void 0) agent.finishReason = info.finishReason;
     if (info.cost !== void 0) {
       agent.cost = info.cost;
@@ -3314,7 +3345,8 @@ var StreamProgress = class {
           agent.inputTokens ?? 0,
           agent.outputTokens,
           agent.cachedInputTokens,
-          agent.cacheCreationInputTokens
+          agent.cacheCreationInputTokens,
+          agent.reasoningTokens
         );
         agent.cost = costResult?.totalCost;
       } catch {
@@ -3428,6 +3460,7 @@ var StreamProgress = class {
     this.isStreaming = false;
     this.callCachedInputTokens = 0;
     this.callCacheCreationInputTokens = 0;
+    this.callReasoningTokens = 0;
     this.start();
   }
   /**
@@ -3446,7 +3479,8 @@ var StreamProgress = class {
       usage.inputTokens,
       usage.outputTokens,
       usage.cachedInputTokens ?? 0,
-      usage.cacheCreationInputTokens ?? 0
+      usage.cacheCreationInputTokens ?? 0,
+      usage.reasoningTokens ?? 0
     );
     if (cost) {
       this.totalCost += cost.totalCost;
@@ -3505,6 +3539,14 @@ var StreamProgress = class {
     this.callCachedInputTokens = cachedInputTokens;
     this.callCacheCreationInputTokens = cacheCreationInputTokens;
   }
+  /**
+   * Sets reasoning token count for the current call (from stream metadata).
+   * Used for live cost estimation during streaming.
+   * @param reasoningTokens - Number of reasoning/thinking tokens (subset of outputTokens)
+   */
+  setReasoningTokens(reasoningTokens) {
+    this.callReasoningTokens = reasoningTokens;
+  }
   /**
    * Get total elapsed time in seconds since the first call started.
    * @returns Elapsed time in seconds with 1 decimal place
@@ -3746,7 +3788,8 @@ var StreamProgress = class {
         this.callInputTokens,
         outputTokens,
         this.callCachedInputTokens,
-        this.callCacheCreationInputTokens
+        this.callCacheCreationInputTokens,
+        this.callReasoningTokens
       );
       return cost?.totalCost ?? 0;
     } catch {
@@ -3967,7 +4010,11 @@ function addCompleteOptions(cmd, defaults) {
     OPTION_DESCRIPTIONS.retryMaxTimeout,
     createNumericParser({ label: "Max timeout", integer: true, min: 0 }),
     defaults?.retry?.["max-timeout"]
-  ).option(OPTION_FLAGS.noRetry, OPTION_DESCRIPTIONS.noRetry);
+  ).option(OPTION_FLAGS.noRetry, OPTION_DESCRIPTIONS.noRetry).option(OPTION_FLAGS.reasoning, OPTION_DESCRIPTIONS.reasoning).option(OPTION_FLAGS.noReasoning, OPTION_DESCRIPTIONS.noReasoning).option(
+    OPTION_FLAGS.reasoningBudget,
+    OPTION_DESCRIPTIONS.reasoningBudget,
+    createNumericParser({ label: "Reasoning budget", integer: true, min: 1 })
+  );
 }
 function addAgentOptions(cmd, defaults) {
   const gadgetAccumulator = (value, previous = []) => [
@@ -4030,7 +4077,11 @@ function addAgentOptions(cmd, defaults) {
     OPTION_DESCRIPTIONS.retryMaxTimeout,
     createNumericParser({ label: "Max timeout", integer: true, min: 0 }),
     defaults?.retry?.["max-timeout"]
-  ).option(OPTION_FLAGS.noRetry, OPTION_DESCRIPTIONS.noRetry);
+  ).option(OPTION_FLAGS.noRetry, OPTION_DESCRIPTIONS.noRetry).option(OPTION_FLAGS.reasoning, OPTION_DESCRIPTIONS.reasoning).option(OPTION_FLAGS.noReasoning, OPTION_DESCRIPTIONS.noReasoning).option(
+    OPTION_FLAGS.reasoningBudget,
+    OPTION_DESCRIPTIONS.reasoningBudget,
+    createNumericParser({ label: "Reasoning budget", integer: true, min: 1 })
+  );
 }
 function configToCompleteOptions(config) {
   const result = {};
@@ -4055,6 +4106,9 @@ function configToCompleteOptions(config) {
     if (r["max-timeout"] !== void 0) result.retryMaxTimeout = r["max-timeout"];
     if (r.enabled === false) result.noRetry = true;
   }
+  if (config.reasoning) {
+    result.profileReasoning = config.reasoning;
+  }
   return result;
 }
 function configToAgentOptions(config) {
@@ -4094,6 +4148,9 @@ function configToAgentOptions(config) {
     if (r["max-timeout"] !== void 0) result.retryMaxTimeout = r["max-timeout"];
     if (r.enabled === false) result.noRetry = true;
   }
+  if (config.reasoning) {
+    result.profileReasoning = config.reasoning;
+  }
   if (config["show-hints"] !== void 0) result.showHints = config["show-hints"];
   return result;
 }
@@ -4348,6 +4405,9 @@ function formatLLMCallCollapsed(node, selected) {
   if (d.outputTokens && d.outputTokens > 0) {
     parts.push(chalk3.dim("\u2193") + chalk3.green(` ${formatTokens(d.outputTokens)}`));
   }
+  if (d.reasoningTokens && d.reasoningTokens > 0) {
+    parts.push(chalk3.dim("\u{1F4AD}") + chalk3.magenta(` ${formatTokens(d.reasoningTokens)}`));
+  }
   if (d.elapsedSeconds !== void 0) {
     parts.push(chalk3.dim(`${d.elapsedSeconds.toFixed(1)}s`));
   }
@@ -4395,6 +4455,11 @@ function formatLLMCallExpanded(node) {
       `${indent}${chalk3.dim(BOX.vertical)} Output: ${chalk3.green(formatTokens(d.outputTokens))} tokens`
     );
   }
+  if (d.reasoningTokens !== void 0 && d.reasoningTokens > 0) {
+    lines.push(
+      `${indent}${chalk3.dim(BOX.vertical)} Reason: ${chalk3.magenta(formatTokens(d.reasoningTokens))} tokens`
+    );
+  }
   if (d.contextPercent !== void 0) {
     let contextColor = chalk3.green;
     if (d.contextPercent >= 80) contextColor = chalk3.red;
@@ -4654,6 +4719,8 @@ var BlockRenderer = class _BlockRenderer {
   nodeIdCounter = 0;
   /** Current LLM call node (for adding gadget children) */
   currentLLMCallId = null;
+  /** Current thinking block (accumulates chunks during streaming) */
+  currentThinkingId = null;
   /** Persisted expanded states (survives rebuildBlocks) */
   expandedStates = /* @__PURE__ */ new Map();
   /** Whether to auto-scroll to bottom on new content ("follow mode") */
@@ -4889,6 +4956,66 @@ var BlockRenderer = class _BlockRenderer {
     this.rebuildBlocks();
     return id;
   }
+  /**
+   * Add thinking content from a reasoning model.
+   * Creates a new thinking block on first chunk, appends to existing on subsequent chunks.
+   * The block lives as a child of the current LLM call.
+   *
+   * @param content - Thinking text chunk
+   * @param thinkingType - Whether this is actual thinking or redacted content
+   */
+  addThinking(content, thinkingType) {
+    if (this.currentThinkingId) {
+      const node2 = this.getNode(this.currentThinkingId);
+      if (node2 && node2.type === "thinking") {
+        node2.content += content;
+        this.updateBlock(this.currentThinkingId);
+        return;
+      }
+    }
+    const id = this.generateId("thinking");
+    const parentLLMCallId = this.currentLLMCallId;
+    let depth = 0;
+    if (parentLLMCallId) {
+      const parent = this.getNode(parentLLMCallId);
+      if (parent) {
+        depth = parent.depth + 1;
+      }
+    }
+    const node = {
+      id,
+      type: "thinking",
+      depth,
+      parentId: parentLLMCallId,
+      sessionId: this.currentSessionId,
+      content,
+      thinkingType,
+      isComplete: false,
+      children: []
+    };
+    this.nodes.set(id, node);
+    if (parentLLMCallId) {
+      const parent = this.getNode(parentLLMCallId);
+      parent.children.push(id);
+    } else {
+      this.rootIds.push(id);
+    }
+    this.currentThinkingId = id;
+    this.rebuildBlocks();
+  }
+  /**
+   * Complete the current thinking block.
+   * Called when the LLM call finishes to mark thinking as complete.
+   */
+  completeThinking() {
+    if (!this.currentThinkingId) return;
+    const node = this.getNode(this.currentThinkingId);
+    if (node && node.type === "thinking") {
+      node.isComplete = true;
+      this.updateBlock(this.currentThinkingId);
+    }
+    this.currentThinkingId = null;
+  }
   /**
    * Add a user message block (for REPL mid-session input).
    *
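A sketch of the intended call pattern, judging from the streaming handlers further down (renderer stands for a BlockRenderer instance; the non-redacted thinkingType value is not shown in this diff and is assumed here):

    // Successive chunks accumulate into one block until completeThinking() runs:
    renderer.addThinking("Let me plan the steps. ", "thinking"); // first chunk creates the block (type value assumed)
    renderer.addThinking("First, read the file...", "thinking"); // later chunks append to the same block
    renderer.completeThinking(); // marks it complete and resets currentThinkingId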
@@ -4972,6 +5099,7 @@ var BlockRenderer = class _BlockRenderer {
     this.selectableIds = [];
     this.selectedIndex = -1;
     this.currentLLMCallId = null;
+    this.currentThinkingId = null;
     for (const child of [...this.container.children]) {
       child.detach();
     }
@@ -5301,6 +5429,25 @@ ${fullContent}
         }
         return this.abbreviateToLines(fullContent, 2, selected);
       }
+      case "thinking": {
+        const DIM2 = "\x1B[2m";
+        const RED_DIM = "\x1B[2;31m";
+        const RESET3 = "\x1B[0m";
+        const contIndent = getContinuationIndent(node.depth);
+        if (node.thinkingType === "redacted") {
+          const header2 = `${indent}${RED_DIM}\u{1F512} [Redacted thinking block]${RESET3}`;
+          return header2;
+        }
+        if (!expanded) {
+          const firstLine = node.content.split("\n")[0]?.slice(0, 60) ?? "";
+          const suffix = node.isComplete ? "" : "...";
+          return `${indent}${DIM2}\u{1F4AD} Thinking${suffix} ${firstLine}${RESET3}`;
+        }
+        const tokenInfo = node.isComplete ? ` (${Math.ceil(node.content.length / 4)} tokens est.)` : "";
+        const header = `${indent}${DIM2}\u25BC \u{1F4AD} Thinking${tokenInfo}${RESET3}`;
+        const contentLines = node.content.split("\n").map((line) => `${contIndent}${DIM2}${line}${RESET3}`);
+        return [header, ...contentLines].join("\n");
+      }
       case "system_message": {
         const icon = this.getSystemMessageIcon(node.category);
         const color = this.getSystemMessageColor(node.category);
@@ -5640,6 +5787,8 @@ ${indicator}`;
         return true;
       case "llm_call":
         return false;
+      case "thinking":
+        return false;
       case "gadget": {
         const name = node.name;
         return name === "TellUser" || name === "AskUser" || name === "Finish";
@@ -5701,6 +5850,7 @@ ${indicator}`;
   handleTreeEvent(event, tree) {
     switch (event.type) {
       case "llm_call_start": {
+        this.currentThinkingId = null;
         let parentBlockId;
         if (event.parentId) {
           parentBlockId = this.treeNodeToBlockId.get(event.parentId);
@@ -5719,12 +5869,14 @@
         break;
       }
       case "llm_call_complete": {
+        this.completeThinking();
         const blockId = this.treeNodeToBlockId.get(event.nodeId);
         if (blockId) {
           this.completeLLMCall(blockId, {
             inputTokens: event.usage?.inputTokens,
             cachedInputTokens: event.usage?.cachedInputTokens,
             outputTokens: event.usage?.outputTokens,
+            reasoningTokens: event.usage?.reasoningTokens,
             cost: event.cost,
             finishReason: event.finishReason ?? void 0
           });
@@ -5735,6 +5887,10 @@
         }
         break;
       }
+      case "thinking": {
+        this.addThinking(event.content, event.thinkingType);
+        break;
+      }
       case "gadget_call": {
         let parentBlockId;
         if (event.parentId) {
@@ -6154,10 +6310,10 @@ var HintsBar = class {
 import { spawnSync as spawnSync2 } from "child_process";
 import { readFileSync as readFileSync4, unlinkSync, writeFileSync as writeFileSync2 } from "fs";
 import { tmpdir } from "os";
-import { join as join3 } from "path";
+import { join as join2 } from "path";
 function openEditorSync(initialContent = "") {
   const editor = process.env.VISUAL || process.env.EDITOR || "vi";
-  const tmpFile = join3(tmpdir(), `llmist-input-${Date.now()}.txt`);
+  const tmpFile = join2(tmpdir(), `llmist-input-${Date.now()}.txt`);
   writeFileSync2(tmpFile, initialContent, "utf-8");
   try {
     const parts = editor.split(/\s+/);
@@ -7425,6 +7581,7 @@ var StatusBar = class {
     inputTokens: 0,
     outputTokens: 0,
     cachedTokens: 0,
+    reasoningTokens: 0,
     cost: 0,
     startTime: Date.now(),
     iteration: 0,
@@ -7458,10 +7615,11 @@ var StatusBar = class {
    * Called when an LLM call completes.
    * Replaces streaming estimates with actual values.
    */
-  endCall(inputTokens, outputTokens, cachedTokens, cost) {
+  endCall(inputTokens, outputTokens, cachedTokens, cost, reasoningTokens = 0) {
     this.metrics.inputTokens += inputTokens;
     this.metrics.outputTokens += outputTokens;
     this.metrics.cachedTokens += cachedTokens;
+    this.metrics.reasoningTokens += reasoningTokens;
     this.metrics.cost += cost;
     this.streamingInputTokens = 0;
     this.streamingOutputTokens = 0;
@@ -7661,7 +7819,8 @@ var StatusBar = class {
         event.usage?.inputTokens ?? 0,
         event.usage?.outputTokens ?? 0,
         event.usage?.cachedInputTokens ?? 0,
-        event.cost ?? 0
+        event.cost ?? 0,
+        event.usage?.reasoningTokens ?? 0
       );
     }
     this.nodeIdToLabel.delete(event.nodeId);
@@ -7826,6 +7985,9 @@ var StatusBar = class {
       const outputPrefix = this.isStreaming ? "~" : "";
       parts.push(`${GREEN2}\u2193${outputPrefix}${formatTokens(displayOutputTokens)}${RESET3}`);
     }
+    if (this.metrics.reasoningTokens > 0) {
+      parts.push(`${MAGENTA2}\u{1F4AD}${formatTokens(this.metrics.reasoningTokens)}${RESET3}`);
+    }
     const earliestStart = this.getEarliestLLMCallStartTime();
     if (earliestStart !== null) {
       const elapsedSeconds = (Date.now() - earliestStart) / 1e3;
@@ -8069,6 +8231,8 @@ var TUIApp = class _TUIApp {
   handleEvent(event) {
     if (event.type === "text") {
       this.blockRenderer.addText(event.content);
+    } else if (event.type === "thinking") {
+      this.blockRenderer.addThinking(event.content, event.thinkingType);
     }
   }
   /**
@@ -8561,40 +8725,20 @@ async function executeAgent(promptArg, options, env, commandName) {
   let iterations = 0;
   const llmLogsEnabled = options.logLlmRequests === true;
   const llmLogDir = llmLogsEnabled ? env.session?.logDir : void 0;
-  let llmCallCounter = 0;
-  const _countGadgetOutputTokens = async (output) => {
-    if (!output) return void 0;
-    try {
-      const messages = [{ role: "assistant", content: output }];
-      return await client.countTokens(options.model, messages);
-    } catch {
-      return void 0;
-    }
-  };
   const resolvedSubagentConfig = buildSubagentConfigMap(
     options.model,
     options.subagents,
     options.globalSubagents
   );
-  const builder = new AgentBuilder(client).withModel(options.model).withSubagentConfig(resolvedSubagentConfig).withLogger(env.createLogger("llmist:cli:agent")).withHooks({
+  const tuiHooks = {
     observers: {
       // onLLMCallStart: Track iteration for status bar label formatting
       onLLMCallStart: async (context) => {
         if (context.subagentContext) return;
-        llmCallCounter++;
         if (tui) {
           tui.showLLMCallStart(iterations + 1);
         }
       },
-      // onLLMCallReady: Log the exact request being sent to the LLM
-      onLLMCallReady: async (context) => {
-        if (context.subagentContext) return;
-        if (llmLogDir) {
-          const filename = `${formatCallNumber(llmCallCounter)}.request`;
-          const content = formatLlmRequest(context.options.messages);
-          await writeLogFile(llmLogDir, filename, content);
-        }
-      },
       // onStreamChunk: Update status bar with real-time output token estimate
       onStreamChunk: async (context) => {
         if (context.subagentContext) return;
@@ -8602,15 +8746,11 @@ async function executeAgent(promptArg, options, env, commandName) {
         const estimatedOutputTokens = StatusBar.estimateTokens(context.accumulatedText);
         tui.updateStreamingTokens(estimatedOutputTokens);
       },
-      // onLLMCallComplete: Capture metadata for final summary and file logging
+      // onLLMCallComplete: Capture metadata for final summary
       onLLMCallComplete: async (context) => {
         if (context.subagentContext) return;
         _usage = context.usage;
         iterations = Math.max(iterations, context.iteration + 1);
-        if (llmLogDir) {
-          const filename = `${formatCallNumber(llmCallCounter)}.response`;
-          await writeLogFile(llmLogDir, filename, context.rawResponse);
-        }
         if (tui) {
           tui.clearRetry();
         }
@@ -8718,7 +8858,9 @@ ${ctx.gadgetName} requires interactive approval. Run in a terminal to approve.`
       };
     }
   }
-  });
+  };
+  const finalHooks = llmLogDir ? HookPresets.merge(HookPresets.fileLogging({ directory: llmLogDir }), tuiHooks) : tuiHooks;
+  const builder = new AgentBuilder(client).withModel(options.model).withSubagentConfig(resolvedSubagentConfig).withLogger(env.createLogger("llmist:cli:agent")).withHooks(finalHooks);
   const rateLimitConfig = resolveRateLimitConfig(
     options,
     options.globalRateLimits,
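The net effect of this hunk: the hand-rolled request/response logging observers are replaced by the library's HookPresets.fileLogging preset, merged in front of the TUI hooks. Condensed, the new wiring reads:

    // File logging now comes from a preset instead of custom observers:
    const finalHooks = llmLogDir
      ? HookPresets.merge(HookPresets.fileLogging({ directory: llmLogDir }), tuiHooks)
      : tuiHooks;
    const builder = new AgentBuilder(client)
      .withModel(options.model)
      .withHooks(finalHooks); // TUI observers still run alongside the preset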
@@ -8741,6 +8883,27 @@ ${ctx.gadgetName} requires interactive approval. Run in a terminal to approve.`
   if (options.temperature !== void 0) {
     builder.withTemperature(options.temperature);
   }
+  if (options.reasoning === false) {
+    builder.withoutReasoning();
+  } else if (options.reasoning !== void 0 || options.reasoningBudget !== void 0) {
+    const effort = typeof options.reasoning === "string" ? options.reasoning : void 0;
+    builder.withReasoning({
+      enabled: true,
+      ...effort && { effort },
+      ...options.reasoningBudget && { budgetTokens: options.reasoningBudget }
+    });
+  } else if (options.profileReasoning) {
+    const cfg = options.profileReasoning;
+    if (cfg.enabled === false) {
+      builder.withoutReasoning();
+    } else {
+      builder.withReasoning({
+        enabled: true,
+        ...cfg.effort && { effort: cfg.effort },
+        ...cfg["budget-tokens"] && { budgetTokens: cfg["budget-tokens"] }
+      });
+    }
+  }
   if (tui) {
     builder.onHumanInput(async (question) => {
       return tui.waitForInput(question, "AskUser");
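Precedence in this block: --no-reasoning wins outright, then explicit --reasoning/--reasoning-budget flags, then the profile's reasoning config; with nothing set, the builder is left at its default. As a decision table (builder methods as they appear above):

    // CLI flags beat profile config; --no-reasoning beats everything:
    // --no-reasoning                      -> builder.withoutReasoning()
    // --reasoning high                    -> builder.withReasoning({ enabled: true, effort: "high" })
    // --reasoning-budget 2048 (alone)     -> builder.withReasoning({ enabled: true, budgetTokens: 2048 })
    // (no flags, profile enabled = false) -> builder.withoutReasoning()
    // (no flags, profile effort/budget)   -> builder.withReasoning({ enabled: true, ...profile values })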
@@ -8831,6 +8994,11 @@ ${ctx.gadgetName} requires interactive approval. Run in a terminal to approve.`
   } else {
     if (event.type === "text") {
       env.stdout.write(event.content);
+    } else if (event.type === "thinking") {
+      const stderrTTY = env.stderr.isTTY === true;
+      if (stderrTTY && !options.quiet) {
+        env.stderr.write(`\x1B[2m${event.content}\x1B[0m`);
+      }
     } else if (event.type === "gadget_result" && event.result.gadgetName === "TellUser" && event.result.result) {
       env.stdout.write(`${event.result.result}
`);
@@ -8897,6 +9065,7 @@ function registerAgentCommand(program, env, config, globalSubagents, globalRateL
       globalRetry,
       profileRateLimits: config?.["rate-limits"],
       profileRetry: config?.retry,
+      profileReasoning: config?.reasoning,
       showHints: config?.["show-hints"]
     };
     return executeAgent(prompt, mergedOptions, env, "agent");
@@ -8905,7 +9074,24 @@
 }

 // src/complete-command.ts
-import { FALLBACK_CHARS_PER_TOKEN as FALLBACK_CHARS_PER_TOKEN2, LLMMessageBuilder, resolveModel as resolveModel2, text as text2 } from "llmist";
+import {
+  FALLBACK_CHARS_PER_TOKEN as FALLBACK_CHARS_PER_TOKEN2,
+  formatLlmRequest as formatLlmRequest2,
+  LLMMessageBuilder,
+  resolveModel as resolveModel2,
+  text as text2
+} from "llmist";
+
+// src/llm-logging.ts
+import { mkdir, writeFile as writeFile2 } from "fs/promises";
+import { join as join3 } from "path";
+import { formatCallNumber, formatLlmRequest } from "llmist";
+async function writeLogFile(dir, filename, content) {
+  await mkdir(dir, { recursive: true });
+  await writeFile2(join3(dir, filename), content, "utf-8");
+}
+
+// src/complete-command.ts
 async function executeComplete(promptArg, options, env) {
   const prompt = await resolvePrompt(promptArg, env);
   const client = env.createClient();
@@ -8931,14 +9117,37 @@ async function executeComplete(promptArg, options, env) {
   const llmLogDir = llmLogsEnabled ? env.session?.logDir : void 0;
   if (llmLogDir) {
     const filename = "0001.request";
-    const content = formatLlmRequest(messages);
+    const content = formatLlmRequest2(messages);
     await writeLogFile(llmLogDir, filename, content);
   }
+  let reasoning;
+  if (options.reasoning === false) {
+    reasoning = { enabled: false };
+  } else if (options.reasoning !== void 0 || options.reasoningBudget !== void 0) {
+    const effort = typeof options.reasoning === "string" ? options.reasoning : void 0;
+    reasoning = {
+      enabled: true,
+      ...effort && { effort },
+      ...options.reasoningBudget && { budgetTokens: options.reasoningBudget }
+    };
+  } else if (options.profileReasoning) {
+    const cfg = options.profileReasoning;
+    if (cfg.enabled === false) {
+      reasoning = { enabled: false };
+    } else {
+      reasoning = {
+        enabled: true,
+        ...cfg.effort && { effort: cfg.effort },
+        ...cfg["budget-tokens"] && { budgetTokens: cfg["budget-tokens"] }
+      };
+    }
+  }
   const stream = client.stream({
     model,
     messages,
     temperature: options.temperature,
-    maxTokens: options.maxTokens
+    maxTokens: options.maxTokens,
+    ...reasoning && { reasoning }
   });
   const printer = new StreamPrinter(env.stdout);
   const stderrTTY = env.stderr.isTTY === true;
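Stripped of the config plumbing, a reasoning-enabled completion reduces to passing a reasoning object to client.stream. A minimal sketch, assuming the stream is async-iterable as the chunk handling below suggests:

    // Minimal sketch; option shapes taken from this hunk:
    const stream = client.stream({
      model,
      messages,
      maxTokens: options.maxTokens,
      reasoning: { enabled: true, effort: "medium", budgetTokens: 2048 }
    });
    for await (const chunk of stream) {
      if (chunk.thinking?.content) process.stderr.write(chunk.thinking.content); // dimmed on a TTY in the CLI
      if (chunk.text) process.stdout.write(chunk.text);
    }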
@@ -8958,6 +9167,12 @@
         progress.setOutputTokens(chunk.usage.outputTokens, false);
       }
     }
+    if (chunk.thinking?.content) {
+      if (stderrTTY && !options.quiet) {
+        progress.pause();
+        env.stderr.write(`\x1B[2m${chunk.thinking.content}\x1B[0m`);
+      }
+    }
     if (chunk.text) {
       progress.pause();
       accumulatedResponse += chunk.text;
@@ -8991,7 +9206,8 @@ function registerCompleteCommand(program, env, config, globalRateLimits, globalR
   const mergedOptions = {
     ...options,
     globalRateLimits,
-    globalRetry
+    globalRetry,
+    profileReasoning: config?.reasoning
   };
   return executeComplete(prompt, mergedOptions, env);
 }, env)