@pushpalsdev/cli 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -369,6 +369,8 @@ function loadPushPalsConfig(options = {}) {
369
369
  const debugHttp = parseBoolEnv("PUSHPALS_DEBUG_HTTP") ?? asBoolean(serverNode.debug_http, false);
370
370
  const staleClaimTtlMs = Math.max(5000, asInt(parseIntEnv("PUSHPALS_STALE_CLAIM_TTL_MS") ?? serverNode.stale_claim_ttl_ms, 120000));
371
371
  const staleClaimSweepIntervalMs = Math.max(1000, asInt(parseIntEnv("PUSHPALS_STALE_CLAIM_SWEEP_INTERVAL_MS") ?? serverNode.stale_claim_sweep_interval_ms, 5000));
372
+ const sessionTokenBudget = Math.max(0, asInt(parseIntEnv("PUSHPALS_SESSION_TOKEN_BUDGET") ?? serverNode.session_token_budget, 1e6));
373
+ const sessionTokenBudgetAction = "pause";
372
374
  const globalStatusHeartbeatMs = parseIntEnv("PUSHPALS_STATUS_HEARTBEAT_MS");
373
375
  const localNode = getObject(merged, "localbuddy");
374
376
  const localEnabled = parseBoolEnv("LOCALBUDDY_ENABLED") ?? asBoolean(localNode.enabled, false);
@@ -619,7 +621,9 @@ function loadPushPalsConfig(options = {}) {
619
621
  port: serverPort,
620
622
  debugHttp,
621
623
  staleClaimTtlMs,
622
- staleClaimSweepIntervalMs
624
+ staleClaimSweepIntervalMs,
625
+ sessionTokenBudget,
626
+ sessionTokenBudgetAction
623
627
  },
624
628
  localbuddy: {
625
629
  enabled: localEnabled,
@@ -1684,7 +1688,7 @@ async function ensureBundledMonitoringHubRoot() {
1684
1688
  return resolveBundledMonitoringHubRoot();
1685
1689
  }
1686
1690
  function repoLooksLikePushPalsSourceCheckout(repoRoot) {
1687
- return existsSync4(join2(repoRoot, "configs", "default.toml")) || existsSync4(join2(repoRoot, "config", "default.toml"));
1691
+ return existsSync4(join2(repoRoot, "configs", "default.toml"));
1688
1692
  }
1689
1693
  function parseSemverFromPackageVersion(value) {
1690
1694
  const raw = String(value ?? "").trim();
@@ -2154,6 +2158,52 @@ function stopRuntimeServices(services) {
2154
2158
  } catch {}
2155
2159
  }
2156
2160
  }
2161
/**
 * Ranks a runtime service for graceful shutdown ordering.
 *
 * Lower numbers sort first when the result is used as a sort key. Any name
 * that is not one of the three recognised services (including "server")
 * falls into the last bucket.
 *
 * @param {string} name - Service name as stored on the service record.
 * @returns {number} 0, 1, 2 for the recognised services, otherwise 3.
 */
function resolveGracefulShutdownPriority(name) {
  switch (name) {
    case "source_control_manager":
      return 0;
    case "remotebuddy":
      return 1;
    case "localbuddy":
      return 2;
    default:
      return 3;
  }
}
2170
/**
 * Polls the given services until all of them report `exited`, or until the
 * timeout elapses.
 *
 * The `exited` flag is only read here; something else must set it. Polling
 * happens every 100 ms, so the call can return up to one poll interval after
 * the deadline.
 *
 * @param {Array<{exited: boolean}>} services - Service records to watch.
 * @param {number} timeoutMs - Maximum time to wait; negative values act as 0.
 * @returns {Promise<boolean>} true when every service has exited (or the list
 *   is empty), false when the deadline passed with at least one still running.
 */
async function waitForRuntimeServicesExit(services, timeoutMs) {
  if (services.length === 0) {
    return true;
  }

  const everyServiceExited = () => services.every((service) => service.exited);
  const deadline = Date.now() + Math.max(0, timeoutMs);

  while (Date.now() < deadline) {
    if (everyServiceExited()) {
      return true;
    }
    await Bun.sleep(100);
  }

  // One last look so an exit that landed during the final sleep still counts.
  return everyServiceExited();
}
2181
/**
 * Asks the still-running services to terminate with SIGTERM, then force-stops
 * whatever is left.
 *
 * Sequence:
 *   1. every running service except "server" receives SIGTERM (in
 *      resolveGracefulShutdownPriority order) and is given time to exit;
 *   2. the "server" service(s) then receive SIGTERM and are given time to exit;
 *   3. anything still running is handed to stopRuntimeServices.
 *
 * NOTE(review): the first wait is max(1000, timeoutMs - 2000) and the second
 * is min(3000, timeoutMs), so the two phases together can take longer than
 * `timeoutMs` (up to 11 s with the 10 s default).
 *
 * @param {Array<{name: string, exited: boolean, proc: {kill: Function}}>} services
 * @param {number} [timeoutMs=1e4] - Budget used to derive both wait windows.
 * @returns {Promise<void>}
 */
async function stopRuntimeServicesGracefully(services, timeoutMs = 1e4) {
  const running = services.length === 0 ? [] : services.filter((service) => !service.exited);
  if (running.length === 0) {
    return;
  }

  const ordered = running
    .slice()
    .sort((a, b) => resolveGracefulShutdownPriority(a.name) - resolveGracefulShutdownPriority(b.name));

  // Split once, preserving the priority order inside each group.
  const nonServer = [];
  const server = [];
  for (const service of ordered) {
    (service.name === "server" ? server : nonServer).push(service);
  }

  // Best effort: a failure to signal one process must not stop the others.
  const sendSigterm = (targets) => {
    for (const target of targets) {
      try {
        target.proc.kill("SIGTERM");
      } catch {}
    }
  };

  sendSigterm(nonServer);
  await waitForRuntimeServicesExit(nonServer, Math.max(1000, timeoutMs - 2000));

  // The server goes last, after its dependants have had their chance to exit.
  sendSigterm(server);
  await waitForRuntimeServicesExit(server, Math.min(3000, timeoutMs));

  const remaining = ordered.filter((service) => !service.exited);
  if (remaining.length > 0) {
    stopRuntimeServices(remaining);
  }
}
2157
2207
  function prependExecutableDirToPath(env, executablePath, platform = process.platform) {
2158
2208
  const resolvedPath = String(executablePath ?? "").trim();
2159
2209
  if (!resolvedPath)
@@ -2700,7 +2750,7 @@ function removeCliClearTarget(target) {
2700
2750
  };
2701
2751
  }
2702
2752
  }
2703
- async function requestLocalRuntimeShutdownForClear(serverUrl, repoRoot) {
2753
+ async function requestLocalRuntimeShutdown(serverUrl, repoRoot, reason) {
2704
2754
  if (!await probeServer(serverUrl)) {
2705
2755
  return { attempted: false, accepted: false };
2706
2756
  }
@@ -2717,7 +2767,7 @@ async function requestLocalRuntimeShutdownForClear(serverUrl, repoRoot) {
2717
2767
  const response = await fetchWithTimeout(`${serverUrl}/admin/shutdown`, {
2718
2768
  method: "POST",
2719
2769
  headers: { "Content-Type": "application/json" },
2720
- body: JSON.stringify({ reason: "pushpals --clear" })
2770
+ body: JSON.stringify({ reason })
2721
2771
  }, 5000);
2722
2772
  if (!response.ok) {
2723
2773
  const detail = await response.text().catch(() => "");
@@ -2738,7 +2788,7 @@ async function requestLocalRuntimeShutdownForClear(serverUrl, repoRoot) {
2738
2788
  }
2739
2789
  async function clearPushpalsState(opts) {
2740
2790
  console.log("[pushpals] Clear requested. Removing repo-local PushPals state.");
2741
- const shutdown = await requestLocalRuntimeShutdownForClear(opts.serverUrl, opts.repoRoot);
2791
+ const shutdown = await requestLocalRuntimeShutdown(opts.serverUrl, opts.repoRoot, "pushpals --clear");
2742
2792
  if (shutdown.attempted && shutdown.accepted) {
2743
2793
  console.log("[pushpals] Local runtime shutdown accepted; waiting for services to exit...");
2744
2794
  await Bun.sleep(1500);
@@ -3875,6 +3925,22 @@ async function main() {
3875
3925
  stopRuntimeServices(autoStartedServices);
3876
3926
  autoStartedServices = [];
3877
3927
  };
3928
/**
 * Gracefully stops the services this CLI process auto-started.
 *
 * The tracked list is detached first, so a second call (or the synchronous
 * stopAutoStartedServices fallback) sees nothing left to stop. The local
 * runtime is then asked to shut down, and finally the detached services are
 * SIGTERMed / force-stopped via stopRuntimeServicesGracefully.
 *
 * @param {string} reason - Passed through to the runtime shutdown request.
 * @returns {Promise<void>}
 */
const stopAutoStartedServicesGracefully = async (reason) => {
  const services = autoStartedServices;
  if (services.length === 0) {
    return;
  }
  autoStartedServices = [];

  const { attempted, accepted, detail } = await requestLocalRuntimeShutdown(serverUrl, repoRoot, reason);

  if (attempted && accepted) {
    console.log("[pushpals] Local runtime shutdown accepted; waiting for services to exit...");
    // Give the runtime a moment to wind down before signalling processes.
    await Bun.sleep(1500);
  } else if (attempted) {
    const suffix = detail ? `: ${detail}` : ".";
    console.warn(`[pushpals] Local runtime shutdown request was not accepted${suffix}`);
  } else if (detail) {
    console.warn(`[pushpals] ${detail}`);
  }

  await stopRuntimeServicesGracefully(services);
};
3878
3944
  let serverHealthy = await probeServer(serverUrl);
3879
3945
  const serverWasAlreadyHealthy = serverHealthy;
3880
3946
  if (!serverHealthy && workerpalDockerPrecheck.status === "failed") {
@@ -4042,26 +4108,36 @@ ${line}
4042
4108
  console.log(line);
4043
4109
  };
4044
4110
  const streamTask = parsed.noStream ? Promise.resolve() : parsed.runtimeOnly ? Promise.resolve() : runSessionStream(serverUrl, activeSessionId, cliClient, printIncoming, streamAbort.signal);
4045
- let shuttingDown = false;
4111
+ let stopPromise = null;
4046
4112
  const requestStop = () => {
4047
- if (shuttingDown)
4048
- return;
4049
- shuttingDown = true;
4050
- console.log("[pushpals] Shutting down CLI session...");
4051
- streamAbort.abort();
4052
- if (rl)
4053
- rl.close();
4054
- try {
4055
- monitoringHub?.stop();
4056
- } catch {}
4057
- if (autoStartedServices.length > 0) {
4058
- console.log("[pushpals] Stopping embedded runtime services...");
4059
- }
4060
- stopAutoStartedServices();
4113
+ if (stopPromise)
4114
+ return stopPromise;
4115
+ stopPromise = (async () => {
4116
+ console.log("[pushpals] Shutting down CLI session...");
4117
+ streamAbort.abort();
4118
+ const activeRl = rl;
4119
+ rl = null;
4120
+ if (activeRl)
4121
+ activeRl.close();
4122
+ try {
4123
+ monitoringHub?.stop();
4124
+ } catch {}
4125
+ if (autoStartedServices.length > 0) {
4126
+ console.log("[pushpals] Stopping embedded runtime services...");
4127
+ }
4128
+ await stopAutoStartedServicesGracefully("pushpals CLI exit");
4129
+ })();
4130
+ return stopPromise;
4061
4131
  };
4062
- process.once("SIGINT", requestStop);
4063
- process.once("SIGTERM", requestStop);
4064
- process.once("exit", requestStop);
4132
+ process.once("SIGINT", () => {
4133
+ requestStop();
4134
+ });
4135
+ process.once("SIGTERM", () => {
4136
+ requestStop();
4137
+ });
4138
+ process.once("exit", () => {
4139
+ stopAutoStartedServices();
4140
+ });
4065
4141
  if (parsed.runtimeOnly) {
4066
4142
  console.log("[pushpals] Runtime-only mode is active. Send `exit` on stdin or terminate the process to stop.");
4067
4143
  await new Promise((resolveStop) => {
@@ -4091,7 +4167,7 @@ ${line}
4091
4167
  finish();
4092
4168
  });
4093
4169
  });
4094
- requestStop();
4170
+ await requestStop();
4095
4171
  await Promise.race([streamTask, Bun.sleep(2000)]);
4096
4172
  return;
4097
4173
  }
@@ -4109,7 +4185,7 @@ ${line}
4109
4185
  continue;
4110
4186
  }
4111
4187
  if (isCliExitCommand(text)) {
4112
- requestStop();
4188
+ await requestStop();
4113
4189
  break;
4114
4190
  }
4115
4191
  if (text === "/hub") {
@@ -4149,7 +4225,7 @@ ${line}
4149
4225
  }
4150
4226
  rl.prompt();
4151
4227
  }
4152
- requestStop();
4228
+ await requestStop();
4153
4229
  await Promise.race([streamTask, Bun.sleep(2000)]);
4154
4230
  }
4155
4231
  if (import.meta.main) {
@@ -4171,6 +4247,7 @@ export {
4171
4247
  resolveCliLocalBuddyAutostart,
4172
4248
  resolveBundledRuntimeAssetSource,
4173
4249
  resolveBundledMonitoringHubRoot,
4250
+ repoLooksLikePushPalsSourceCheckout,
4174
4251
  prepareEmbeddedWorkerpalDockerImageIfNeeded,
4175
4252
  prepareCliRuntime,
4176
4253
  precheckWorkerpalDockerAvailability,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.0.22",
3
+ "version": "1.0.24",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -21,6 +21,7 @@ port = 3001
21
21
  debug_http = false
22
22
  stale_claim_ttl_ms = 120000
23
23
  stale_claim_sweep_interval_ms = 5000
24
+ session_token_budget = 2000000
24
25
 
25
26
  [localbuddy]
26
27
  enabled = false
@@ -8,7 +8,7 @@
8
8
 
9
9
  [localbuddy.llm]
10
10
  backend = "openai_codex"
11
- model = "gpt-5-codex"
11
+ model = "gpt-5.4"
12
12
  codex_auth_mode = "chatgpt"
13
13
  codex_bin = "bun x --yes @openai/codex"
14
14
  codex_timeout_ms = 120000
@@ -16,7 +16,7 @@ reasoning_effort = "high"
16
16
 
17
17
  [remotebuddy.llm]
18
18
  backend = "openai_codex"
19
- model = "gpt-5-codex"
19
+ model = "gpt-5.4"
20
20
  codex_auth_mode = "chatgpt"
21
21
  codex_bin = "bun x --yes @openai/codex"
22
22
  codex_timeout_ms = 120000
@@ -42,7 +42,7 @@ retention_days = 30
42
42
 
43
43
  [workerpals.llm]
44
44
  backend = "openai_codex"
45
- model = "gpt-5-codex"
45
+ model = "gpt-5.4"
46
46
  codex_auth_mode = "chatgpt"
47
47
  codex_bin = "bun x --yes @openai/codex"
48
48
  codex_timeout_ms = 120000
@@ -57,6 +57,7 @@ from executor_base import (
57
57
  log_git_status,
58
58
  looks_local_base_url,
59
59
  parse_task_execute_payload,
60
+ prompts_root_for_runtime_assets,
60
61
  resolve_llm_config,
61
62
  setting_int,
62
63
  setting_str,
@@ -992,11 +993,7 @@ def _is_git_porcelain_status_command(cmd: str) -> bool:
992
993
 
993
994
 
994
995
  def _repo_root_for_prompt_loading() -> Path:
995
- current = Path(__file__).resolve()
996
- for parent in current.parents:
997
- if (parent / "prompts").is_dir():
998
- return parent
999
- return current.parents[5]
996
+ return prompts_root_for_runtime_assets()
1000
997
 
1001
998
 
1002
999
  def _resolve_prompt_file(relative_path: str) -> Path:
@@ -35,6 +35,7 @@ from executor_base import (
35
35
  log_git_status,
36
36
  looks_local_base_url,
37
37
  parse_task_execute_payload,
38
+ prompts_root_for_runtime_assets,
38
39
  resolve_llm_config,
39
40
  summarize_git_changes,
40
41
  to_int,
@@ -42,7 +43,7 @@ from executor_base import (
42
43
  )
43
44
 
44
45
  LOG_PREFIX = "[OpenAICodexExecutor]"
45
- DEFAULT_CODEX_MODEL = "gpt-5-codex"
46
+ DEFAULT_CODEX_MODEL = "gpt-5.4"
46
47
  _ACTIVE_CHILD: Optional[subprocess.Popen[str]] = None
47
48
  _INTERRUPTED_SIGNAL: Optional[int] = None
48
49
  log = Logger(LOG_PREFIX)
@@ -79,7 +80,19 @@ _VALID_APPROVAL_POLICIES = {"untrusted", "on-failure", "on-request", "never"}
79
80
  _VALID_SANDBOX_POLICIES = {"read-only", "workspace-write", "danger-full-access"}
80
81
  _VALID_COLORS = {"always", "never", "auto"}
81
82
  _VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
82
- _VALID_REASONING_EFFORTS = {"low", "medium", "high"}
83
+ _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
84
+
85
+
86
+ def _model_supports_xhigh_reasoning(model: str) -> bool:
87
+ normalized = str(model or "").strip().lower()
88
+ if not normalized:
89
+ return False
90
+ return not (
91
+ normalized == "gpt-5.4"
92
+ or normalized.startswith("gpt-5.4-")
93
+ or normalized == "codex-1p"
94
+ or normalized.startswith("codex-1p-")
95
+ )
83
96
 
84
97
 
85
98
  @dataclass(frozen=True)
@@ -188,12 +201,7 @@ def _truncate(text: str, max_chars: int = 4000) -> str:
188
201
 
189
202
 
190
203
  def _repo_root_for_prompt_loading() -> Path:
191
- current = Path(__file__).resolve()
192
- for parent in current.parents:
193
- if (parent / "prompts").is_dir():
194
- return parent
195
- # Fallback to historical layout depth if prompts/ cannot be discovered.
196
- return current.parents[5]
204
+ return prompts_root_for_runtime_assets()
197
205
 
198
206
 
199
207
  def _resolve_prompt_file(relative_path: str) -> Path:
@@ -320,14 +328,21 @@ def _resolve_communicate_timeout_seconds(config: OpenAICodexRuntimeConfig) -> Op
320
328
  return max(1, timeout_ms // 1000)
321
329
 
322
330
 
323
- def _resolve_reasoning_effort(config: OpenAICodexRuntimeConfig) -> str:
331
+ def _resolve_reasoning_effort(config: OpenAICodexRuntimeConfig, model: str = DEFAULT_CODEX_MODEL) -> str:
324
332
  raw = config.reasoning_effort
325
333
  normalized = str(raw).strip().lower()
334
+ if normalized in {"extra high", "extra-high", "extrahigh", "x-high"}:
335
+ normalized = "xhigh"
336
+ if normalized == "xhigh" and not _model_supports_xhigh_reasoning(model):
337
+ log.info(
338
+ f"Downgrading workerpals.openai_codex.reasoning_effort='xhigh' to 'high' for model {model!r}."
339
+ )
340
+ return "high"
326
341
  if normalized in _VALID_REASONING_EFFORTS:
327
342
  return normalized
328
343
  log.info(
329
344
  "Invalid workerpals.openai_codex.reasoning_effort="
330
- f"{raw!r}; using default 'high'. Allowed: low, medium, high."
345
+ f"{raw!r}; using default 'high'. Allowed: low, medium, high, xhigh."
331
346
  )
332
347
  return "high"
333
348
 
@@ -419,6 +434,88 @@ def _contains_reasoning_marker(value: str) -> bool:
419
434
  return "reasoning" in lowered or "thinking" in lowered
420
435
 
421
436
 
437
+ def _coerce_non_negative_int(value: Any) -> Optional[int]:
438
+ try:
439
+ parsed = int(value)
440
+ except Exception:
441
+ return None
442
+ if parsed < 0:
443
+ return None
444
+ return parsed
445
+
446
+
447
+ def _normalize_usage_counts(
448
+ prompt_tokens: Optional[int],
449
+ completion_tokens: Optional[int],
450
+ total_tokens: Optional[int],
451
+ ) -> Optional[Dict[str, int]]:
452
+ if prompt_tokens is None and completion_tokens is None and total_tokens is None:
453
+ return None
454
+ prompt = prompt_tokens if prompt_tokens is not None else 0
455
+ completion = completion_tokens if completion_tokens is not None else 0
456
+ total = total_tokens if total_tokens is not None else prompt + completion
457
+ if prompt_tokens is None and total_tokens is not None and completion_tokens is not None:
458
+ prompt = max(0, total - completion)
459
+ if completion_tokens is None and total_tokens is not None and prompt_tokens is not None:
460
+ completion = max(0, total - prompt)
461
+ total = max(total, prompt + completion)
462
+ if total <= 0:
463
+ return None
464
+ return {
465
+ "prompt_tokens": int(prompt),
466
+ "completion_tokens": int(completion),
467
+ "total_tokens": int(total),
468
+ }
469
+
470
+
471
def _extract_usage_counts(value: Any) -> Optional[Dict[str, int]]:
    """Search a decoded JSON event for token-usage counters.

    Walks nested dicts and lists depth-first. Every dict is probed for
    prompt/completion/total counters under snake_case and camelCase keys
    (``prompt_tokens``/``promptTokens``/``input_tokens``/``inputTokens``,
    ``completion_tokens``/``completionTokens``/``output_tokens``/
    ``outputTokens``, ``total_tokens``/``totalTokens``). Among all dicts that
    yield usable counts, the one with the largest ``total_tokens`` wins.

    The walk is bounded: at most 256 nodes are visited and only the first 80
    items of any list are considered, so very large events may be only
    partially inspected.

    Returns the normalized counts dict, or None when nothing usable is found.
    """
    best: Optional[Dict[str, int]] = None
    stack: List[Any] = [value]
    visited = 0
    max_nodes = 256

    while stack and visited < max_nodes:
        current = stack.pop()
        visited += 1
        if isinstance(current, list):
            # Reversed so the first list item ends up on top of the stack.
            for item in reversed(current[:80]):
                if isinstance(item, (dict, list)):
                    stack.append(item)
            continue
        if not isinstance(current, dict):
            continue

        prompt_tokens = _coerce_non_negative_int(
            current.get("prompt_tokens")
            or current.get("promptTokens")
            or current.get("input_tokens")
            or current.get("inputTokens")
        )
        completion_tokens = _coerce_non_negative_int(
            current.get("completion_tokens")
            or current.get("completionTokens")
            or current.get("output_tokens")
            or current.get("outputTokens")
        )
        total_tokens = _coerce_non_negative_int(
            current.get("total_tokens") or current.get("totalTokens")
        )
        normalized = _normalize_usage_counts(prompt_tokens, completion_tokens, total_tokens)
        if normalized is not None:
            if best is None or normalized["total_tokens"] > best["total_tokens"]:
                best = normalized

        # Queue the children. The "usage" child is held back and pushed last,
        # so it is queued exactly once and is the next node popped. That way
        # it is inspected before the node budget can run out on siblings.
        for key, nested in current.items():
            if key == "usage":
                continue
            if isinstance(nested, (dict, list)):
                stack.append(nested)

        usage_node = current.get("usage")
        if isinstance(usage_node, (dict, list)):
            stack.append(usage_node)

    return best
517
+
518
+
422
519
  def _event_contains_reasoning(value: Any) -> bool:
423
520
  max_nodes = 256
424
521
  visited = 0
@@ -581,6 +678,9 @@ def _empty_codex_trace() -> Dict[str, Any]:
581
678
  "raw_logged": 0,
582
679
  "raw_omitted": 0,
583
680
  "reasoning_events": 0,
681
+ "prompt_tokens": 0,
682
+ "completion_tokens": 0,
683
+ "total_tokens": 0,
584
684
  }
585
685
 
586
686
 
@@ -611,6 +711,17 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
611
711
  return
612
712
 
613
713
  if isinstance(parsed, dict):
714
+ usage = _extract_usage_counts(parsed)
715
+ if usage is not None:
716
+ trace["prompt_tokens"] = max(
717
+ to_int(trace.get("prompt_tokens"), 0), usage["prompt_tokens"]
718
+ )
719
+ trace["completion_tokens"] = max(
720
+ to_int(trace.get("completion_tokens"), 0), usage["completion_tokens"]
721
+ )
722
+ trace["total_tokens"] = max(
723
+ to_int(trace.get("total_tokens"), 0), usage["total_tokens"]
724
+ )
614
725
  event_type = (
615
726
  str(parsed.get("type") or parsed.get("event") or parsed.get("kind") or "event")
616
727
  .strip()
@@ -677,10 +788,17 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
677
788
  if raw_omitted > 0:
678
789
  log.info(f"[codex/raw] ... {raw_omitted} additional line(s) omitted.")
679
790
  reasoning_events = to_int(trace.get("reasoning_events"), 0)
791
+ prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
792
+ completion_tokens = to_int(trace.get("completion_tokens"), 0)
793
+ total_tokens = to_int(trace.get("total_tokens"), 0)
680
794
  if reasoning_events > 0:
681
795
  log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
682
796
  elif use_json and valid_json > 0:
683
797
  log.info("[codex] No reasoning-like events observed in this run.")
798
+ if total_tokens > 0:
799
+ log.info(
800
+ f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
801
+ )
684
802
 
685
803
  if not summaries and event_type_counts:
686
804
  ranked = sorted(event_type_counts.items(), key=lambda item: item[1], reverse=True)
@@ -694,9 +812,41 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
694
812
  "summaries": summaries,
695
813
  "event_type_counts": event_type_counts,
696
814
  "reasoning_events": reasoning_events,
815
+ "prompt_tokens": prompt_tokens,
816
+ "completion_tokens": completion_tokens,
817
+ "total_tokens": total_tokens,
697
818
  }
698
819
 
699
820
 
821
+ def _estimated_usage(prompt: str, output_text: str, *, model: str) -> Dict[str, Any]:
822
+ prompt_tokens = max(0, int(len(str(prompt or "")) / 3 + 0.999999))
823
+ completion_tokens = max(0, int(len(str(output_text or "")) / 3 + 0.999999))
824
+ return {
825
+ "promptTokens": prompt_tokens,
826
+ "completionTokens": completion_tokens,
827
+ "totalTokens": prompt_tokens + completion_tokens,
828
+ "estimated": True,
829
+ "backend": "openai_codex",
830
+ "modelId": model,
831
+ }
832
+
833
+
834
def _usage_from_trace_or_estimate(trace: Dict[str, Any], prompt: str, output_text: str, *, model: str) -> Dict[str, Any]:
    """Prefer token counts recorded in ``trace``; otherwise estimate them.

    When ``trace["total_tokens"]`` is positive, the trace's prompt and
    completion counts are reported with ``"estimated": False``, and the total
    is raised to at least their sum. In every other case the result comes from
    ``_estimated_usage(prompt, output_text, model=model)``.
    """
    observed_total = to_int(trace.get("total_tokens"), 0)
    if not observed_total > 0:
        return _estimated_usage(prompt, output_text, model=model)

    observed_prompt = to_int(trace.get("prompt_tokens"), 0)
    observed_completion = to_int(trace.get("completion_tokens"), 0)
    return {
        "promptTokens": observed_prompt,
        "completionTokens": observed_completion,
        "totalTokens": max(observed_total, observed_prompt + observed_completion),
        "estimated": False,
        "backend": "openai_codex",
        "modelId": model,
    }
848
+
849
+
700
850
  def _log_stderr(stderr: str) -> None:
701
851
  lines = [line.strip() for line in stderr.splitlines() if line.strip()]
702
852
  if not lines:
@@ -873,7 +1023,7 @@ def _run_codex_task(
873
1023
  )
874
1024
  # JSON event output is noisy by default; prefer plain text + output-last-message.
875
1025
  use_json = runtime_config.json_output
876
- reasoning_effort = _resolve_reasoning_effort(runtime_config)
1026
+ reasoning_effort = _resolve_reasoning_effort(runtime_config, model)
877
1027
  communicate_timeout_s = _resolve_communicate_timeout_seconds(runtime_config)
878
1028
  prompt = _build_instruction(instruction, supplemental_guidance)
879
1029
  baseline_changes = summarize_git_changes(repo)
@@ -1125,6 +1275,10 @@ def _run_codex_task(
1125
1275
  stdout_trace = _finalize_codex_stdout_trace(stdout_trace_state, use_json)
1126
1276
  trace_excerpt = _format_codex_trace_excerpt(stdout_trace)
1127
1277
  _log_stderr(stderr)
1278
+ usage_output_text = "\n\n".join(
1279
+ part for part in (stdout, stderr, trace_excerpt) if str(part or "").strip()
1280
+ )
1281
+ usage = _usage_from_trace_or_estimate(stdout_trace, prompt, usage_output_text, model=model)
1128
1282
 
1129
1283
  if timed_out:
1130
1284
  detail = (
@@ -1140,6 +1294,7 @@ def _run_codex_task(
1140
1294
  "stdout": _truncate(stdout),
1141
1295
  "stderr": _truncate(f"{detail}\n{stderr}".strip()),
1142
1296
  "exitCode": 124,
1297
+ "usage": usage,
1143
1298
  }
1144
1299
 
1145
1300
  last_message = _read_text_if_exists(last_message_path)
@@ -1152,6 +1307,7 @@ def _run_codex_task(
1152
1307
  "stdout": _truncate(stdout),
1153
1308
  "stderr": _truncate(stderr),
1154
1309
  "exitCode": 128 + int(_INTERRUPTED_SIGNAL),
1310
+ "usage": usage,
1155
1311
  }
1156
1312
 
1157
1313
  if return_code is None:
@@ -1161,6 +1317,7 @@ def _run_codex_task(
1161
1317
  "stdout": _truncate(stdout),
1162
1318
  "stderr": _truncate(stderr),
1163
1319
  "exitCode": 1,
1320
+ "usage": usage,
1164
1321
  }
1165
1322
 
1166
1323
  exit_code = int(return_code)
@@ -1177,6 +1334,7 @@ def _run_codex_task(
1177
1334
  "stdout": _truncate(stdout),
1178
1335
  "stderr": _truncate(detail),
1179
1336
  "exitCode": exit_code,
1337
+ "usage": usage,
1180
1338
  }
1181
1339
 
1182
1340
  policy_signal = _detect_codex_workaround_signal(last_message)
@@ -1199,6 +1357,7 @@ def _run_codex_task(
1199
1357
  "stdout": _truncate(stdout),
1200
1358
  "stderr": _truncate(detail),
1201
1359
  "exitCode": 5,
1360
+ "usage": usage,
1202
1361
  }
1203
1362
 
1204
1363
  changed_paths = summarize_git_changes(repo)
@@ -1220,6 +1379,7 @@ def _run_codex_task(
1220
1379
  "stdout": "\n\n".join(stdout_parts),
1221
1380
  "stderr": "",
1222
1381
  "exitCode": 0,
1382
+ "usage": usage,
1223
1383
  }
1224
1384
 
1225
1385
  if not stdout_parts:
@@ -1230,6 +1390,7 @@ def _run_codex_task(
1230
1390
  "stdout": "\n\n".join(stdout_parts),
1231
1391
  "stderr": "",
1232
1392
  "exitCode": 0,
1393
+ "usage": usage,
1233
1394
  }
1234
1395
 
1235
1396