@pushpalsdev/cli 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -369,6 +369,8 @@ function loadPushPalsConfig(options = {}) {
369
369
  const debugHttp = parseBoolEnv("PUSHPALS_DEBUG_HTTP") ?? asBoolean(serverNode.debug_http, false);
370
370
  const staleClaimTtlMs = Math.max(5000, asInt(parseIntEnv("PUSHPALS_STALE_CLAIM_TTL_MS") ?? serverNode.stale_claim_ttl_ms, 120000));
371
371
  const staleClaimSweepIntervalMs = Math.max(1000, asInt(parseIntEnv("PUSHPALS_STALE_CLAIM_SWEEP_INTERVAL_MS") ?? serverNode.stale_claim_sweep_interval_ms, 5000));
372
+ const sessionTokenBudget = Math.max(0, asInt(parseIntEnv("PUSHPALS_SESSION_TOKEN_BUDGET") ?? serverNode.session_token_budget, 1e6));
373
+ const sessionTokenBudgetAction = "pause";
372
374
  const globalStatusHeartbeatMs = parseIntEnv("PUSHPALS_STATUS_HEARTBEAT_MS");
373
375
  const localNode = getObject(merged, "localbuddy");
374
376
  const localEnabled = parseBoolEnv("LOCALBUDDY_ENABLED") ?? asBoolean(localNode.enabled, false);
@@ -619,7 +621,9 @@ function loadPushPalsConfig(options = {}) {
619
621
  port: serverPort,
620
622
  debugHttp,
621
623
  staleClaimTtlMs,
622
- staleClaimSweepIntervalMs
624
+ staleClaimSweepIntervalMs,
625
+ sessionTokenBudget,
626
+ sessionTokenBudgetAction
623
627
  },
624
628
  localbuddy: {
625
629
  enabled: localEnabled,
@@ -1684,7 +1688,7 @@ async function ensureBundledMonitoringHubRoot() {
1684
1688
  return resolveBundledMonitoringHubRoot();
1685
1689
  }
1686
1690
  function repoLooksLikePushPalsSourceCheckout(repoRoot) {
1687
- return existsSync4(join2(repoRoot, "configs", "default.toml")) || existsSync4(join2(repoRoot, "config", "default.toml"));
1691
+ return existsSync4(join2(repoRoot, "configs", "default.toml"));
1688
1692
  }
1689
1693
  function parseSemverFromPackageVersion(value) {
1690
1694
  const raw = String(value ?? "").trim();
@@ -4171,6 +4175,7 @@ export {
4171
4175
  resolveCliLocalBuddyAutostart,
4172
4176
  resolveBundledRuntimeAssetSource,
4173
4177
  resolveBundledMonitoringHubRoot,
4178
+ repoLooksLikePushPalsSourceCheckout,
4174
4179
  prepareEmbeddedWorkerpalDockerImageIfNeeded,
4175
4180
  prepareCliRuntime,
4176
4181
  precheckWorkerpalDockerAvailability,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.0.21",
3
+ "version": "1.0.23",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -21,6 +21,7 @@ port = 3001
21
21
  debug_http = false
22
22
  stale_claim_ttl_ms = 120000
23
23
  stale_claim_sweep_interval_ms = 5000
24
+ session_token_budget = 2000000
24
25
 
25
26
  [localbuddy]
26
27
  enabled = false
@@ -197,7 +198,7 @@ session_id = "workerpals-dev"
197
198
  [workerpals.openai_codex]
198
199
  timeout_ms = 7200000
199
200
  progress_log_interval_s = 30
200
- reasoning_effort = "high"
201
+ reasoning_effort = "xhigh"
201
202
  approval_policy = "never"
202
203
  sandbox = "workspace-write"
203
204
  color = "never"
@@ -8,19 +8,19 @@
8
8
 
9
9
  [localbuddy.llm]
10
10
  backend = "openai_codex"
11
- model = "gpt-5-codex"
11
+ model = "gpt-5.4"
12
12
  codex_auth_mode = "chatgpt"
13
13
  codex_bin = "bun x --yes @openai/codex"
14
14
  codex_timeout_ms = 120000
15
- reasoning_effort = "high"
15
+ reasoning_effort = "xhigh"
16
16
 
17
17
  [remotebuddy.llm]
18
18
  backend = "openai_codex"
19
- model = "gpt-5-codex"
19
+ model = "gpt-5.4"
20
20
  codex_auth_mode = "chatgpt"
21
21
  codex_bin = "bun x --yes @openai/codex"
22
22
  codex_timeout_ms = 120000
23
- reasoning_effort = "high"
23
+ reasoning_effort = "xhigh"
24
24
 
25
25
  [remotebuddy]
26
26
  max_workerpals = 10
@@ -42,11 +42,11 @@ retention_days = 30
42
42
 
43
43
  [workerpals.llm]
44
44
  backend = "openai_codex"
45
- model = "gpt-5-codex"
45
+ model = "gpt-5.4"
46
46
  codex_auth_mode = "chatgpt"
47
47
  codex_bin = "bun x --yes @openai/codex"
48
48
  codex_timeout_ms = 120000
49
- reasoning_effort = "high"
49
+ reasoning_effort = "xhigh"
50
50
 
51
51
  [workerpals]
52
52
  executor = "openai_codex"
@@ -92,7 +92,7 @@ bin = "bun x --yes @openai/codex"
92
92
  timeout_ms = 7200000
93
93
  progress_log_interval_s = 30
94
94
  # timeout_s = 120 # optional; if set, overrides timeout_ms
95
- reasoning_effort = "high"
95
+ reasoning_effort = "xhigh"
96
96
  approval_policy = "never"
97
97
  sandbox = "workspace-write"
98
98
  color = "never"
@@ -57,6 +57,7 @@ from executor_base import (
57
57
  log_git_status,
58
58
  looks_local_base_url,
59
59
  parse_task_execute_payload,
60
+ prompts_root_for_runtime_assets,
60
61
  resolve_llm_config,
61
62
  setting_int,
62
63
  setting_str,
@@ -992,11 +993,7 @@ def _is_git_porcelain_status_command(cmd: str) -> bool:
992
993
 
993
994
 
994
995
  def _repo_root_for_prompt_loading() -> Path:
995
- current = Path(__file__).resolve()
996
- for parent in current.parents:
997
- if (parent / "prompts").is_dir():
998
- return parent
999
- return current.parents[5]
996
+ return prompts_root_for_runtime_assets()
1000
997
 
1001
998
 
1002
999
  def _resolve_prompt_file(relative_path: str) -> Path:
@@ -35,6 +35,7 @@ from executor_base import (
35
35
  log_git_status,
36
36
  looks_local_base_url,
37
37
  parse_task_execute_payload,
38
+ prompts_root_for_runtime_assets,
38
39
  resolve_llm_config,
39
40
  summarize_git_changes,
40
41
  to_int,
@@ -42,7 +43,7 @@ from executor_base import (
42
43
  )
43
44
 
44
45
  LOG_PREFIX = "[OpenAICodexExecutor]"
45
- DEFAULT_CODEX_MODEL = "gpt-5-codex"
46
+ DEFAULT_CODEX_MODEL = "gpt-5.4"
46
47
  _ACTIVE_CHILD: Optional[subprocess.Popen[str]] = None
47
48
  _INTERRUPTED_SIGNAL: Optional[int] = None
48
49
  log = Logger(LOG_PREFIX)
@@ -79,7 +80,7 @@ _VALID_APPROVAL_POLICIES = {"untrusted", "on-failure", "on-request", "never"}
79
80
  _VALID_SANDBOX_POLICIES = {"read-only", "workspace-write", "danger-full-access"}
80
81
  _VALID_COLORS = {"always", "never", "auto"}
81
82
  _VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
82
- _VALID_REASONING_EFFORTS = {"low", "medium", "high"}
83
+ _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
83
84
 
84
85
 
85
86
  @dataclass(frozen=True)
@@ -151,7 +152,7 @@ class OpenAICodexRuntimeConfig:
151
152
  reasoning_effort=cfg.get_str(
152
153
  env_names=("WORKERPALS_LLM_REASONING_EFFORT", "WORKERPALS_OPENAI_CODEX_REASONING_EFFORT"),
153
154
  config_paths=("workerpals.llm.reasoning_effort", "workerpals.openai_codex.reasoning_effort"),
154
- default="high",
155
+ default="xhigh",
155
156
  ),
156
157
  approval_policy=cfg.get_str(
157
158
  env_names=("WORKERPALS_OPENAI_CODEX_APPROVAL_POLICY",),
@@ -188,12 +189,7 @@ def _truncate(text: str, max_chars: int = 4000) -> str:
188
189
 
189
190
 
190
191
  def _repo_root_for_prompt_loading() -> Path:
191
- current = Path(__file__).resolve()
192
- for parent in current.parents:
193
- if (parent / "prompts").is_dir():
194
- return parent
195
- # Fallback to historical layout depth if prompts/ cannot be discovered.
196
- return current.parents[5]
192
+ return prompts_root_for_runtime_assets()
197
193
 
198
194
 
199
195
  def _resolve_prompt_file(relative_path: str) -> Path:
@@ -323,13 +319,15 @@ def _resolve_communicate_timeout_seconds(config: OpenAICodexRuntimeConfig) -> Op
323
319
  def _resolve_reasoning_effort(config: OpenAICodexRuntimeConfig) -> str:
324
320
  raw = config.reasoning_effort
325
321
  normalized = str(raw).strip().lower()
322
+ if normalized in {"extra high", "extra-high", "extrahigh", "x-high"}:
323
+ normalized = "xhigh"
326
324
  if normalized in _VALID_REASONING_EFFORTS:
327
325
  return normalized
328
326
  log.info(
329
327
  "Invalid workerpals.openai_codex.reasoning_effort="
330
- f"{raw!r}; using default 'high'. Allowed: low, medium, high."
328
+ f"{raw!r}; using default 'xhigh'. Allowed: low, medium, high, xhigh."
331
329
  )
332
- return "high"
330
+ return "xhigh"
333
331
 
334
332
 
335
333
  def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) -> int:
@@ -419,6 +417,88 @@ def _contains_reasoning_marker(value: str) -> bool:
419
417
  return "reasoning" in lowered or "thinking" in lowered
420
418
 
421
419
 
420
+ def _coerce_non_negative_int(value: Any) -> Optional[int]:
421
+ try:
422
+ parsed = int(value)
423
+ except Exception:
424
+ return None
425
+ if parsed < 0:
426
+ return None
427
+ return parsed
428
+
429
+
430
+ def _normalize_usage_counts(
431
+ prompt_tokens: Optional[int],
432
+ completion_tokens: Optional[int],
433
+ total_tokens: Optional[int],
434
+ ) -> Optional[Dict[str, int]]:
435
+ if prompt_tokens is None and completion_tokens is None and total_tokens is None:
436
+ return None
437
+ prompt = prompt_tokens if prompt_tokens is not None else 0
438
+ completion = completion_tokens if completion_tokens is not None else 0
439
+ total = total_tokens if total_tokens is not None else prompt + completion
440
+ if prompt_tokens is None and total_tokens is not None and completion_tokens is not None:
441
+ prompt = max(0, total - completion)
442
+ if completion_tokens is None and total_tokens is not None and prompt_tokens is not None:
443
+ completion = max(0, total - prompt)
444
+ total = max(total, prompt + completion)
445
+ if total <= 0:
446
+ return None
447
+ return {
448
+ "prompt_tokens": int(prompt),
449
+ "completion_tokens": int(completion),
450
+ "total_tokens": int(total),
451
+ }
452
+
453
+
454
+ def _extract_usage_counts(value: Any) -> Optional[Dict[str, int]]:
455
+ best: Optional[Dict[str, int]] = None
456
+ stack: List[Any] = [value]
457
+ visited = 0
458
+ max_nodes = 256
459
+
460
+ while stack and visited < max_nodes:
461
+ current = stack.pop()
462
+ visited += 1
463
+ if isinstance(current, list):
464
+ for item in reversed(current[:80]):
465
+ if isinstance(item, (dict, list)):
466
+ stack.append(item)
467
+ continue
468
+ if not isinstance(current, dict):
469
+ continue
470
+
471
+ prompt_tokens = _coerce_non_negative_int(
472
+ current.get("prompt_tokens")
473
+ or current.get("promptTokens")
474
+ or current.get("input_tokens")
475
+ or current.get("inputTokens")
476
+ )
477
+ completion_tokens = _coerce_non_negative_int(
478
+ current.get("completion_tokens")
479
+ or current.get("completionTokens")
480
+ or current.get("output_tokens")
481
+ or current.get("outputTokens")
482
+ )
483
+ total_tokens = _coerce_non_negative_int(
484
+ current.get("total_tokens") or current.get("totalTokens")
485
+ )
486
+ normalized = _normalize_usage_counts(prompt_tokens, completion_tokens, total_tokens)
487
+ if normalized is not None:
488
+ if best is None or normalized["total_tokens"] > best["total_tokens"]:
489
+ best = normalized
490
+
491
+ usage_node = current.get("usage")
492
+ if isinstance(usage_node, (dict, list)):
493
+ stack.append(usage_node)
494
+
495
+ for nested in current.values():
496
+ if isinstance(nested, (dict, list)):
497
+ stack.append(nested)
498
+
499
+ return best
500
+
501
+
422
502
  def _event_contains_reasoning(value: Any) -> bool:
423
503
  max_nodes = 256
424
504
  visited = 0
@@ -581,6 +661,9 @@ def _empty_codex_trace() -> Dict[str, Any]:
581
661
  "raw_logged": 0,
582
662
  "raw_omitted": 0,
583
663
  "reasoning_events": 0,
664
+ "prompt_tokens": 0,
665
+ "completion_tokens": 0,
666
+ "total_tokens": 0,
584
667
  }
585
668
 
586
669
 
@@ -611,6 +694,17 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
611
694
  return
612
695
 
613
696
  if isinstance(parsed, dict):
697
+ usage = _extract_usage_counts(parsed)
698
+ if usage is not None:
699
+ trace["prompt_tokens"] = max(
700
+ to_int(trace.get("prompt_tokens"), 0), usage["prompt_tokens"]
701
+ )
702
+ trace["completion_tokens"] = max(
703
+ to_int(trace.get("completion_tokens"), 0), usage["completion_tokens"]
704
+ )
705
+ trace["total_tokens"] = max(
706
+ to_int(trace.get("total_tokens"), 0), usage["total_tokens"]
707
+ )
614
708
  event_type = (
615
709
  str(parsed.get("type") or parsed.get("event") or parsed.get("kind") or "event")
616
710
  .strip()
@@ -677,10 +771,17 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
677
771
  if raw_omitted > 0:
678
772
  log.info(f"[codex/raw] ... {raw_omitted} additional line(s) omitted.")
679
773
  reasoning_events = to_int(trace.get("reasoning_events"), 0)
774
+ prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
775
+ completion_tokens = to_int(trace.get("completion_tokens"), 0)
776
+ total_tokens = to_int(trace.get("total_tokens"), 0)
680
777
  if reasoning_events > 0:
681
778
  log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
682
779
  elif use_json and valid_json > 0:
683
780
  log.info("[codex] No reasoning-like events observed in this run.")
781
+ if total_tokens > 0:
782
+ log.info(
783
+ f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
784
+ )
684
785
 
685
786
  if not summaries and event_type_counts:
686
787
  ranked = sorted(event_type_counts.items(), key=lambda item: item[1], reverse=True)
@@ -694,9 +795,41 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
694
795
  "summaries": summaries,
695
796
  "event_type_counts": event_type_counts,
696
797
  "reasoning_events": reasoning_events,
798
+ "prompt_tokens": prompt_tokens,
799
+ "completion_tokens": completion_tokens,
800
+ "total_tokens": total_tokens,
801
+ }
802
+
803
+
804
+ def _estimated_usage(prompt: str, output_text: str, *, model: str) -> Dict[str, Any]:
805
+ prompt_tokens = max(0, int(len(str(prompt or "")) / 3 + 0.999999))
806
+ completion_tokens = max(0, int(len(str(output_text or "")) / 3 + 0.999999))
807
+ return {
808
+ "promptTokens": prompt_tokens,
809
+ "completionTokens": completion_tokens,
810
+ "totalTokens": prompt_tokens + completion_tokens,
811
+ "estimated": True,
812
+ "backend": "openai_codex",
813
+ "modelId": model,
697
814
  }
698
815
 
699
816
 
817
+ def _usage_from_trace_or_estimate(trace: Dict[str, Any], prompt: str, output_text: str, *, model: str) -> Dict[str, Any]:
818
+ total_tokens = to_int(trace.get("total_tokens"), 0)
819
+ if total_tokens > 0:
820
+ prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
821
+ completion_tokens = to_int(trace.get("completion_tokens"), 0)
822
+ return {
823
+ "promptTokens": prompt_tokens,
824
+ "completionTokens": completion_tokens,
825
+ "totalTokens": max(total_tokens, prompt_tokens + completion_tokens),
826
+ "estimated": False,
827
+ "backend": "openai_codex",
828
+ "modelId": model,
829
+ }
830
+ return _estimated_usage(prompt, output_text, model=model)
831
+
832
+
700
833
  def _log_stderr(stderr: str) -> None:
701
834
  lines = [line.strip() for line in stderr.splitlines() if line.strip()]
702
835
  if not lines:
@@ -1125,6 +1258,10 @@ def _run_codex_task(
1125
1258
  stdout_trace = _finalize_codex_stdout_trace(stdout_trace_state, use_json)
1126
1259
  trace_excerpt = _format_codex_trace_excerpt(stdout_trace)
1127
1260
  _log_stderr(stderr)
1261
+ usage_output_text = "\n\n".join(
1262
+ part for part in (stdout, stderr, trace_excerpt) if str(part or "").strip()
1263
+ )
1264
+ usage = _usage_from_trace_or_estimate(stdout_trace, prompt, usage_output_text, model=model)
1128
1265
 
1129
1266
  if timed_out:
1130
1267
  detail = (
@@ -1140,6 +1277,7 @@ def _run_codex_task(
1140
1277
  "stdout": _truncate(stdout),
1141
1278
  "stderr": _truncate(f"{detail}\n{stderr}".strip()),
1142
1279
  "exitCode": 124,
1280
+ "usage": usage,
1143
1281
  }
1144
1282
 
1145
1283
  last_message = _read_text_if_exists(last_message_path)
@@ -1152,6 +1290,7 @@ def _run_codex_task(
1152
1290
  "stdout": _truncate(stdout),
1153
1291
  "stderr": _truncate(stderr),
1154
1292
  "exitCode": 128 + int(_INTERRUPTED_SIGNAL),
1293
+ "usage": usage,
1155
1294
  }
1156
1295
 
1157
1296
  if return_code is None:
@@ -1161,6 +1300,7 @@ def _run_codex_task(
1161
1300
  "stdout": _truncate(stdout),
1162
1301
  "stderr": _truncate(stderr),
1163
1302
  "exitCode": 1,
1303
+ "usage": usage,
1164
1304
  }
1165
1305
 
1166
1306
  exit_code = int(return_code)
@@ -1177,6 +1317,7 @@ def _run_codex_task(
1177
1317
  "stdout": _truncate(stdout),
1178
1318
  "stderr": _truncate(detail),
1179
1319
  "exitCode": exit_code,
1320
+ "usage": usage,
1180
1321
  }
1181
1322
 
1182
1323
  policy_signal = _detect_codex_workaround_signal(last_message)
@@ -1199,6 +1340,7 @@ def _run_codex_task(
1199
1340
  "stdout": _truncate(stdout),
1200
1341
  "stderr": _truncate(detail),
1201
1342
  "exitCode": 5,
1343
+ "usage": usage,
1202
1344
  }
1203
1345
 
1204
1346
  changed_paths = summarize_git_changes(repo)
@@ -1220,6 +1362,7 @@ def _run_codex_task(
1220
1362
  "stdout": "\n\n".join(stdout_parts),
1221
1363
  "stderr": "",
1222
1364
  "exitCode": 0,
1365
+ "usage": usage,
1223
1366
  }
1224
1367
 
1225
1368
  if not stdout_parts:
@@ -1230,6 +1373,7 @@ def _run_codex_task(
1230
1373
  "stdout": "\n\n".join(stdout_parts),
1231
1374
  "stderr": "",
1232
1375
  "exitCode": 0,
1376
+ "usage": usage,
1233
1377
  }
1234
1378
 
1235
1379
 
@@ -1,5 +1,7 @@
1
+ import os
1
2
  import sys
2
3
  import unittest
4
+ import tempfile
3
5
  from pathlib import Path
4
6
 
5
7
  _HERE = Path(__file__).resolve().parent
@@ -8,13 +10,16 @@ for path in (_HERE, _SHARED):
8
10
  if str(path) not in sys.path:
9
11
  sys.path.insert(0, str(path))
10
12
 
11
- from executor_base import SettingsResolver
13
+ from executor_base import SettingsResolver, config_dir_for_runtime_config, runtime_config
12
14
  from openai_codex_executor import (
13
15
  OpenAICodexRuntimeConfig,
16
+ _resolve_reasoning_effort,
14
17
  _build_instruction,
15
18
  _detect_codex_workaround_signal,
19
+ _extract_usage_counts,
16
20
  _load_prompt_template,
17
21
  _repo_root_for_prompt_loading,
22
+ _usage_from_trace_or_estimate,
18
23
  )
19
24
 
20
25
 
@@ -55,8 +60,60 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
55
60
  self.assertEqual(cfg.approval_policy, "never")
56
61
  self.assertEqual(cfg.sandbox, "workspace-write")
57
62
  self.assertEqual(cfg.color, "never")
63
+ self.assertEqual(cfg.reasoning_effort, "xhigh")
58
64
  self.assertFalse(cfg.json_output)
59
65
 
66
+ def test_reasoning_effort_accepts_extra_high_alias(self) -> None:
67
+ cfg = OpenAICodexRuntimeConfig.from_sources(
68
+ SettingsResolver(
69
+ env={"WORKERPALS_OPENAI_CODEX_REASONING_EFFORT": "extra high"},
70
+ config_loader=lambda: {},
71
+ ),
72
+ )
73
+ self.assertEqual(_resolve_reasoning_effort(cfg), "xhigh")
74
+
75
+ def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
76
+ import executor_base
77
+
78
+ with tempfile.TemporaryDirectory(prefix="pushpals-openai-codex-config-") as root:
79
+ repo_root = Path(root) / "repo"
80
+ runtime_config_dir = Path(root) / "runtime" / "configs"
81
+ repo_config_dir = repo_root / "configs"
82
+ runtime_config_dir.mkdir(parents=True, exist_ok=True)
83
+ repo_config_dir.mkdir(parents=True, exist_ok=True)
84
+
85
+ (runtime_config_dir / "default.toml").write_text(
86
+ 'profile = "dev"\n[workerpals.openai_codex]\njson = true\n',
87
+ encoding="utf-8",
88
+ )
89
+ (repo_config_dir / "default.toml").write_text(
90
+ 'profile = "dev"\n[workerpals.openai_codex]\njson = false\n',
91
+ encoding="utf-8",
92
+ )
93
+
94
+ previous_env = {
95
+ "PUSHPALS_REPO_PATH": os.environ.get("PUSHPALS_REPO_PATH"),
96
+ "PUSHPALS_CONFIG_DIR_OVERRIDE": os.environ.get("PUSHPALS_CONFIG_DIR_OVERRIDE"),
97
+ "PUSHPALS_PROFILE": os.environ.get("PUSHPALS_PROFILE"),
98
+ }
99
+ previous_cache = executor_base._CONFIG_CACHE
100
+ try:
101
+ os.environ["PUSHPALS_REPO_PATH"] = str(repo_root)
102
+ os.environ["PUSHPALS_CONFIG_DIR_OVERRIDE"] = str(runtime_config_dir)
103
+ os.environ["PUSHPALS_PROFILE"] = "dev"
104
+ executor_base._CONFIG_CACHE = None
105
+
106
+ self.assertEqual(config_dir_for_runtime_config(), runtime_config_dir)
107
+ cfg = runtime_config()
108
+ self.assertTrue(cfg["workerpals"]["openai_codex"]["json"])
109
+ finally:
110
+ executor_base._CONFIG_CACHE = previous_cache
111
+ for key, value in previous_env.items():
112
+ if value is None:
113
+ os.environ.pop(key, None)
114
+ else:
115
+ os.environ[key] = value
116
+
60
117
  def test_build_instruction_includes_codex_runtime_invariants(self) -> None:
61
118
  prompt = _build_instruction("Add two tests for localbuddy", [])
62
119
  self.assertIn("Codex CLI is required infrastructure", prompt)
@@ -105,6 +162,32 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
105
162
  template = _load_prompt_template("workerpals/openai_codex_task_execute_system_prompt.md")
106
163
  self.assertIn("Codex CLI is required infrastructure", template)
107
164
 
165
+ def test_extracts_usage_counts_from_nested_json_event(self) -> None:
166
+ usage = _extract_usage_counts(
167
+ {
168
+ "type": "response.completed",
169
+ "response": {
170
+ "usage": {
171
+ "input_tokens": 120,
172
+ "output_tokens": 30,
173
+ "total_tokens": 150,
174
+ }
175
+ },
176
+ }
177
+ )
178
+ self.assertEqual(
179
+ usage,
180
+ {"prompt_tokens": 120, "completion_tokens": 30, "total_tokens": 150},
181
+ )
182
+
183
+ def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
184
+ usage = _usage_from_trace_or_estimate({}, "abc" * 30, "done", model="gpt-5.4")
185
+ self.assertTrue(usage["estimated"])
186
+ self.assertEqual(usage["backend"], "openai_codex")
187
+ self.assertEqual(usage["modelId"], "gpt-5.4")
188
+ self.assertGreater(usage["promptTokens"], 0)
189
+ self.assertGreater(usage["totalTokens"], usage["completionTokens"])
190
+
108
191
 
109
192
  if __name__ == "__main__":
110
193
  unittest.main()
@@ -28,7 +28,7 @@ from executor_base import (
28
28
  log_git_status,
29
29
  looks_local_base_url,
30
30
  parse_task_execute_payload,
31
- repo_root_for_runtime_config,
31
+ prompts_root_for_runtime_assets,
32
32
  resolve_llm_config,
33
33
  setting_int,
34
34
  setting_str,
@@ -78,7 +78,7 @@ def _session_hint_headers(session_user: str) -> Dict[str, str]:
78
78
 
79
79
 
80
80
  def _repo_root_for_prompt_loading() -> Path:
81
- return repo_root_for_runtime_config()
81
+ return prompts_root_for_runtime_assets()
82
82
 
83
83
 
84
84
  def _resolve_prompt_file(relative_path: str) -> Path:
@@ -0,0 +1,57 @@
1
+ import os
2
+ import sys
3
+ import tempfile
4
+ import unittest
5
+ from pathlib import Path
6
+
7
+ _HERE = Path(__file__).resolve().parent
8
+ _SHARED = _HERE.parent / "shared"
9
+ for path in (_HERE, _SHARED):
10
+ if str(path) not in sys.path:
11
+ sys.path.insert(0, str(path))
12
+
13
+ from openhands_executor import _PROMPT_TEMPLATE_CACHE, _load_prompt_template, _resolve_prompt_file
14
+
15
+
16
+ class OpenHandsRuntimePathTests(unittest.TestCase):
17
+ def test_prompt_resolution_prefers_explicit_prompt_root_override(self) -> None:
18
+ with tempfile.TemporaryDirectory(prefix="pushpals-openhands-prompts-") as root:
19
+ repo_root = Path(root) / "repo"
20
+ runtime_root = Path(root) / "runtime"
21
+ repo_prompt = repo_root / "prompts" / "workerpals" / "openhands_strict_tool_use_message.md"
22
+ runtime_prompt = (
23
+ runtime_root / "prompts" / "workerpals" / "openhands_strict_tool_use_message.md"
24
+ )
25
+ repo_prompt.parent.mkdir(parents=True, exist_ok=True)
26
+ runtime_prompt.parent.mkdir(parents=True, exist_ok=True)
27
+ repo_prompt.write_text("repo prompt", encoding="utf-8")
28
+ runtime_prompt.write_text("runtime prompt", encoding="utf-8")
29
+
30
+ previous_env = {
31
+ "PUSHPALS_REPO_PATH": os.environ.get("PUSHPALS_REPO_PATH"),
32
+ "PUSHPALS_PROMPTS_ROOT_OVERRIDE": os.environ.get("PUSHPALS_PROMPTS_ROOT_OVERRIDE"),
33
+ }
34
+ previous_cache = dict(_PROMPT_TEMPLATE_CACHE)
35
+ try:
36
+ os.environ["PUSHPALS_REPO_PATH"] = str(repo_root)
37
+ os.environ["PUSHPALS_PROMPTS_ROOT_OVERRIDE"] = str(runtime_root)
38
+ _PROMPT_TEMPLATE_CACHE.clear()
39
+
40
+ resolved = _resolve_prompt_file("workerpals/openhands_strict_tool_use_message.md")
41
+ self.assertEqual(resolved, runtime_prompt)
42
+ self.assertEqual(
43
+ _load_prompt_template("workerpals/openhands_strict_tool_use_message.md"),
44
+ "runtime prompt",
45
+ )
46
+ finally:
47
+ _PROMPT_TEMPLATE_CACHE.clear()
48
+ _PROMPT_TEMPLATE_CACHE.update(previous_cache)
49
+ for key, value in previous_env.items():
50
+ if value is None:
51
+ os.environ.pop(key, None)
52
+ else:
53
+ os.environ[key] = value
54
+
55
+
56
+ if __name__ == "__main__":
57
+ unittest.main()
@@ -8,7 +8,7 @@
8
8
 
9
9
  import { existsSync } from "fs";
10
10
  import { resolve } from "path";
11
- import type { JobResult } from "../common/types.js";
11
+ import type { JobResult, JobTokenUsage } from "../common/types.js";
12
12
  import type { WorkerpalsRuntimeConfig } from "../common/executor_backend.js";
13
13
  import {
14
14
  truncate,
@@ -24,6 +24,83 @@ const OPENHANDS_SCRIPT_PATH = resolve(import.meta.dir, "openhands", "openhands_e
24
24
 
25
25
  // ---- OpenHands-specific helpers ----------------------------------------------
26
26
 
27
+ function estimateTokensFromText(text: string): number {
28
+ return Math.max(0, Math.ceil(String(text ?? "").length / 3));
29
+ }
30
+
31
+ function estimateJobTokenUsage(
32
+ runtimeConfig: WorkerpalsRuntimeConfig,
33
+ params: Record<string, unknown>,
34
+ summary: string,
35
+ stdout: string,
36
+ stderr: string,
37
+ ): JobTokenUsage {
38
+ const promptSource = (() => {
39
+ try {
40
+ return JSON.stringify(params);
41
+ } catch {
42
+ return String(params?.instruction ?? params?.prompt ?? "");
43
+ }
44
+ })();
45
+ const completionSource = [summary, stdout, stderr].filter(Boolean).join("\n\n");
46
+ const promptTokens = estimateTokensFromText(promptSource);
47
+ const completionTokens = estimateTokensFromText(completionSource);
48
+ return {
49
+ promptTokens,
50
+ completionTokens,
51
+ totalTokens: promptTokens + completionTokens,
52
+ estimated: true,
53
+ backend: "openhands",
54
+ modelId: runtimeConfig.workerpals.llm.model.trim(),
55
+ };
56
+ }
57
+
58
+ function coerceJobTokenUsage(
59
+ value: unknown,
60
+ fallback: JobTokenUsage,
61
+ ): JobTokenUsage {
62
+ if (!value || typeof value !== "object" || Array.isArray(value)) {
63
+ return fallback;
64
+ }
65
+ const raw = value as Record<string, unknown>;
66
+ const promptTokens = Number(raw.promptTokens ?? raw.prompt_tokens);
67
+ const completionTokens = Number(raw.completionTokens ?? raw.completion_tokens);
68
+ const totalTokens = Number(raw.totalTokens ?? raw.total_tokens);
69
+ const hasPrompt = Number.isFinite(promptTokens) && promptTokens >= 0;
70
+ const hasCompletion = Number.isFinite(completionTokens) && completionTokens >= 0;
71
+ const hasTotal = Number.isFinite(totalTokens) && totalTokens >= 0;
72
+ if (!hasPrompt && !hasCompletion && !hasTotal) {
73
+ return fallback;
74
+ }
75
+ const normalizedPrompt = hasPrompt
76
+ ? Math.round(promptTokens)
77
+ : hasTotal
78
+ ? Math.max(0, Math.round(totalTokens) - fallback.completionTokens)
79
+ : fallback.promptTokens;
80
+ const normalizedCompletion = hasCompletion
81
+ ? Math.round(completionTokens)
82
+ : hasTotal
83
+ ? Math.max(0, Math.round(totalTokens) - normalizedPrompt)
84
+ : fallback.completionTokens;
85
+ const normalizedTotal = hasTotal
86
+ ? Math.round(totalTokens)
87
+ : normalizedPrompt + normalizedCompletion;
88
+ return {
89
+ promptTokens: normalizedPrompt,
90
+ completionTokens: normalizedCompletion,
91
+ totalTokens: normalizedTotal,
92
+ estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
93
+ backend:
94
+ typeof raw.backend === "string" && raw.backend.trim().length > 0
95
+ ? raw.backend.trim()
96
+ : fallback.backend,
97
+ modelId:
98
+ typeof raw.modelId === "string" && raw.modelId.trim().length > 0
99
+ ? raw.modelId.trim()
100
+ : fallback.modelId,
101
+ };
102
+ }
103
+
27
104
  function classifyShellCommand(cmd: string): "explore" | "progress" {
28
105
  const trimmed = cmd.trim().toLowerCase();
29
106
  if (!trimmed) return "explore";
@@ -450,6 +527,7 @@ export async function executeWithOpenHands(
450
527
 
451
528
  const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
452
529
  const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
530
+ const fallbackUsage = estimateJobTokenUsage(runtimeConfig, params, "", filteredStdout, stderr);
453
531
 
454
532
  if (!parsed) {
455
533
  if (timedOut) {
@@ -464,6 +542,7 @@ export async function executeWithOpenHands(
464
542
  stdout: truncate(filteredStdout, outputPolicy),
465
543
  stderr: truncate(stderr, outputPolicy),
466
544
  exitCode: exitCode === 0 ? 124 : exitCode,
545
+ usage: fallbackUsage,
467
546
  };
468
547
  }
469
548
  return {
@@ -472,6 +551,7 @@ export async function executeWithOpenHands(
472
551
  stdout: truncate(filteredStdout, outputPolicy),
473
552
  stderr: truncate(stderr, outputPolicy),
474
553
  exitCode,
554
+ usage: fallbackUsage,
475
555
  };
476
556
  }
477
557
 
@@ -483,6 +563,10 @@ export async function executeWithOpenHands(
483
563
  : `${kind} failed via OpenHands (exit ${exitCode})`;
484
564
  const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
485
565
  const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
566
+ const usage = coerceJobTokenUsage(
567
+ parsed.usage,
568
+ estimateJobTokenUsage(runtimeConfig, params, summary, parsedStdout, parsedStderr),
569
+ );
486
570
  const parsedExitCode =
487
571
  typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
488
572
  ? parsed.exitCode
@@ -502,6 +586,7 @@ export async function executeWithOpenHands(
502
586
  stdout: truncate(filteredStdout || String(parsedStdout ?? ""), outputPolicy),
503
587
  stderr: truncate(`Clarification needed: ${clarificationQuestion}`, outputPolicy),
504
588
  exitCode: 0,
589
+ usage,
505
590
  };
506
591
  }
507
592
  }
@@ -512,12 +597,20 @@ export async function executeWithOpenHands(
512
597
  stdout: truncate(parsedStdout ?? "", outputPolicy),
513
598
  stderr: truncate(parsedStderr ?? "", outputPolicy),
514
599
  exitCode: parsedExitCode,
600
+ usage,
515
601
  };
516
602
  } catch (err) {
517
603
  return {
518
604
  ok: false,
519
605
  summary: `OpenHands wrapper execution error for ${kind}: ${String(err)}`,
520
606
  exitCode: 1,
607
+ usage: estimateJobTokenUsage(
608
+ runtimeConfig,
609
+ params,
610
+ `OpenHands wrapper execution error for ${kind}: ${String(err)}`,
611
+ "",
612
+ "",
613
+ ),
521
614
  };
522
615
  } finally {
523
616
  if (warningTimer) {
@@ -203,6 +203,24 @@ def repo_root_for_runtime_config() -> Path:
203
203
  return Path(__file__).resolve().parents[3]
204
204
 
205
205
 
206
+ def config_dir_for_runtime_config() -> Path:
207
+ explicit = (os.environ.get("PUSHPALS_CONFIG_DIR_OVERRIDE") or "").strip()
208
+ if explicit:
209
+ return Path(explicit)
210
+ return repo_root_for_runtime_config() / "configs"
211
+
212
+
213
+ def prompts_root_for_runtime_assets() -> Path:
214
+ explicit = (os.environ.get("PUSHPALS_PROMPTS_ROOT_OVERRIDE") or "").strip()
215
+ if explicit:
216
+ return Path(explicit)
217
+ current = Path(__file__).resolve()
218
+ for parent in current.parents:
219
+ if (parent / "prompts").is_dir():
220
+ return parent
221
+ return repo_root_for_runtime_config()
222
+
223
+
206
224
  def _parse_toml_file(path: Path) -> Dict[str, Any]:
207
225
  if not path.exists() or not tomllib:
208
226
  return {}
@@ -217,12 +235,7 @@ def runtime_config() -> Dict[str, Any]:
217
235
  global _CONFIG_CACHE
218
236
  if _CONFIG_CACHE is not None:
219
237
  return _CONFIG_CACHE
220
- repo_root = repo_root_for_runtime_config()
221
- legacy_config_dir = repo_root / "config"
222
- config_dir = repo_root / "configs"
223
- if not (config_dir / "default.toml").exists():
224
- if (legacy_config_dir / "default.toml").exists():
225
- config_dir = legacy_config_dir
238
+ config_dir = config_dir_for_runtime_config()
226
239
  default_cfg = _parse_toml_file(config_dir / "default.toml")
227
240
  profile = (
228
241
  (os.environ.get("PUSHPALS_PROFILE") or "").strip()
@@ -231,12 +244,6 @@ def runtime_config() -> Dict[str, Any]:
231
244
  )
232
245
  profile_cfg = _parse_toml_file(config_dir / f"{profile}.toml")
233
246
  local_cfg = _parse_toml_file(config_dir / "local.toml")
234
- if (
235
- not local_cfg
236
- and config_dir != legacy_config_dir
237
- and (legacy_config_dir / "local.toml").exists()
238
- ):
239
- local_cfg = _parse_toml_file(legacy_config_dir / "local.toml")
240
247
  _CONFIG_CACHE = _deep_merge(_deep_merge(default_cfg, profile_cfg), local_cfg)
241
248
  return _CONFIG_CACHE
242
249
 
@@ -9,7 +9,7 @@
9
9
 
10
10
  import { existsSync } from "fs";
11
11
  import { resolve } from "path";
12
- import type { JobResult } from "./types.js";
12
+ import type { JobResult, JobTokenUsage } from "./types.js";
13
13
  import type { WorkerpalsRuntimeConfig } from "./executor_backend.js";
14
14
  import type { BackendTaskExecutor } from "../backends/types.js";
15
15
  import {
@@ -26,6 +26,84 @@ interface GenericPythonExecutorConfig {
26
26
  timeoutConfigKey: string;
27
27
  }
28
28
 
29
/**
 * Rough token estimate for a piece of text: one token per `charsPerToken`
 * UTF-16 code units, rounded up.
 *
 * @param text Text to measure; `null`/`undefined` are treated as empty.
 * @param charsPerToken Characters assumed per token. Defaults to 3 (the ratio
 *   this helper has always used); non-finite or non-positive values fall back
 *   to 3 so the result is never NaN or Infinity.
 * @returns A non-negative integer estimate.
 */
function estimateTokensFromText(text: string, charsPerToken = 3): number {
  const ratio = Number.isFinite(charsPerToken) && charsPerToken > 0 ? charsPerToken : 3;
  // length >= 0 and ratio > 0, so no lower clamp is required.
  return Math.ceil(String(text ?? "").length / ratio);
}
32
+
33
/**
 * Builds a heuristic {@link JobTokenUsage} for a job from the text that went
 * in and came out of it.
 *
 * The prompt side is the JSON serialization of `params` (falling back to the
 * `instruction`/`prompt` field if serialization throws); the completion side
 * is the non-empty parts of `summary`, `stdout` and `stderr` joined by a blank
 * line. Both are sized with `estimateTokensFromText`.
 *
 * @returns Usage flagged `estimated: true`, tagged with `backendName` and `modelId`.
 */
function estimateJobTokenUsage(
  backendName: string,
  modelId: string,
  params: Record<string, unknown>,
  summary: string,
  stdout: string,
  stderr: string,
): JobTokenUsage {
  let serializedParams: string;
  try {
    serializedParams = JSON.stringify(params);
  } catch {
    // e.g. circular structures: size only the main instruction text instead.
    serializedParams = String(params?.instruction ?? params?.prompt ?? "");
  }

  const outputText = [summary, stdout, stderr].filter((part) => Boolean(part)).join("\n\n");

  const promptTokens = estimateTokensFromText(serializedParams);
  const completionTokens = estimateTokensFromText(outputText);
  const totalTokens = promptTokens + completionTokens;

  return {
    promptTokens,
    completionTokens,
    totalTokens,
    estimated: true,
    backend: backendName,
    modelId,
  };
}
60
+
61
/**
 * Normalizes a usage object reported by an executor into a
 * {@link JobTokenUsage}, filling gaps from `fallback`.
 *
 * Accepts camelCase or snake_case keys (`promptTokens`/`prompt_tokens`, etc.).
 * A count is honoured only when it is a finite, non-negative number or a
 * non-blank numeric string; anything else (booleans, `""`, arrays, ...) is
 * treated as missing. Counts are rounded to integers.
 *
 * Missing sides are derived so that prompt + completion stays consistent with
 * a reported total:
 *   - prompt and completion reported: both used as-is;
 *   - one side plus total reported: the other side is `total - reported side`;
 *   - only total reported: the fallback completion estimate is kept and the
 *     remainder is attributed to the prompt;
 *   - one side and no total: the other side comes from `fallback`.
 *
 * @param value Untrusted value taken from the executor's structured result.
 * @param fallback Usage to return (or borrow fields from) when `value` is
 *   unusable or incomplete.
 * @returns `fallback` itself when `value` carries no usable count, otherwise a
 *   new object. `estimated` is `false` unless `value.estimated` is a boolean.
 */
function coerceJobTokenUsage(value: unknown, fallback: JobTokenUsage): JobTokenUsage {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return fallback;
  }
  const raw = value as Record<string, unknown>;

  // Reads a token count under either key style; undefined when absent/invalid.
  const readCount = (camelKey: string, snakeKey: string): number | undefined => {
    const candidate = raw[camelKey] ?? raw[snakeKey];
    let parsed = Number.NaN;
    if (typeof candidate === "number") {
      parsed = candidate;
    } else if (typeof candidate === "string" && candidate.trim() !== "") {
      parsed = Number(candidate);
    }
    return Number.isFinite(parsed) && parsed >= 0 ? Math.round(parsed) : undefined;
  };

  // Reads a non-blank string label under either key style.
  const readLabel = (
    camelKey: string,
    snakeKey: string,
    fallbackLabel: string | undefined,
  ): string | undefined => {
    const candidate = raw[camelKey] ?? raw[snakeKey];
    return typeof candidate === "string" && candidate.trim().length > 0
      ? candidate.trim()
      : fallbackLabel;
  };

  const prompt = readCount("promptTokens", "prompt_tokens");
  const completion = readCount("completionTokens", "completion_tokens");
  const total = readCount("totalTokens", "total_tokens");
  if (prompt === undefined && completion === undefined && total === undefined) {
    return fallback;
  }

  let normalizedPrompt: number;
  let normalizedCompletion: number;
  if (prompt !== undefined && completion !== undefined) {
    normalizedPrompt = prompt;
    normalizedCompletion = completion;
  } else if (prompt !== undefined) {
    normalizedPrompt = prompt;
    normalizedCompletion =
      total !== undefined ? Math.max(0, total - prompt) : fallback.completionTokens;
  } else if (completion !== undefined) {
    // Derive from the *reported* completion, not the fallback estimate, so the
    // two sides add up to the reported total.
    normalizedCompletion = completion;
    normalizedPrompt = total !== undefined ? Math.max(0, total - completion) : fallback.promptTokens;
  } else {
    // Only a total is known (guaranteed by the early return above).
    const knownTotal = total as number;
    normalizedPrompt = Math.max(0, knownTotal - fallback.completionTokens);
    normalizedCompletion = Math.max(0, knownTotal - normalizedPrompt);
  }

  return {
    promptTokens: normalizedPrompt,
    completionTokens: normalizedCompletion,
    totalTokens: total !== undefined ? total : normalizedPrompt + normalizedCompletion,
    estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
    backend: readLabel("backend", "backend", fallback.backend),
    modelId: readLabel("modelId", "model_id", fallback.modelId),
  };
}
106
+
29
107
  function resolveRuntimeSettings(
30
108
  config: GenericPythonExecutorConfig,
31
109
  runtimeConfig: WorkerpalsRuntimeConfig,
@@ -69,6 +147,7 @@ export function createGenericPythonExecutor(
69
147
  config,
70
148
  runtimeConfig,
71
149
  );
150
+ const modelId = runtimeConfig.workerpals.llm.model.trim();
72
151
  const executionBudgetMs =
73
152
  typeof budgets?.executionBudgetMs === "number" && Number.isFinite(budgets.executionBudgetMs)
74
153
  ? Math.max(10_000, Math.floor(budgets.executionBudgetMs))
@@ -158,6 +237,14 @@ export function createGenericPythonExecutor(
158
237
 
159
238
  const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
160
239
  const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
240
+ const fallbackUsage = estimateJobTokenUsage(
241
+ backendName,
242
+ modelId,
243
+ params,
244
+ "",
245
+ filteredStdout,
246
+ stderr,
247
+ );
161
248
 
162
249
  if (!parsed) {
163
250
  if (timedOut) {
@@ -167,6 +254,7 @@ export function createGenericPythonExecutor(
167
254
  stdout: truncate(filteredStdout, outputPolicy),
168
255
  stderr: truncate(stderr, outputPolicy),
169
256
  exitCode: exitCode === 0 ? 124 : exitCode,
257
+ usage: fallbackUsage,
170
258
  };
171
259
  }
172
260
  return {
@@ -175,35 +263,47 @@ export function createGenericPythonExecutor(
175
263
  stdout: truncate(filteredStdout, outputPolicy),
176
264
  stderr: truncate(stderr, outputPolicy),
177
265
  exitCode,
266
+ usage: fallbackUsage,
178
267
  };
179
268
  }
180
269
 
270
+ const summary =
271
+ typeof parsed.summary === "string"
272
+ ? parsed.summary
273
+ : exitCode === 0
274
+ ? `${kind} passed via ${backendName}`
275
+ : `${kind} failed via ${backendName} (exit ${exitCode})`;
276
+ const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
277
+ const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
278
+ const usage = coerceJobTokenUsage(
279
+ parsed.usage,
280
+ estimateJobTokenUsage(backendName, modelId, params, summary, parsedStdout, parsedStderr),
281
+ );
282
+
181
283
  return {
182
284
  ok: typeof parsed.ok === "boolean" ? parsed.ok : exitCode === 0,
183
- summary:
184
- typeof parsed.summary === "string"
185
- ? parsed.summary
186
- : exitCode === 0
187
- ? `${kind} passed via ${backendName}`
188
- : `${kind} failed via ${backendName} (exit ${exitCode})`,
189
- stdout: truncate(
190
- typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout,
191
- outputPolicy,
192
- ),
193
- stderr: truncate(
194
- typeof parsed.stderr === "string" ? parsed.stderr : stderr,
195
- outputPolicy,
196
- ),
285
+ summary,
286
+ stdout: truncate(parsedStdout, outputPolicy),
287
+ stderr: truncate(parsedStderr, outputPolicy),
197
288
  exitCode:
198
289
  typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
199
290
  ? parsed.exitCode
200
291
  : exitCode,
292
+ usage,
201
293
  };
202
294
  } catch (err) {
203
295
  return {
204
296
  ok: false,
205
297
  summary: `${backendName} wrapper execution error for ${kind}: ${String(err)}`,
206
298
  exitCode: 1,
299
+ usage: estimateJobTokenUsage(
300
+ backendName,
301
+ runtimeConfig.workerpals.llm.model.trim(),
302
+ params,
303
+ `${backendName} wrapper execution error for ${kind}: ${String(err)}`,
304
+ "",
305
+ "",
306
+ ),
207
307
  };
208
308
  }
209
309
  };
@@ -1,9 +1,19 @@
1
1
  export type ExecutorBackend = string;
2
2
 
3
/** Token accounting attached to a {@link JobResult}. */
export interface JobTokenUsage {
  /** Tokens attributed to the input/prompt side of the job. */
  promptTokens: number;
  /** Tokens attributed to the output produced by the job. */
  completionTokens: number;
  /** Combined token count, when one is available. */
  totalTokens?: number;
  /** Set to `true` by producers that derived the counts heuristically rather than from a reported figure. */
  estimated?: boolean;
  /** Name of the executor backend the counts belong to. */
  backend?: string;
  /** Identifier of the model the counts belong to. */
  modelId?: string;
}
11
+
3
12
  export interface JobResult {
4
13
  ok: boolean;
5
14
  summary: string;
6
15
  stdout?: string;
7
16
  stderr?: string;
8
17
  exitCode?: number;
18
+ usage?: JobTokenUsage;
9
19
  }
@@ -2022,14 +2022,29 @@ export function shouldUseCodexCliForExecutor(executor: string): boolean {
2022
2022
  return executor.trim().toLowerCase() === "openai_codex";
2023
2023
  }
2024
2024
 
2025
/**
 * Maps an arbitrary config value onto a supported Codex reasoning-effort level.
 *
 * The value is stringified, trimmed and lower-cased. The four canonical levels
 * pass through unchanged. Everything else resolves to `"xhigh"`: the
 * spelled-out aliases (`"extra high"`, `"extra-high"`, `"extrahigh"`,
 * `"x-high"`), empty/missing values and unrecognized strings.
 *
 * @param value Raw setting (any type).
 * @returns One of `"low"`, `"medium"`, `"high"`, `"xhigh"`.
 */
function normalizeCodexReasoningEffort(value: unknown): "low" | "medium" | "high" | "xhigh" {
  const normalized = String(value ?? "")
    .trim()
    .toLowerCase();
  switch (normalized) {
    case "low":
    case "medium":
    case "high":
    case "xhigh":
      return normalized;
    default:
      // The "extra high" style aliases land here as well; they and the
      // fallback share the same result, so no separate branch is needed.
      return "xhigh";
  }
}
2034
2049
 
2035
2050
  async function generateCommitMessageFromDiff(
@@ -91,6 +91,99 @@ function workerLlmConfig(runtimeConfig: ReturnType<typeof loadPushPalsConfig>):
91
91
  };
92
92
  }
93
93
 
94
/**
 * Rough token estimate for a piece of text: one token per `charsPerToken`
 * UTF-16 code units, rounded up.
 *
 * @param text Text to measure; `null`/`undefined` are treated as empty.
 * @param charsPerToken Characters assumed per token. Defaults to 3 (the ratio
 *   this helper has always used); non-finite or non-positive values fall back
 *   to 3 so the result is never NaN or Infinity.
 * @returns A non-negative integer estimate.
 */
function estimateTokensFromText(text: string, charsPerToken = 3): number {
  const ratio = Number.isFinite(charsPerToken) && charsPerToken > 0 ? charsPerToken : 3;
  // length >= 0 and ratio > 0, so no lower clamp is required.
  return Math.ceil(String(text ?? "").length / ratio);
}
97
+
98
/**
 * Builds the telemetry payload describing LLM token usage for a finished job.
 *
 * Returns `null` when no session id can be determined (neither on the job nor
 * in `CONFIG`). When the result carries usage whose prompt and completion
 * counts are finite and non-negative, those (rounded) figures are reported and
 * `estimated` mirrors `usage.estimated === true`. Otherwise the counts are
 * estimated from the serialized job (`kind` + `params`) and from the result's
 * `summary`/`stdout`/`stderr` text, and the event is flagged `estimated: true`.
 *
 * @param job Job descriptor; only `kind`, `sessionId` and `params` are read.
 * @param result Result produced for the job.
 * @returns The event payload, or `null` if there is no session to attribute it to.
 */
function buildWorkerLlmUsageEvent(
  job: {
    kind: string;
    sessionId?: string | null;
    params?: Record<string, unknown> | null;
  },
  result: WorkerJobResult,
): Record<string, unknown> | null {
  const sessionId = String(job.sessionId ?? CONFIG.sessionId ?? "").trim();
  if (sessionId.length === 0) {
    return null;
  }

  const llmConfig = workerLlmConfig(CONFIG);
  const reported = result.usage;
  const isValidCount = (n: unknown): n is number => Number.isFinite(n) && (n as number) >= 0;

  if (reported && isValidCount(reported.promptTokens) && isValidCount(reported.completionTokens)) {
    const promptTokens = Math.round(reported.promptTokens);
    const completionTokens = Math.round(reported.completionTokens);
    // Prefer a reported total; otherwise sum the two sides.
    let totalTokens = promptTokens + completionTokens;
    if (isValidCount(reported.totalTokens)) {
      totalTokens = Math.round(reported.totalTokens);
    }
    // Blank labels fall back to the configured executor / model.
    const backend =
      String(reported.backend ?? resolveExecutor(CONFIG)).trim() || resolveExecutor(CONFIG);
    const modelId = String(reported.modelId ?? llmConfig.model).trim() || llmConfig.model;
    return {
      service: "workerpals",
      sessionId,
      backend,
      modelId,
      promptTokens,
      completionTokens,
      totalTokens,
      estimated: reported.estimated === true,
    };
  }

  let promptText: string;
  try {
    promptText = JSON.stringify({
      kind: job.kind,
      params: job.params ?? {},
    });
  } catch {
    // Unserializable params: size only the kind plus the main instruction text.
    promptText = `${job.kind}\n${String(job.params?.instruction ?? job.params?.prompt ?? "")}`.trim();
  }
  const completionText = [result.summary, result.stdout ?? "", result.stderr ?? ""]
    .filter((piece) => Boolean(piece))
    .join("\n\n");

  const promptTokens = estimateTokensFromText(promptText);
  const completionTokens = estimateTokensFromText(completionText);
  return {
    service: "workerpals",
    sessionId,
    backend: resolveExecutor(CONFIG),
    modelId: llmConfig.model,
    promptTokens,
    completionTokens,
    totalTokens: promptTokens + completionTokens,
    estimated: true,
  };
}
161
+
162
/**
 * Posts the LLM usage event for a finished job to `${server}/telemetry/llm-usage`.
 *
 * Does nothing when `buildWorkerLlmUsageEvent` yields no payload. The request
 * is aborted after `timeoutMs` so that an unresponsive telemetry endpoint
 * cannot hold up the caller, which awaits this function.
 *
 * @param server Base URL of the server.
 * @param headers Headers sent with the request.
 * @param job Job descriptor forwarded to `buildWorkerLlmUsageEvent`.
 * @param result Job result forwarded to `buildWorkerLlmUsageEvent`.
 * @param timeoutMs Upper bound for the request in milliseconds (default 10 s);
 *   non-finite or non-positive values fall back to the default.
 * @throws Error when the server answers with a non-2xx status; the request
 *   itself rejects on network failure or timeout.
 */
async function reportWorkerLlmUsage(
  server: string,
  headers: Record<string, string>,
  job: {
    kind: string;
    sessionId?: string | null;
    params?: Record<string, unknown> | null;
  },
  result: WorkerJobResult,
  timeoutMs = 10_000,
): Promise<void> {
  const payload = buildWorkerLlmUsageEvent(job, result);
  if (!payload) return;

  const effectiveTimeoutMs =
    Number.isFinite(timeoutMs) && timeoutMs > 0 ? Math.max(1, Math.floor(timeoutMs)) : 10_000;

  const response = await fetch(`${server}/telemetry/llm-usage`, {
    method: "POST",
    headers,
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(effectiveTimeoutMs),
  });
  if (!response.ok) {
    // Body is best-effort context only; a failure to read it must not mask the status.
    const detail = await response.text().catch(() => "");
    throw new Error(
      `usage telemetry rejected (${response.status})${detail ? `: ${detail.trim()}` : ""}`,
    );
  }
}
186
+
94
187
  function integrationBranchName(): string {
95
188
  const configuredBaseRef = CONFIG.workerpals.baseRef.trim();
96
189
  if (!configuredBaseRef) return "main_agents";
@@ -1047,6 +1140,15 @@ async function workerLoop(
1047
1140
  const jobDurationMs = Math.max(0, Date.now() - jobStartedAtMs);
1048
1141
 
1049
1142
  await logChain;
1143
+ try {
1144
+ await reportWorkerLlmUsage(opts.server, headers, jobData, result);
1145
+ } catch (err) {
1146
+ console.warn(
1147
+ `[WorkerPals] Failed to report LLM usage for job ${job.id}: ${
1148
+ err instanceof Error ? err.message : String(err)
1149
+ }`,
1150
+ );
1151
+ }
1050
1152
 
1051
1153
  let completionCommit: CommitRef | null = null;
1052
1154
  if (result.ok && shouldCommit(job.kind, CONFIG)) {
@@ -21,6 +21,7 @@ port = 3001
21
21
  debug_http = false
22
22
  stale_claim_ttl_ms = 120000
23
23
  stale_claim_sweep_interval_ms = 5000
24
+ session_token_budget = 2000000
24
25
 
25
26
  [localbuddy]
26
27
  enabled = false
@@ -197,7 +198,7 @@ session_id = "workerpals-dev"
197
198
  [workerpals.openai_codex]
198
199
  timeout_ms = 7200000
199
200
  progress_log_interval_s = 30
200
- reasoning_effort = "high"
201
+ reasoning_effort = "xhigh"
201
202
  approval_policy = "never"
202
203
  sandbox = "workspace-write"
203
204
  color = "never"
@@ -8,19 +8,19 @@
8
8
 
9
9
  [localbuddy.llm]
10
10
  backend = "openai_codex"
11
- model = "gpt-5-codex"
11
+ model = "gpt-5.4"
12
12
  codex_auth_mode = "chatgpt"
13
13
  codex_bin = "bun x --yes @openai/codex"
14
14
  codex_timeout_ms = 120000
15
- reasoning_effort = "high"
15
+ reasoning_effort = "xhigh"
16
16
 
17
17
  [remotebuddy.llm]
18
18
  backend = "openai_codex"
19
- model = "gpt-5-codex"
19
+ model = "gpt-5.4"
20
20
  codex_auth_mode = "chatgpt"
21
21
  codex_bin = "bun x --yes @openai/codex"
22
22
  codex_timeout_ms = 120000
23
- reasoning_effort = "high"
23
+ reasoning_effort = "xhigh"
24
24
 
25
25
  [remotebuddy]
26
26
  max_workerpals = 10
@@ -42,11 +42,11 @@ retention_days = 30
42
42
 
43
43
  [workerpals.llm]
44
44
  backend = "openai_codex"
45
- model = "gpt-5-codex"
45
+ model = "gpt-5.4"
46
46
  codex_auth_mode = "chatgpt"
47
47
  codex_bin = "bun x --yes @openai/codex"
48
48
  codex_timeout_ms = 120000
49
- reasoning_effort = "high"
49
+ reasoning_effort = "xhigh"
50
50
 
51
51
  [workerpals]
52
52
  executor = "openai_codex"
@@ -92,7 +92,7 @@ bin = "bun x --yes @openai/codex"
92
92
  timeout_ms = 7200000
93
93
  progress_log_interval_s = 30
94
94
  # timeout_s = 120 # optional; if set, overrides timeout_ms
95
- reasoning_effort = "high"
95
+ reasoning_effort = "xhigh"
96
96
  approval_policy = "never"
97
97
  sandbox = "workspace-write"
98
98
  color = "never"
@@ -82,6 +82,8 @@ export interface PushPalsConfig {
82
82
  debugHttp: boolean;
83
83
  staleClaimTtlMs: number;
84
84
  staleClaimSweepIntervalMs: number;
85
+ sessionTokenBudget: number;
86
+ sessionTokenBudgetAction: "pause";
85
87
  };
86
88
  localbuddy: {
87
89
  enabled: boolean;
@@ -682,6 +684,14 @@ export function loadPushPalsConfig(options: LoadOptions = {}): PushPalsConfig {
682
684
  5_000,
683
685
  ),
684
686
  );
687
+ const sessionTokenBudget = Math.max(
688
+ 0,
689
+ asInt(
690
+ parseIntEnv("PUSHPALS_SESSION_TOKEN_BUDGET") ?? serverNode.session_token_budget,
691
+ 1_000_000,
692
+ ),
693
+ );
694
+ const sessionTokenBudgetAction: "pause" = "pause";
685
695
 
686
696
  const globalStatusHeartbeatMs = parseIntEnv("PUSHPALS_STATUS_HEARTBEAT_MS");
687
697
 
@@ -1467,6 +1477,8 @@ export function loadPushPalsConfig(options: LoadOptions = {}): PushPalsConfig {
1467
1477
  debugHttp,
1468
1478
  staleClaimTtlMs,
1469
1479
  staleClaimSweepIntervalMs,
1480
+ sessionTokenBudget,
1481
+ sessionTokenBudgetAction,
1470
1482
  },
1471
1483
  localbuddy: {
1472
1484
  enabled: localEnabled,