@pushpalsdev/cli 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
+ import os
1
2
  import sys
2
3
  import unittest
4
+ import tempfile
3
5
  from pathlib import Path
4
6
 
5
7
  _HERE = Path(__file__).resolve().parent
@@ -8,13 +10,16 @@ for path in (_HERE, _SHARED):
8
10
  if str(path) not in sys.path:
9
11
  sys.path.insert(0, str(path))
10
12
 
11
- from executor_base import SettingsResolver
13
+ from executor_base import SettingsResolver, config_dir_for_runtime_config, runtime_config
12
14
  from openai_codex_executor import (
13
15
  OpenAICodexRuntimeConfig,
16
+ _resolve_reasoning_effort,
14
17
  _build_instruction,
15
18
  _detect_codex_workaround_signal,
19
+ _extract_usage_counts,
16
20
  _load_prompt_template,
17
21
  _repo_root_for_prompt_loading,
22
+ _usage_from_trace_or_estimate,
18
23
  )
19
24
 
20
25
 
@@ -55,8 +60,69 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
55
60
  self.assertEqual(cfg.approval_policy, "never")
56
61
  self.assertEqual(cfg.sandbox, "workspace-write")
57
62
  self.assertEqual(cfg.color, "never")
63
+ self.assertEqual(cfg.reasoning_effort, "high")
58
64
  self.assertFalse(cfg.json_output)
59
65
 
66
def test_reasoning_effort_caps_extra_high_for_gpt_5_4(self) -> None:
    """An "extra high" effort request resolves to "high" when no model is passed.

    The test name implies the default model is gpt-5.4; that default is not
    visible from this test and should be confirmed against the executor.
    """
    env_overrides = {"WORKERPALS_OPENAI_CODEX_REASONING_EFFORT": "extra high"}
    resolver = SettingsResolver(env=env_overrides, config_loader=lambda: {})
    cfg = OpenAICodexRuntimeConfig.from_sources(resolver)

    resolved = _resolve_reasoning_effort(cfg)

    self.assertEqual(resolved, "high")
74
+
75
def test_reasoning_effort_preserves_extra_high_for_future_models(self) -> None:
    """An "extra high" effort request resolves to "xhigh" for model "gpt-6-preview"."""
    env_overrides = {"WORKERPALS_OPENAI_CODEX_REASONING_EFFORT": "extra high"}
    resolver = SettingsResolver(env=env_overrides, config_loader=lambda: {})
    cfg = OpenAICodexRuntimeConfig.from_sources(resolver)

    resolved = _resolve_reasoning_effort(cfg, model="gpt-6-preview")

    self.assertEqual(resolved, "xhigh")
83
+
84
def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
    """PUSHPALS_CONFIG_DIR_OVERRIDE must win over the repo's own ``configs`` dir.

    Two ``default.toml`` files are written with opposite ``json`` values: one
    under the override directory (``json = true``) and one under
    ``<repo>/configs`` (``json = false``). The loaded config must reflect the
    override directory.
    """
    from unittest import mock

    import executor_base

    with tempfile.TemporaryDirectory(prefix="pushpals-openai-codex-config-") as root:
        repo_root = Path(root) / "repo"
        runtime_config_dir = Path(root) / "runtime" / "configs"
        repo_config_dir = repo_root / "configs"
        runtime_config_dir.mkdir(parents=True, exist_ok=True)
        repo_config_dir.mkdir(parents=True, exist_ok=True)

        (runtime_config_dir / "default.toml").write_text(
            'profile = "dev"\n[workerpals.openai_codex]\njson = true\n',
            encoding="utf-8",
        )
        (repo_config_dir / "default.toml").write_text(
            'profile = "dev"\n[workerpals.openai_codex]\njson = false\n',
            encoding="utf-8",
        )

        env_overrides = {
            "PUSHPALS_REPO_PATH": str(repo_root),
            "PUSHPALS_CONFIG_DIR_OVERRIDE": str(runtime_config_dir),
            "PUSHPALS_PROFILE": "dev",
        }
        # patch.dict / patch.object restore the environment and the module-level
        # config cache on exit, even if an assertion fails, so no manual
        # save/restore bookkeeping is needed.
        with mock.patch.dict(os.environ, env_overrides):
            # Reset the cache so runtime_config() re-reads the files on disk.
            with mock.patch.object(executor_base, "_CONFIG_CACHE", None):
                self.assertEqual(config_dir_for_runtime_config(), runtime_config_dir)
                cfg = runtime_config()
                self.assertTrue(cfg["workerpals"]["openai_codex"]["json"])
125
+
60
126
  def test_build_instruction_includes_codex_runtime_invariants(self) -> None:
61
127
  prompt = _build_instruction("Add two tests for localbuddy", [])
62
128
  self.assertIn("Codex CLI is required infrastructure", prompt)
@@ -105,6 +171,32 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
105
171
  template = _load_prompt_template("workerpals/openai_codex_task_execute_system_prompt.md")
106
172
  self.assertIn("Codex CLI is required infrastructure", template)
107
173
 
174
def test_extracts_usage_counts_from_nested_json_event(self) -> None:
    """Usage nested under ``response.usage`` is mapped to prompt/completion/total keys."""
    event = {
        "type": "response.completed",
        "response": {
            "usage": {
                "input_tokens": 120,
                "output_tokens": 30,
                "total_tokens": 150,
            }
        },
    }
    expected = {"prompt_tokens": 120, "completion_tokens": 30, "total_tokens": 150}

    self.assertEqual(_extract_usage_counts(event), expected)
191
+
192
def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
    """With an empty trace, the returned usage is flagged as an estimate."""
    prompt_text = "abc" * 30
    usage = _usage_from_trace_or_estimate({}, prompt_text, "done", model="gpt-5.4")

    self.assertTrue(usage["estimated"])
    # Identity fields are filled in even though no real usage was reported.
    for field, expected in (("backend", "openai_codex"), ("modelId", "gpt-5.4")):
        self.assertEqual(usage[field], expected)
    self.assertGreater(usage["promptTokens"], 0)
    self.assertGreater(usage["totalTokens"], usage["completionTokens"])
199
+
108
200
 
109
201
  if __name__ == "__main__":
110
202
  unittest.main()
@@ -28,7 +28,7 @@ from executor_base import (
28
28
  log_git_status,
29
29
  looks_local_base_url,
30
30
  parse_task_execute_payload,
31
- repo_root_for_runtime_config,
31
+ prompts_root_for_runtime_assets,
32
32
  resolve_llm_config,
33
33
  setting_int,
34
34
  setting_str,
@@ -78,7 +78,7 @@ def _session_hint_headers(session_user: str) -> Dict[str, str]:
78
78
 
79
79
 
80
80
  def _repo_root_for_prompt_loading() -> Path:
81
- return repo_root_for_runtime_config()
81
+ return prompts_root_for_runtime_assets()
82
82
 
83
83
 
84
84
  def _resolve_prompt_file(relative_path: str) -> Path:
@@ -0,0 +1,57 @@
1
+ import os
2
+ import sys
3
+ import tempfile
4
+ import unittest
5
+ from pathlib import Path
6
+
7
+ _HERE = Path(__file__).resolve().parent
8
+ _SHARED = _HERE.parent / "shared"
9
+ for path in (_HERE, _SHARED):
10
+ if str(path) not in sys.path:
11
+ sys.path.insert(0, str(path))
12
+
13
+ from openhands_executor import _PROMPT_TEMPLATE_CACHE, _load_prompt_template, _resolve_prompt_file
14
+
15
+
16
class OpenHandsRuntimePathTests(unittest.TestCase):
    """Checks where the OpenHands executor looks for prompt templates."""

    def test_prompt_resolution_prefers_explicit_prompt_root_override(self) -> None:
        """PUSHPALS_PROMPTS_ROOT_OVERRIDE must win over the repo's ``prompts`` dir.

        The same relative prompt file is written under both the repo root and
        the override root with different contents; resolution and loading must
        both pick the override copy.
        """
        from unittest import mock

        with tempfile.TemporaryDirectory(prefix="pushpals-openhands-prompts-") as root:
            repo_root = Path(root) / "repo"
            runtime_root = Path(root) / "runtime"
            repo_prompt = repo_root / "prompts" / "workerpals" / "openhands_strict_tool_use_message.md"
            runtime_prompt = (
                runtime_root / "prompts" / "workerpals" / "openhands_strict_tool_use_message.md"
            )
            repo_prompt.parent.mkdir(parents=True, exist_ok=True)
            runtime_prompt.parent.mkdir(parents=True, exist_ok=True)
            repo_prompt.write_text("repo prompt", encoding="utf-8")
            runtime_prompt.write_text("runtime prompt", encoding="utf-8")

            env_overrides = {
                "PUSHPALS_REPO_PATH": str(repo_root),
                "PUSHPALS_PROMPTS_ROOT_OVERRIDE": str(runtime_root),
            }
            # patch.dict snapshots and restores both mappings in place, so the
            # environment and the module-level template cache return to their
            # previous contents even when an assertion fails.
            with mock.patch.dict(os.environ, env_overrides):
                # clear=True empties the cache for the duration of the test so
                # a previously cached template cannot mask the override.
                with mock.patch.dict(_PROMPT_TEMPLATE_CACHE, clear=True):
                    resolved = _resolve_prompt_file(
                        "workerpals/openhands_strict_tool_use_message.md"
                    )
                    self.assertEqual(resolved, runtime_prompt)
                    self.assertEqual(
                        _load_prompt_template("workerpals/openhands_strict_tool_use_message.md"),
                        "runtime prompt",
                    )
54
+
55
+
56
+ if __name__ == "__main__":
57
+ unittest.main()
@@ -8,7 +8,7 @@
8
8
 
9
9
  import { existsSync } from "fs";
10
10
  import { resolve } from "path";
11
- import type { JobResult } from "../common/types.js";
11
+ import type { JobResult, JobTokenUsage } from "../common/types.js";
12
12
  import type { WorkerpalsRuntimeConfig } from "../common/executor_backend.js";
13
13
  import {
14
14
  truncate,
@@ -24,6 +24,83 @@ const OPENHANDS_SCRIPT_PATH = resolve(import.meta.dir, "openhands", "openhands_e
24
24
 
25
25
  // ---- OpenHands-specific helpers ----------------------------------------------
26
26
 
27
/**
 * Roughly estimate a token count from raw text.
 *
 * Uses a fixed heuristic of one token per three characters, rounded up.
 * Null/undefined input is treated as the empty string.
 */
function estimateTokensFromText(text: string): number {
  const normalized = String(text ?? "");
  const approximate = Math.ceil(normalized.length / 3);
  return Math.max(0, approximate);
}
30
+
31
/**
 * Build a heuristic token-usage record for an OpenHands job.
 *
 * The prompt side is estimated from the JSON-serialized job params; if
 * serialization throws, it falls back to the `instruction` or `prompt` field.
 * The completion side is estimated from the non-empty parts of summary,
 * stdout and stderr joined by blank lines. The result is always flagged
 * `estimated: true` with backend "openhands".
 */
function estimateJobTokenUsage(
  runtimeConfig: WorkerpalsRuntimeConfig,
  params: Record<string, unknown>,
  summary: string,
  stdout: string,
  stderr: string,
): JobTokenUsage {
  let promptSource: string;
  try {
    promptSource = JSON.stringify(params);
  } catch {
    // Serialization failed; use the raw instruction text instead.
    promptSource = String(params?.instruction ?? params?.prompt ?? "");
  }

  const completionParts = [summary, stdout, stderr].filter((part) => Boolean(part));
  const completionSource = completionParts.join("\n\n");

  const promptTokens = estimateTokensFromText(promptSource);
  const completionTokens = estimateTokensFromText(completionSource);
  const totalTokens = promptTokens + completionTokens;

  return {
    promptTokens,
    completionTokens,
    totalTokens,
    estimated: true,
    backend: "openhands",
    modelId: runtimeConfig.workerpals.llm.model.trim(),
  };
}
57
+
58
/**
 * Normalize a usage payload reported by the executor into a JobTokenUsage.
 *
 * Accepts camelCase or snake_case count keys. A count is usable only when it
 * is a finite, non-negative number (or a non-blank numeric string). Values
 * such as `null`, `""`, booleans or arrays are treated as missing: plain
 * `Number(...)` would coerce them to 0 or 1 and report bogus counts.
 *
 * Missing counts are derived from the reported total where possible, and
 * otherwise taken from `fallback`.
 *
 * @param value    Raw `usage` field from the parsed executor result.
 * @param fallback Estimated usage, returned as-is when `value` is not an
 *                 object or carries no usable count.
 * @returns The normalized usage record.
 */
function coerceJobTokenUsage(
  value: unknown,
  fallback: JobTokenUsage,
): JobTokenUsage {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return fallback;
  }
  const raw = value as Record<string, unknown>;

  // Only real numbers and non-blank numeric strings count as reported values.
  const toCount = (candidate: unknown): number => {
    if (typeof candidate === "number") return candidate;
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return Number(candidate);
    }
    return Number.NaN;
  };
  const isUsable = (count: number): boolean => Number.isFinite(count) && count >= 0;

  const promptTokens = toCount(raw.promptTokens ?? raw.prompt_tokens);
  const completionTokens = toCount(raw.completionTokens ?? raw.completion_tokens);
  const totalTokens = toCount(raw.totalTokens ?? raw.total_tokens);
  const hasPrompt = isUsable(promptTokens);
  const hasCompletion = isUsable(completionTokens);
  const hasTotal = isUsable(totalTokens);
  if (!hasPrompt && !hasCompletion && !hasTotal) {
    return fallback;
  }

  // With only a total reported, split it using the fallback's completion estimate.
  const normalizedPrompt = hasPrompt
    ? Math.round(promptTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - fallback.completionTokens)
      : fallback.promptTokens;
  const normalizedCompletion = hasCompletion
    ? Math.round(completionTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - normalizedPrompt)
      : fallback.completionTokens;
  const normalizedTotal = hasTotal
    ? Math.round(totalTokens)
    : normalizedPrompt + normalizedCompletion;

  return {
    promptTokens: normalizedPrompt,
    completionTokens: normalizedCompletion,
    totalTokens: normalizedTotal,
    estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
    backend:
      typeof raw.backend === "string" && raw.backend.trim().length > 0
        ? raw.backend.trim()
        : fallback.backend,
    modelId:
      typeof raw.modelId === "string" && raw.modelId.trim().length > 0
        ? raw.modelId.trim()
        : fallback.modelId,
  };
}
103
+
27
104
  function classifyShellCommand(cmd: string): "explore" | "progress" {
28
105
  const trimmed = cmd.trim().toLowerCase();
29
106
  if (!trimmed) return "explore";
@@ -450,6 +527,7 @@ export async function executeWithOpenHands(
450
527
 
451
528
  const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
452
529
  const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
530
+ const fallbackUsage = estimateJobTokenUsage(runtimeConfig, params, "", filteredStdout, stderr);
453
531
 
454
532
  if (!parsed) {
455
533
  if (timedOut) {
@@ -464,6 +542,7 @@ export async function executeWithOpenHands(
464
542
  stdout: truncate(filteredStdout, outputPolicy),
465
543
  stderr: truncate(stderr, outputPolicy),
466
544
  exitCode: exitCode === 0 ? 124 : exitCode,
545
+ usage: fallbackUsage,
467
546
  };
468
547
  }
469
548
  return {
@@ -472,6 +551,7 @@ export async function executeWithOpenHands(
472
551
  stdout: truncate(filteredStdout, outputPolicy),
473
552
  stderr: truncate(stderr, outputPolicy),
474
553
  exitCode,
554
+ usage: fallbackUsage,
475
555
  };
476
556
  }
477
557
 
@@ -483,6 +563,10 @@ export async function executeWithOpenHands(
483
563
  : `${kind} failed via OpenHands (exit ${exitCode})`;
484
564
  const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
485
565
  const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
566
+ const usage = coerceJobTokenUsage(
567
+ parsed.usage,
568
+ estimateJobTokenUsage(runtimeConfig, params, summary, parsedStdout, parsedStderr),
569
+ );
486
570
  const parsedExitCode =
487
571
  typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
488
572
  ? parsed.exitCode
@@ -502,6 +586,7 @@ export async function executeWithOpenHands(
502
586
  stdout: truncate(filteredStdout || String(parsedStdout ?? ""), outputPolicy),
503
587
  stderr: truncate(`Clarification needed: ${clarificationQuestion}`, outputPolicy),
504
588
  exitCode: 0,
589
+ usage,
505
590
  };
506
591
  }
507
592
  }
@@ -512,12 +597,20 @@ export async function executeWithOpenHands(
512
597
  stdout: truncate(parsedStdout ?? "", outputPolicy),
513
598
  stderr: truncate(parsedStderr ?? "", outputPolicy),
514
599
  exitCode: parsedExitCode,
600
+ usage,
515
601
  };
516
602
  } catch (err) {
517
603
  return {
518
604
  ok: false,
519
605
  summary: `OpenHands wrapper execution error for ${kind}: ${String(err)}`,
520
606
  exitCode: 1,
607
+ usage: estimateJobTokenUsage(
608
+ runtimeConfig,
609
+ params,
610
+ `OpenHands wrapper execution error for ${kind}: ${String(err)}`,
611
+ "",
612
+ "",
613
+ ),
521
614
  };
522
615
  } finally {
523
616
  if (warningTimer) {
@@ -203,6 +203,24 @@ def repo_root_for_runtime_config() -> Path:
203
203
  return Path(__file__).resolve().parents[3]
204
204
 
205
205
 
206
def config_dir_for_runtime_config() -> Path:
    """Return the directory that runtime TOML config files are read from.

    A non-blank ``PUSHPALS_CONFIG_DIR_OVERRIDE`` environment variable takes
    precedence (surrounding whitespace is stripped); otherwise the ``configs``
    directory under the runtime repo root is used.
    """
    override = os.environ.get("PUSHPALS_CONFIG_DIR_OVERRIDE")
    override = override.strip() if override else ""
    if not override:
        return repo_root_for_runtime_config() / "configs"
    return Path(override)
211
+
212
+
213
def prompts_root_for_runtime_assets() -> Path:
    """Return the directory that contains the ``prompts`` folder.

    Resolution order:
    1. A non-blank ``PUSHPALS_PROMPTS_ROOT_OVERRIDE`` environment variable.
    2. The nearest ancestor of this file that has a ``prompts`` subdirectory.
    3. The runtime repo root.
    """
    override = (os.environ.get("PUSHPALS_PROMPTS_ROOT_OVERRIDE") or "").strip()
    if override:
        return Path(override)

    here = Path(__file__).resolve()
    # Walk upward from this file; the first ancestor holding "prompts" wins.
    nearest = next(
        (ancestor for ancestor in here.parents if (ancestor / "prompts").is_dir()),
        None,
    )
    if nearest is not None:
        return nearest
    return repo_root_for_runtime_config()
222
+
223
+
206
224
  def _parse_toml_file(path: Path) -> Dict[str, Any]:
207
225
  if not path.exists() or not tomllib:
208
226
  return {}
@@ -217,12 +235,7 @@ def runtime_config() -> Dict[str, Any]:
217
235
  global _CONFIG_CACHE
218
236
  if _CONFIG_CACHE is not None:
219
237
  return _CONFIG_CACHE
220
- repo_root = repo_root_for_runtime_config()
221
- legacy_config_dir = repo_root / "config"
222
- config_dir = repo_root / "configs"
223
- if not (config_dir / "default.toml").exists():
224
- if (legacy_config_dir / "default.toml").exists():
225
- config_dir = legacy_config_dir
238
+ config_dir = config_dir_for_runtime_config()
226
239
  default_cfg = _parse_toml_file(config_dir / "default.toml")
227
240
  profile = (
228
241
  (os.environ.get("PUSHPALS_PROFILE") or "").strip()
@@ -231,12 +244,6 @@ def runtime_config() -> Dict[str, Any]:
231
244
  )
232
245
  profile_cfg = _parse_toml_file(config_dir / f"{profile}.toml")
233
246
  local_cfg = _parse_toml_file(config_dir / "local.toml")
234
- if (
235
- not local_cfg
236
- and config_dir != legacy_config_dir
237
- and (legacy_config_dir / "local.toml").exists()
238
- ):
239
- local_cfg = _parse_toml_file(legacy_config_dir / "local.toml")
240
247
  _CONFIG_CACHE = _deep_merge(_deep_merge(default_cfg, profile_cfg), local_cfg)
241
248
  return _CONFIG_CACHE
242
249
 
@@ -9,7 +9,7 @@
9
9
 
10
10
  import { existsSync } from "fs";
11
11
  import { resolve } from "path";
12
- import type { JobResult } from "./types.js";
12
+ import type { JobResult, JobTokenUsage } from "./types.js";
13
13
  import type { WorkerpalsRuntimeConfig } from "./executor_backend.js";
14
14
  import type { BackendTaskExecutor } from "../backends/types.js";
15
15
  import {
@@ -26,6 +26,84 @@ interface GenericPythonExecutorConfig {
26
26
  timeoutConfigKey: string;
27
27
  }
28
28
 
29
/**
 * Roughly estimate a token count from raw text.
 *
 * Uses a fixed heuristic of one token per three characters, rounded up.
 * Null/undefined input is treated as the empty string.
 */
function estimateTokensFromText(text: string): number {
  const normalized = String(text ?? "");
  const approximate = Math.ceil(normalized.length / 3);
  return Math.max(0, approximate);
}
32
+
33
/**
 * Build a heuristic token-usage record for a generic Python executor job.
 *
 * The prompt side is estimated from the JSON-serialized job params; if
 * serialization throws, it falls back to the `instruction` or `prompt` field.
 * The completion side is estimated from the non-empty parts of summary,
 * stdout and stderr joined by blank lines. The result is always flagged
 * `estimated: true` and carries the given backend name and model id.
 */
function estimateJobTokenUsage(
  backendName: string,
  modelId: string,
  params: Record<string, unknown>,
  summary: string,
  stdout: string,
  stderr: string,
): JobTokenUsage {
  let promptSource: string;
  try {
    promptSource = JSON.stringify(params);
  } catch {
    // Serialization failed; use the raw instruction text instead.
    promptSource = String(params?.instruction ?? params?.prompt ?? "");
  }

  const completionParts = [summary, stdout, stderr].filter((part) => Boolean(part));
  const completionSource = completionParts.join("\n\n");

  const promptTokens = estimateTokensFromText(promptSource);
  const completionTokens = estimateTokensFromText(completionSource);
  const totalTokens = promptTokens + completionTokens;

  return {
    promptTokens,
    completionTokens,
    totalTokens,
    estimated: true,
    backend: backendName,
    modelId,
  };
}
60
+
61
/**
 * Normalize a usage payload reported by the executor into a JobTokenUsage.
 *
 * Accepts camelCase or snake_case count keys. A count is usable only when it
 * is a finite, non-negative number (or a non-blank numeric string). Values
 * such as `null`, `""`, booleans or arrays are treated as missing: plain
 * `Number(...)` would coerce them to 0 or 1 and report bogus counts.
 *
 * Missing counts are derived from the reported total where possible, and
 * otherwise taken from `fallback`.
 *
 * @param value    Raw `usage` field from the parsed executor result.
 * @param fallback Estimated usage, returned as-is when `value` is not an
 *                 object or carries no usable count.
 * @returns The normalized usage record.
 */
function coerceJobTokenUsage(
  value: unknown,
  fallback: JobTokenUsage,
): JobTokenUsage {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return fallback;
  }
  const raw = value as Record<string, unknown>;

  // Only real numbers and non-blank numeric strings count as reported values.
  const toCount = (candidate: unknown): number => {
    if (typeof candidate === "number") return candidate;
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return Number(candidate);
    }
    return Number.NaN;
  };
  const isUsable = (count: number): boolean => Number.isFinite(count) && count >= 0;

  const promptTokens = toCount(raw.promptTokens ?? raw.prompt_tokens);
  const completionTokens = toCount(raw.completionTokens ?? raw.completion_tokens);
  const totalTokens = toCount(raw.totalTokens ?? raw.total_tokens);
  const hasPrompt = isUsable(promptTokens);
  const hasCompletion = isUsable(completionTokens);
  const hasTotal = isUsable(totalTokens);
  if (!hasPrompt && !hasCompletion && !hasTotal) {
    return fallback;
  }

  // With only a total reported, split it using the fallback's completion estimate.
  const normalizedPrompt = hasPrompt
    ? Math.round(promptTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - fallback.completionTokens)
      : fallback.promptTokens;
  const normalizedCompletion = hasCompletion
    ? Math.round(completionTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - normalizedPrompt)
      : fallback.completionTokens;
  const normalizedTotal = hasTotal
    ? Math.round(totalTokens)
    : normalizedPrompt + normalizedCompletion;

  return {
    promptTokens: normalizedPrompt,
    completionTokens: normalizedCompletion,
    totalTokens: normalizedTotal,
    estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
    backend:
      typeof raw.backend === "string" && raw.backend.trim().length > 0
        ? raw.backend.trim()
        : fallback.backend,
    modelId:
      typeof raw.modelId === "string" && raw.modelId.trim().length > 0
        ? raw.modelId.trim()
        : fallback.modelId,
  };
}
106
+
29
107
  function resolveRuntimeSettings(
30
108
  config: GenericPythonExecutorConfig,
31
109
  runtimeConfig: WorkerpalsRuntimeConfig,
@@ -69,6 +147,7 @@ export function createGenericPythonExecutor(
69
147
  config,
70
148
  runtimeConfig,
71
149
  );
150
+ const modelId = runtimeConfig.workerpals.llm.model.trim();
72
151
  const executionBudgetMs =
73
152
  typeof budgets?.executionBudgetMs === "number" && Number.isFinite(budgets.executionBudgetMs)
74
153
  ? Math.max(10_000, Math.floor(budgets.executionBudgetMs))
@@ -158,6 +237,14 @@ export function createGenericPythonExecutor(
158
237
 
159
238
  const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
160
239
  const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
240
+ const fallbackUsage = estimateJobTokenUsage(
241
+ backendName,
242
+ modelId,
243
+ params,
244
+ "",
245
+ filteredStdout,
246
+ stderr,
247
+ );
161
248
 
162
249
  if (!parsed) {
163
250
  if (timedOut) {
@@ -167,6 +254,7 @@ export function createGenericPythonExecutor(
167
254
  stdout: truncate(filteredStdout, outputPolicy),
168
255
  stderr: truncate(stderr, outputPolicy),
169
256
  exitCode: exitCode === 0 ? 124 : exitCode,
257
+ usage: fallbackUsage,
170
258
  };
171
259
  }
172
260
  return {
@@ -175,35 +263,47 @@ export function createGenericPythonExecutor(
175
263
  stdout: truncate(filteredStdout, outputPolicy),
176
264
  stderr: truncate(stderr, outputPolicy),
177
265
  exitCode,
266
+ usage: fallbackUsage,
178
267
  };
179
268
  }
180
269
 
270
+ const summary =
271
+ typeof parsed.summary === "string"
272
+ ? parsed.summary
273
+ : exitCode === 0
274
+ ? `${kind} passed via ${backendName}`
275
+ : `${kind} failed via ${backendName} (exit ${exitCode})`;
276
+ const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
277
+ const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
278
+ const usage = coerceJobTokenUsage(
279
+ parsed.usage,
280
+ estimateJobTokenUsage(backendName, modelId, params, summary, parsedStdout, parsedStderr),
281
+ );
282
+
181
283
  return {
182
284
  ok: typeof parsed.ok === "boolean" ? parsed.ok : exitCode === 0,
183
- summary:
184
- typeof parsed.summary === "string"
185
- ? parsed.summary
186
- : exitCode === 0
187
- ? `${kind} passed via ${backendName}`
188
- : `${kind} failed via ${backendName} (exit ${exitCode})`,
189
- stdout: truncate(
190
- typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout,
191
- outputPolicy,
192
- ),
193
- stderr: truncate(
194
- typeof parsed.stderr === "string" ? parsed.stderr : stderr,
195
- outputPolicy,
196
- ),
285
+ summary,
286
+ stdout: truncate(parsedStdout, outputPolicy),
287
+ stderr: truncate(parsedStderr, outputPolicy),
197
288
  exitCode:
198
289
  typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
199
290
  ? parsed.exitCode
200
291
  : exitCode,
292
+ usage,
201
293
  };
202
294
  } catch (err) {
203
295
  return {
204
296
  ok: false,
205
297
  summary: `${backendName} wrapper execution error for ${kind}: ${String(err)}`,
206
298
  exitCode: 1,
299
+ usage: estimateJobTokenUsage(
300
+ backendName,
301
+ runtimeConfig.workerpals.llm.model.trim(),
302
+ params,
303
+ `${backendName} wrapper execution error for ${kind}: ${String(err)}`,
304
+ "",
305
+ "",
306
+ ),
207
307
  };
208
308
  }
209
309
  };
@@ -1,9 +1,19 @@
1
1
  export type ExecutorBackend = string;
2
2
 
3
/** Token accounting attached to a job result. */
export interface JobTokenUsage {
  /** Tokens attributed to the prompt/input side of the job. */
  promptTokens: number;
  /** Tokens attributed to the completion/output side of the job. */
  completionTokens: number;
  /** Overall token count; producers in this package set it alongside the two counts above. */
  totalTokens?: number;
  /** True when the counts are heuristic estimates rather than values reported by the executor. */
  estimated?: boolean;
  /** Name of the executor backend that produced the usage (e.g. "openhands"). */
  backend?: string;
  /** Identifier of the model the job ran against. */
  modelId?: string;
}
11
+
3
12
  export interface JobResult {
4
13
  ok: boolean;
5
14
  summary: string;
6
15
  stdout?: string;
7
16
  stderr?: string;
8
17
  exitCode?: number;
18
+ usage?: JobTokenUsage;
9
19
  }
@@ -1048,6 +1048,7 @@ export class DockerExecutor {
1048
1048
 
1049
1049
  const worktreeRelPath = relative(this.options.repo, worktreePath).replace(/\\/g, "/");
1050
1050
  const containerWorktreePath = `/repo/${worktreeRelPath}`;
1051
+ await this.waitForWorktreePathInWarmContainer(containerWorktreePath);
1051
1052
 
1052
1053
  const args: string[] = [
1053
1054
  "exec",
@@ -1124,6 +1125,26 @@ export class DockerExecutor {
1124
1125
  return result;
1125
1126
  }
1126
1127
 
1128
/**
 * Poll the warm container until the worktree directory is visible.
 *
 * Runs `test -d <path>` through the warm shell every 100ms until it succeeds
 * or `timeoutMs` has elapsed, then throws with the last probe output (if any)
 * appended to the message.
 */
private async waitForWorktreePathInWarmContainer(
  containerWorktreePath: string,
  timeoutMs = 5_000,
): Promise<void> {
  const giveUpAt = Date.now() + timeoutMs;
  const probe = `test -d ${shellSingleQuote(containerWorktreePath)}`;
  let lastDetail = "";

  for (;;) {
    if (Date.now() >= giveUpAt) {
      break;
    }
    const outcome = await this.runWarmShell(probe);
    if (outcome.ok) {
      return;
    }
    // Keep the latest probe output so the final error explains the failure.
    const outputParts = [outcome.stdout, outcome.stderr].filter(Boolean);
    lastDetail = outputParts.join("\n").trim();
    await this.sleep(100);
  }

  const detailSuffix = lastDetail ? ` (${lastDetail})` : "";
  throw new Error(
    `worktree path not visible inside warm container after ${timeoutMs}ms: ${containerWorktreePath}${detailSuffix}`,
  );
}
1147
+
1127
1148
  private normalizeProvider(raw: string): string {
1128
1149
  const value = raw.trim().toLowerCase();
1129
1150
  if (!value) return "auto";
@@ -1450,6 +1471,9 @@ export class DockerExecutor {
1450
1471
  /\btemporary failure\b/i,
1451
1472
  /\bopenhands wrapper timed out\b/i,
1452
1473
  /\bjob timed out in docker executor\b/i,
1474
+ /\bworktree path not visible inside warm container\b/i,
1475
+ /\bchdir to cwd\b/i,
1476
+ /\bunable to start container process\b/i,
1453
1477
  ];
1454
1478
  return transientPatterns.some((pattern) => pattern.test(text));
1455
1479
  }