@pushpalsdev/cli 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +7 -2
- package/package.json +1 -1
- package/runtime/configs/default.toml +2 -1
- package/runtime/configs/local.example.toml +7 -7
- package/runtime/sandbox/apps/workerpals/src/backends/miniswe/miniswe_executor.py +2 -5
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +155 -11
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +84 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openhands/openhands_executor.py +2 -2
- package/runtime/sandbox/apps/workerpals/src/backends/openhands/test_openhands_runtime_paths.py +57 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openhands_task_execute.ts +94 -1
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +19 -12
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +115 -15
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +10 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +18 -3
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +102 -0
- package/runtime/sandbox/configs/default.toml +2 -1
- package/runtime/sandbox/configs/local.example.toml +7 -7
- package/runtime/sandbox/packages/shared/src/config.ts +12 -0
package/dist/pushpals-cli.js
CHANGED
|
@@ -369,6 +369,8 @@ function loadPushPalsConfig(options = {}) {
|
|
|
369
369
|
const debugHttp = parseBoolEnv("PUSHPALS_DEBUG_HTTP") ?? asBoolean(serverNode.debug_http, false);
|
|
370
370
|
const staleClaimTtlMs = Math.max(5000, asInt(parseIntEnv("PUSHPALS_STALE_CLAIM_TTL_MS") ?? serverNode.stale_claim_ttl_ms, 120000));
|
|
371
371
|
const staleClaimSweepIntervalMs = Math.max(1000, asInt(parseIntEnv("PUSHPALS_STALE_CLAIM_SWEEP_INTERVAL_MS") ?? serverNode.stale_claim_sweep_interval_ms, 5000));
|
|
372
|
+
const sessionTokenBudget = Math.max(0, asInt(parseIntEnv("PUSHPALS_SESSION_TOKEN_BUDGET") ?? serverNode.session_token_budget, 1e6));
|
|
373
|
+
const sessionTokenBudgetAction = "pause";
|
|
372
374
|
const globalStatusHeartbeatMs = parseIntEnv("PUSHPALS_STATUS_HEARTBEAT_MS");
|
|
373
375
|
const localNode = getObject(merged, "localbuddy");
|
|
374
376
|
const localEnabled = parseBoolEnv("LOCALBUDDY_ENABLED") ?? asBoolean(localNode.enabled, false);
|
|
@@ -619,7 +621,9 @@ function loadPushPalsConfig(options = {}) {
|
|
|
619
621
|
port: serverPort,
|
|
620
622
|
debugHttp,
|
|
621
623
|
staleClaimTtlMs,
|
|
622
|
-
staleClaimSweepIntervalMs
|
|
624
|
+
staleClaimSweepIntervalMs,
|
|
625
|
+
sessionTokenBudget,
|
|
626
|
+
sessionTokenBudgetAction
|
|
623
627
|
},
|
|
624
628
|
localbuddy: {
|
|
625
629
|
enabled: localEnabled,
|
|
@@ -1684,7 +1688,7 @@ async function ensureBundledMonitoringHubRoot() {
|
|
|
1684
1688
|
return resolveBundledMonitoringHubRoot();
|
|
1685
1689
|
}
|
|
1686
1690
|
function repoLooksLikePushPalsSourceCheckout(repoRoot) {
|
|
1687
|
-
return existsSync4(join2(repoRoot, "configs", "default.toml"))
|
|
1691
|
+
return existsSync4(join2(repoRoot, "configs", "default.toml"));
|
|
1688
1692
|
}
|
|
1689
1693
|
function parseSemverFromPackageVersion(value) {
|
|
1690
1694
|
const raw = String(value ?? "").trim();
|
|
@@ -4171,6 +4175,7 @@ export {
|
|
|
4171
4175
|
resolveCliLocalBuddyAutostart,
|
|
4172
4176
|
resolveBundledRuntimeAssetSource,
|
|
4173
4177
|
resolveBundledMonitoringHubRoot,
|
|
4178
|
+
repoLooksLikePushPalsSourceCheckout,
|
|
4174
4179
|
prepareEmbeddedWorkerpalDockerImageIfNeeded,
|
|
4175
4180
|
prepareCliRuntime,
|
|
4176
4181
|
precheckWorkerpalDockerAvailability,
|
package/package.json
CHANGED
|
@@ -21,6 +21,7 @@ port = 3001
|
|
|
21
21
|
debug_http = false
|
|
22
22
|
stale_claim_ttl_ms = 120000
|
|
23
23
|
stale_claim_sweep_interval_ms = 5000
|
|
24
|
+
session_token_budget = 2000000
|
|
24
25
|
|
|
25
26
|
[localbuddy]
|
|
26
27
|
enabled = false
|
|
@@ -197,7 +198,7 @@ session_id = "workerpals-dev"
|
|
|
197
198
|
[workerpals.openai_codex]
|
|
198
199
|
timeout_ms = 7200000
|
|
199
200
|
progress_log_interval_s = 30
|
|
200
|
-
reasoning_effort = "
|
|
201
|
+
reasoning_effort = "xhigh"
|
|
201
202
|
approval_policy = "never"
|
|
202
203
|
sandbox = "workspace-write"
|
|
203
204
|
color = "never"
|
|
@@ -8,19 +8,19 @@
|
|
|
8
8
|
|
|
9
9
|
[localbuddy.llm]
|
|
10
10
|
backend = "openai_codex"
|
|
11
|
-
model = "gpt-5
|
|
11
|
+
model = "gpt-5.4"
|
|
12
12
|
codex_auth_mode = "chatgpt"
|
|
13
13
|
codex_bin = "bun x --yes @openai/codex"
|
|
14
14
|
codex_timeout_ms = 120000
|
|
15
|
-
reasoning_effort = "
|
|
15
|
+
reasoning_effort = "xhigh"
|
|
16
16
|
|
|
17
17
|
[remotebuddy.llm]
|
|
18
18
|
backend = "openai_codex"
|
|
19
|
-
model = "gpt-5
|
|
19
|
+
model = "gpt-5.4"
|
|
20
20
|
codex_auth_mode = "chatgpt"
|
|
21
21
|
codex_bin = "bun x --yes @openai/codex"
|
|
22
22
|
codex_timeout_ms = 120000
|
|
23
|
-
reasoning_effort = "
|
|
23
|
+
reasoning_effort = "xhigh"
|
|
24
24
|
|
|
25
25
|
[remotebuddy]
|
|
26
26
|
max_workerpals = 10
|
|
@@ -42,11 +42,11 @@ retention_days = 30
|
|
|
42
42
|
|
|
43
43
|
[workerpals.llm]
|
|
44
44
|
backend = "openai_codex"
|
|
45
|
-
model = "gpt-5
|
|
45
|
+
model = "gpt-5.4"
|
|
46
46
|
codex_auth_mode = "chatgpt"
|
|
47
47
|
codex_bin = "bun x --yes @openai/codex"
|
|
48
48
|
codex_timeout_ms = 120000
|
|
49
|
-
reasoning_effort = "
|
|
49
|
+
reasoning_effort = "xhigh"
|
|
50
50
|
|
|
51
51
|
[workerpals]
|
|
52
52
|
executor = "openai_codex"
|
|
@@ -92,7 +92,7 @@ bin = "bun x --yes @openai/codex"
|
|
|
92
92
|
timeout_ms = 7200000
|
|
93
93
|
progress_log_interval_s = 30
|
|
94
94
|
# timeout_s = 120 # optional; if set, overrides timeout_ms
|
|
95
|
-
reasoning_effort = "
|
|
95
|
+
reasoning_effort = "xhigh"
|
|
96
96
|
approval_policy = "never"
|
|
97
97
|
sandbox = "workspace-write"
|
|
98
98
|
color = "never"
|
|
@@ -57,6 +57,7 @@ from executor_base import (
|
|
|
57
57
|
log_git_status,
|
|
58
58
|
looks_local_base_url,
|
|
59
59
|
parse_task_execute_payload,
|
|
60
|
+
prompts_root_for_runtime_assets,
|
|
60
61
|
resolve_llm_config,
|
|
61
62
|
setting_int,
|
|
62
63
|
setting_str,
|
|
@@ -992,11 +993,7 @@ def _is_git_porcelain_status_command(cmd: str) -> bool:
|
|
|
992
993
|
|
|
993
994
|
|
|
994
995
|
def _repo_root_for_prompt_loading() -> Path:
|
|
995
|
-
|
|
996
|
-
for parent in current.parents:
|
|
997
|
-
if (parent / "prompts").is_dir():
|
|
998
|
-
return parent
|
|
999
|
-
return current.parents[5]
|
|
996
|
+
return prompts_root_for_runtime_assets()
|
|
1000
997
|
|
|
1001
998
|
|
|
1002
999
|
def _resolve_prompt_file(relative_path: str) -> Path:
|
|
@@ -35,6 +35,7 @@ from executor_base import (
|
|
|
35
35
|
log_git_status,
|
|
36
36
|
looks_local_base_url,
|
|
37
37
|
parse_task_execute_payload,
|
|
38
|
+
prompts_root_for_runtime_assets,
|
|
38
39
|
resolve_llm_config,
|
|
39
40
|
summarize_git_changes,
|
|
40
41
|
to_int,
|
|
@@ -42,7 +43,7 @@ from executor_base import (
|
|
|
42
43
|
)
|
|
43
44
|
|
|
44
45
|
LOG_PREFIX = "[OpenAICodexExecutor]"
|
|
45
|
-
DEFAULT_CODEX_MODEL = "gpt-5
|
|
46
|
+
DEFAULT_CODEX_MODEL = "gpt-5.4"
|
|
46
47
|
_ACTIVE_CHILD: Optional[subprocess.Popen[str]] = None
|
|
47
48
|
_INTERRUPTED_SIGNAL: Optional[int] = None
|
|
48
49
|
log = Logger(LOG_PREFIX)
|
|
@@ -79,7 +80,7 @@ _VALID_APPROVAL_POLICIES = {"untrusted", "on-failure", "on-request", "never"}
|
|
|
79
80
|
_VALID_SANDBOX_POLICIES = {"read-only", "workspace-write", "danger-full-access"}
|
|
80
81
|
_VALID_COLORS = {"always", "never", "auto"}
|
|
81
82
|
_VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
|
|
82
|
-
_VALID_REASONING_EFFORTS = {"low", "medium", "high"}
|
|
83
|
+
_VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
@dataclass(frozen=True)
|
|
@@ -151,7 +152,7 @@ class OpenAICodexRuntimeConfig:
|
|
|
151
152
|
reasoning_effort=cfg.get_str(
|
|
152
153
|
env_names=("WORKERPALS_LLM_REASONING_EFFORT", "WORKERPALS_OPENAI_CODEX_REASONING_EFFORT"),
|
|
153
154
|
config_paths=("workerpals.llm.reasoning_effort", "workerpals.openai_codex.reasoning_effort"),
|
|
154
|
-
default="
|
|
155
|
+
default="xhigh",
|
|
155
156
|
),
|
|
156
157
|
approval_policy=cfg.get_str(
|
|
157
158
|
env_names=("WORKERPALS_OPENAI_CODEX_APPROVAL_POLICY",),
|
|
@@ -188,12 +189,7 @@ def _truncate(text: str, max_chars: int = 4000) -> str:
|
|
|
188
189
|
|
|
189
190
|
|
|
190
191
|
def _repo_root_for_prompt_loading() -> Path:
|
|
191
|
-
|
|
192
|
-
for parent in current.parents:
|
|
193
|
-
if (parent / "prompts").is_dir():
|
|
194
|
-
return parent
|
|
195
|
-
# Fallback to historical layout depth if prompts/ cannot be discovered.
|
|
196
|
-
return current.parents[5]
|
|
192
|
+
return prompts_root_for_runtime_assets()
|
|
197
193
|
|
|
198
194
|
|
|
199
195
|
def _resolve_prompt_file(relative_path: str) -> Path:
|
|
@@ -323,13 +319,15 @@ def _resolve_communicate_timeout_seconds(config: OpenAICodexRuntimeConfig) -> Op
|
|
|
323
319
|
def _resolve_reasoning_effort(config: OpenAICodexRuntimeConfig) -> str:
|
|
324
320
|
raw = config.reasoning_effort
|
|
325
321
|
normalized = str(raw).strip().lower()
|
|
322
|
+
if normalized in {"extra high", "extra-high", "extrahigh", "x-high"}:
|
|
323
|
+
normalized = "xhigh"
|
|
326
324
|
if normalized in _VALID_REASONING_EFFORTS:
|
|
327
325
|
return normalized
|
|
328
326
|
log.info(
|
|
329
327
|
"Invalid workerpals.openai_codex.reasoning_effort="
|
|
330
|
-
f"{raw!r}; using default '
|
|
328
|
+
f"{raw!r}; using default 'xhigh'. Allowed: low, medium, high, xhigh."
|
|
331
329
|
)
|
|
332
|
-
return "
|
|
330
|
+
return "xhigh"
|
|
333
331
|
|
|
334
332
|
|
|
335
333
|
def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) -> int:
|
|
@@ -419,6 +417,88 @@ def _contains_reasoning_marker(value: str) -> bool:
|
|
|
419
417
|
return "reasoning" in lowered or "thinking" in lowered
|
|
420
418
|
|
|
421
419
|
|
|
420
|
+
def _coerce_non_negative_int(value: Any) -> Optional[int]:
|
|
421
|
+
try:
|
|
422
|
+
parsed = int(value)
|
|
423
|
+
except Exception:
|
|
424
|
+
return None
|
|
425
|
+
if parsed < 0:
|
|
426
|
+
return None
|
|
427
|
+
return parsed
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def _normalize_usage_counts(
|
|
431
|
+
prompt_tokens: Optional[int],
|
|
432
|
+
completion_tokens: Optional[int],
|
|
433
|
+
total_tokens: Optional[int],
|
|
434
|
+
) -> Optional[Dict[str, int]]:
|
|
435
|
+
if prompt_tokens is None and completion_tokens is None and total_tokens is None:
|
|
436
|
+
return None
|
|
437
|
+
prompt = prompt_tokens if prompt_tokens is not None else 0
|
|
438
|
+
completion = completion_tokens if completion_tokens is not None else 0
|
|
439
|
+
total = total_tokens if total_tokens is not None else prompt + completion
|
|
440
|
+
if prompt_tokens is None and total_tokens is not None and completion_tokens is not None:
|
|
441
|
+
prompt = max(0, total - completion)
|
|
442
|
+
if completion_tokens is None and total_tokens is not None and prompt_tokens is not None:
|
|
443
|
+
completion = max(0, total - prompt)
|
|
444
|
+
total = max(total, prompt + completion)
|
|
445
|
+
if total <= 0:
|
|
446
|
+
return None
|
|
447
|
+
return {
|
|
448
|
+
"prompt_tokens": int(prompt),
|
|
449
|
+
"completion_tokens": int(completion),
|
|
450
|
+
"total_tokens": int(total),
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def _extract_usage_counts(value: Any) -> Optional[Dict[str, int]]:
    """Search a decoded JSON event for token-usage counters.

    Walks ``value`` (nested dicts and lists) with an explicit stack, visiting
    at most 256 nodes and only the first 80 items of any list. Every dict is
    probed for prompt/completion/total counters under snake_case and camelCase
    names; ``input_tokens``/``output_tokens`` are treated as prompt and
    completion counts.

    Args:
        value: A decoded JSON value, typically one event dict.

    Returns:
        The normalized counts (``prompt_tokens``, ``completion_tokens``,
        ``total_tokens``) with the largest ``total_tokens`` among the visited
        dicts, or ``None`` when no dict carried usable counters.
    """
    prompt_aliases = ["prompt_tokens", "promptTokens", "input_tokens", "inputTokens"]
    completion_aliases = [
        "completion_tokens",
        "completionTokens",
        "output_tokens",
        "outputTokens",
    ]
    total_aliases = ["total_tokens", "totalTokens"]
    max_nodes = 256

    def read_count(node: Dict[str, Any], aliases: List[str]) -> Optional[int]:
        # The first alias holding a positive count wins. An explicit zero is
        # still reported as 0 (not "missing") when no alias is positive; a
        # plain `a or b` chain would silently discard it.
        saw_zero = False
        for alias in aliases:
            parsed = _coerce_non_negative_int(node.get(alias))
            if parsed is None:
                continue
            if parsed > 0:
                return parsed
            saw_zero = True
        return 0 if saw_zero else None

    best: Optional[Dict[str, int]] = None
    stack: List[Any] = [value]
    visited = 0

    while stack and visited < max_nodes:
        current = stack.pop()
        visited += 1

        if isinstance(current, list):
            # Reversed so that popping yields the items in list order.
            stack.extend(
                item for item in reversed(current[:80]) if isinstance(item, (dict, list))
            )
            continue
        if not isinstance(current, dict):
            continue

        normalized = _normalize_usage_counts(
            read_count(current, prompt_aliases),
            read_count(current, completion_aliases),
            read_count(current, total_aliases),
        )
        if normalized is not None and (
            best is None or normalized["total_tokens"] > best["total_tokens"]
        ):
            best = normalized

        # Queue each container child exactly once. The "usage" child goes on
        # last so it is popped first and is not starved by the node budget.
        usage_node = current.get("usage")
        for nested in current.values():
            if nested is not usage_node and isinstance(nested, (dict, list)):
                stack.append(nested)
        if isinstance(usage_node, (dict, list)):
            stack.append(usage_node)

    return best
|
|
500
|
+
|
|
501
|
+
|
|
422
502
|
def _event_contains_reasoning(value: Any) -> bool:
|
|
423
503
|
max_nodes = 256
|
|
424
504
|
visited = 0
|
|
@@ -581,6 +661,9 @@ def _empty_codex_trace() -> Dict[str, Any]:
|
|
|
581
661
|
"raw_logged": 0,
|
|
582
662
|
"raw_omitted": 0,
|
|
583
663
|
"reasoning_events": 0,
|
|
664
|
+
"prompt_tokens": 0,
|
|
665
|
+
"completion_tokens": 0,
|
|
666
|
+
"total_tokens": 0,
|
|
584
667
|
}
|
|
585
668
|
|
|
586
669
|
|
|
@@ -611,6 +694,17 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
|
|
|
611
694
|
return
|
|
612
695
|
|
|
613
696
|
if isinstance(parsed, dict):
|
|
697
|
+
usage = _extract_usage_counts(parsed)
|
|
698
|
+
if usage is not None:
|
|
699
|
+
trace["prompt_tokens"] = max(
|
|
700
|
+
to_int(trace.get("prompt_tokens"), 0), usage["prompt_tokens"]
|
|
701
|
+
)
|
|
702
|
+
trace["completion_tokens"] = max(
|
|
703
|
+
to_int(trace.get("completion_tokens"), 0), usage["completion_tokens"]
|
|
704
|
+
)
|
|
705
|
+
trace["total_tokens"] = max(
|
|
706
|
+
to_int(trace.get("total_tokens"), 0), usage["total_tokens"]
|
|
707
|
+
)
|
|
614
708
|
event_type = (
|
|
615
709
|
str(parsed.get("type") or parsed.get("event") or parsed.get("kind") or "event")
|
|
616
710
|
.strip()
|
|
@@ -677,10 +771,17 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
|
|
|
677
771
|
if raw_omitted > 0:
|
|
678
772
|
log.info(f"[codex/raw] ... {raw_omitted} additional line(s) omitted.")
|
|
679
773
|
reasoning_events = to_int(trace.get("reasoning_events"), 0)
|
|
774
|
+
prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
|
|
775
|
+
completion_tokens = to_int(trace.get("completion_tokens"), 0)
|
|
776
|
+
total_tokens = to_int(trace.get("total_tokens"), 0)
|
|
680
777
|
if reasoning_events > 0:
|
|
681
778
|
log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
|
|
682
779
|
elif use_json and valid_json > 0:
|
|
683
780
|
log.info("[codex] No reasoning-like events observed in this run.")
|
|
781
|
+
if total_tokens > 0:
|
|
782
|
+
log.info(
|
|
783
|
+
f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
|
|
784
|
+
)
|
|
684
785
|
|
|
685
786
|
if not summaries and event_type_counts:
|
|
686
787
|
ranked = sorted(event_type_counts.items(), key=lambda item: item[1], reverse=True)
|
|
@@ -694,9 +795,41 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
|
|
|
694
795
|
"summaries": summaries,
|
|
695
796
|
"event_type_counts": event_type_counts,
|
|
696
797
|
"reasoning_events": reasoning_events,
|
|
798
|
+
"prompt_tokens": prompt_tokens,
|
|
799
|
+
"completion_tokens": completion_tokens,
|
|
800
|
+
"total_tokens": total_tokens,
|
|
801
|
+
}
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def _estimated_usage(prompt: str, output_text: str, *, model: str) -> Dict[str, Any]:
|
|
805
|
+
prompt_tokens = max(0, int(len(str(prompt or "")) / 3 + 0.999999))
|
|
806
|
+
completion_tokens = max(0, int(len(str(output_text or "")) / 3 + 0.999999))
|
|
807
|
+
return {
|
|
808
|
+
"promptTokens": prompt_tokens,
|
|
809
|
+
"completionTokens": completion_tokens,
|
|
810
|
+
"totalTokens": prompt_tokens + completion_tokens,
|
|
811
|
+
"estimated": True,
|
|
812
|
+
"backend": "openai_codex",
|
|
813
|
+
"modelId": model,
|
|
697
814
|
}
|
|
698
815
|
|
|
699
816
|
|
|
817
|
+
def _usage_from_trace_or_estimate(
    trace: Dict[str, Any], prompt: str, output_text: str, *, model: str
) -> Dict[str, Any]:
    """Return observed token usage from ``trace``, else a length-based estimate.

    When ``trace`` holds a positive ``total_tokens`` the record is built from
    the trace counters and flagged ``"estimated": False``; otherwise the
    result of ``_estimated_usage`` is returned.
    """
    observed_total = to_int(trace.get("total_tokens"), 0)
    if observed_total <= 0:
        # Nothing was observed on stdout; approximate from text sizes.
        return _estimated_usage(prompt, output_text, model=model)

    observed_prompt = to_int(trace.get("prompt_tokens"), 0)
    observed_completion = to_int(trace.get("completion_tokens"), 0)
    return {
        "promptTokens": observed_prompt,
        "completionTokens": observed_completion,
        # Never report a total below the sum of its parts.
        "totalTokens": max(observed_total, observed_prompt + observed_completion),
        "estimated": False,
        "backend": "openai_codex",
        "modelId": model,
    }
|
|
831
|
+
|
|
832
|
+
|
|
700
833
|
def _log_stderr(stderr: str) -> None:
|
|
701
834
|
lines = [line.strip() for line in stderr.splitlines() if line.strip()]
|
|
702
835
|
if not lines:
|
|
@@ -1125,6 +1258,10 @@ def _run_codex_task(
|
|
|
1125
1258
|
stdout_trace = _finalize_codex_stdout_trace(stdout_trace_state, use_json)
|
|
1126
1259
|
trace_excerpt = _format_codex_trace_excerpt(stdout_trace)
|
|
1127
1260
|
_log_stderr(stderr)
|
|
1261
|
+
usage_output_text = "\n\n".join(
|
|
1262
|
+
part for part in (stdout, stderr, trace_excerpt) if str(part or "").strip()
|
|
1263
|
+
)
|
|
1264
|
+
usage = _usage_from_trace_or_estimate(stdout_trace, prompt, usage_output_text, model=model)
|
|
1128
1265
|
|
|
1129
1266
|
if timed_out:
|
|
1130
1267
|
detail = (
|
|
@@ -1140,6 +1277,7 @@ def _run_codex_task(
|
|
|
1140
1277
|
"stdout": _truncate(stdout),
|
|
1141
1278
|
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
1142
1279
|
"exitCode": 124,
|
|
1280
|
+
"usage": usage,
|
|
1143
1281
|
}
|
|
1144
1282
|
|
|
1145
1283
|
last_message = _read_text_if_exists(last_message_path)
|
|
@@ -1152,6 +1290,7 @@ def _run_codex_task(
|
|
|
1152
1290
|
"stdout": _truncate(stdout),
|
|
1153
1291
|
"stderr": _truncate(stderr),
|
|
1154
1292
|
"exitCode": 128 + int(_INTERRUPTED_SIGNAL),
|
|
1293
|
+
"usage": usage,
|
|
1155
1294
|
}
|
|
1156
1295
|
|
|
1157
1296
|
if return_code is None:
|
|
@@ -1161,6 +1300,7 @@ def _run_codex_task(
|
|
|
1161
1300
|
"stdout": _truncate(stdout),
|
|
1162
1301
|
"stderr": _truncate(stderr),
|
|
1163
1302
|
"exitCode": 1,
|
|
1303
|
+
"usage": usage,
|
|
1164
1304
|
}
|
|
1165
1305
|
|
|
1166
1306
|
exit_code = int(return_code)
|
|
@@ -1177,6 +1317,7 @@ def _run_codex_task(
|
|
|
1177
1317
|
"stdout": _truncate(stdout),
|
|
1178
1318
|
"stderr": _truncate(detail),
|
|
1179
1319
|
"exitCode": exit_code,
|
|
1320
|
+
"usage": usage,
|
|
1180
1321
|
}
|
|
1181
1322
|
|
|
1182
1323
|
policy_signal = _detect_codex_workaround_signal(last_message)
|
|
@@ -1199,6 +1340,7 @@ def _run_codex_task(
|
|
|
1199
1340
|
"stdout": _truncate(stdout),
|
|
1200
1341
|
"stderr": _truncate(detail),
|
|
1201
1342
|
"exitCode": 5,
|
|
1343
|
+
"usage": usage,
|
|
1202
1344
|
}
|
|
1203
1345
|
|
|
1204
1346
|
changed_paths = summarize_git_changes(repo)
|
|
@@ -1220,6 +1362,7 @@ def _run_codex_task(
|
|
|
1220
1362
|
"stdout": "\n\n".join(stdout_parts),
|
|
1221
1363
|
"stderr": "",
|
|
1222
1364
|
"exitCode": 0,
|
|
1365
|
+
"usage": usage,
|
|
1223
1366
|
}
|
|
1224
1367
|
|
|
1225
1368
|
if not stdout_parts:
|
|
@@ -1230,6 +1373,7 @@ def _run_codex_task(
|
|
|
1230
1373
|
"stdout": "\n\n".join(stdout_parts),
|
|
1231
1374
|
"stderr": "",
|
|
1232
1375
|
"exitCode": 0,
|
|
1376
|
+
"usage": usage,
|
|
1233
1377
|
}
|
|
1234
1378
|
|
|
1235
1379
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import sys
|
|
2
3
|
import unittest
|
|
4
|
+
import tempfile
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
|
|
5
7
|
_HERE = Path(__file__).resolve().parent
|
|
@@ -8,13 +10,16 @@ for path in (_HERE, _SHARED):
|
|
|
8
10
|
if str(path) not in sys.path:
|
|
9
11
|
sys.path.insert(0, str(path))
|
|
10
12
|
|
|
11
|
-
from executor_base import SettingsResolver
|
|
13
|
+
from executor_base import SettingsResolver, config_dir_for_runtime_config, runtime_config
|
|
12
14
|
from openai_codex_executor import (
|
|
13
15
|
OpenAICodexRuntimeConfig,
|
|
16
|
+
_resolve_reasoning_effort,
|
|
14
17
|
_build_instruction,
|
|
15
18
|
_detect_codex_workaround_signal,
|
|
19
|
+
_extract_usage_counts,
|
|
16
20
|
_load_prompt_template,
|
|
17
21
|
_repo_root_for_prompt_loading,
|
|
22
|
+
_usage_from_trace_or_estimate,
|
|
18
23
|
)
|
|
19
24
|
|
|
20
25
|
|
|
@@ -55,8 +60,60 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
55
60
|
self.assertEqual(cfg.approval_policy, "never")
|
|
56
61
|
self.assertEqual(cfg.sandbox, "workspace-write")
|
|
57
62
|
self.assertEqual(cfg.color, "never")
|
|
63
|
+
self.assertEqual(cfg.reasoning_effort, "xhigh")
|
|
58
64
|
self.assertFalse(cfg.json_output)
|
|
59
65
|
|
|
66
|
+
def test_reasoning_effort_accepts_extra_high_alias(self) -> None:
    """The spelling "extra high" from the environment resolves to "xhigh"."""
    resolver = SettingsResolver(
        env={"WORKERPALS_OPENAI_CODEX_REASONING_EFFORT": "extra high"},
        config_loader=lambda: {},
    )
    cfg = OpenAICodexRuntimeConfig.from_sources(resolver)
    self.assertEqual(_resolve_reasoning_effort(cfg), "xhigh")
|
|
74
|
+
|
|
75
|
+
def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
    """PUSHPALS_CONFIG_DIR_OVERRIDE is used instead of the repo's configs dir."""
    import executor_base

    with tempfile.TemporaryDirectory(prefix="pushpals-openai-codex-config-") as root:
        sandbox = Path(root)
        repo_root = sandbox / "repo"
        runtime_config_dir = sandbox / "runtime" / "configs"
        repo_config_dir = repo_root / "configs"

        # Both trees use the same profile; only the `json` flag differs, so
        # the final assertion reveals which default.toml was loaded.
        fixtures = (
            (runtime_config_dir, 'profile = "dev"\n[workerpals.openai_codex]\njson = true\n'),
            (repo_config_dir, 'profile = "dev"\n[workerpals.openai_codex]\njson = false\n'),
        )
        for config_dir, _ in fixtures:
            config_dir.mkdir(parents=True, exist_ok=True)
        for config_dir, toml_text in fixtures:
            (config_dir / "default.toml").write_text(toml_text, encoding="utf-8")

        overrides = {
            "PUSHPALS_REPO_PATH": str(repo_root),
            "PUSHPALS_CONFIG_DIR_OVERRIDE": str(runtime_config_dir),
            "PUSHPALS_PROFILE": "dev",
        }
        saved_env = {name: os.environ.get(name) for name in overrides}
        saved_cache = executor_base._CONFIG_CACHE
        try:
            os.environ.update(overrides)
            # Drop any cached config so runtime_config() re-reads from disk.
            executor_base._CONFIG_CACHE = None

            self.assertEqual(config_dir_for_runtime_config(), runtime_config_dir)
            loaded = runtime_config()
            self.assertTrue(loaded["workerpals"]["openai_codex"]["json"])
        finally:
            # Restore module and process state for the remaining tests.
            executor_base._CONFIG_CACHE = saved_cache
            for name, original in saved_env.items():
                if original is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = original
|
|
116
|
+
|
|
60
117
|
def test_build_instruction_includes_codex_runtime_invariants(self) -> None:
|
|
61
118
|
prompt = _build_instruction("Add two tests for localbuddy", [])
|
|
62
119
|
self.assertIn("Codex CLI is required infrastructure", prompt)
|
|
@@ -105,6 +162,32 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
105
162
|
template = _load_prompt_template("workerpals/openai_codex_task_execute_system_prompt.md")
|
|
106
163
|
self.assertIn("Codex CLI is required infrastructure", template)
|
|
107
164
|
|
|
165
|
+
def test_extracts_usage_counts_from_nested_json_event(self) -> None:
    """Counters under response.usage are found; input/output map to prompt/completion."""
    event = {
        "type": "response.completed",
        "response": {
            "usage": {
                "input_tokens": 120,
                "output_tokens": 30,
                "total_tokens": 150,
            }
        },
    }
    expected = {"prompt_tokens": 120, "completion_tokens": 30, "total_tokens": 150}
    self.assertEqual(_extract_usage_counts(event), expected)
|
|
182
|
+
|
|
183
|
+
def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
    """An empty trace yields an estimated usage record tagged with backend and model."""
    estimate = _usage_from_trace_or_estimate({}, "abc" * 30, "done", model="gpt-5.4")

    self.assertTrue(estimate["estimated"])
    self.assertEqual(estimate["backend"], "openai_codex")
    self.assertEqual(estimate["modelId"], "gpt-5.4")
    self.assertGreater(estimate["promptTokens"], 0)
    # The prompt contributes, so the total must exceed the completion share.
    self.assertGreater(estimate["totalTokens"], estimate["completionTokens"])
|
|
190
|
+
|
|
108
191
|
|
|
109
192
|
if __name__ == "__main__":
|
|
110
193
|
unittest.main()
|
|
@@ -28,7 +28,7 @@ from executor_base import (
|
|
|
28
28
|
log_git_status,
|
|
29
29
|
looks_local_base_url,
|
|
30
30
|
parse_task_execute_payload,
|
|
31
|
-
|
|
31
|
+
prompts_root_for_runtime_assets,
|
|
32
32
|
resolve_llm_config,
|
|
33
33
|
setting_int,
|
|
34
34
|
setting_str,
|
|
@@ -78,7 +78,7 @@ def _session_hint_headers(session_user: str) -> Dict[str, str]:
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
def _repo_root_for_prompt_loading() -> Path:
|
|
81
|
-
return
|
|
81
|
+
return prompts_root_for_runtime_assets()
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
def _resolve_prompt_file(relative_path: str) -> Path:
|
package/runtime/sandbox/apps/workerpals/src/backends/openhands/test_openhands_runtime_paths.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import tempfile
|
|
4
|
+
import unittest
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
_HERE = Path(__file__).resolve().parent
|
|
8
|
+
_SHARED = _HERE.parent / "shared"
|
|
9
|
+
for path in (_HERE, _SHARED):
|
|
10
|
+
if str(path) not in sys.path:
|
|
11
|
+
sys.path.insert(0, str(path))
|
|
12
|
+
|
|
13
|
+
from openhands_executor import _PROMPT_TEMPLATE_CACHE, _load_prompt_template, _resolve_prompt_file
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class OpenHandsRuntimePathTests(unittest.TestCase):
    """Prompt-file lookup tests for the OpenHands executor."""

    def test_prompt_resolution_prefers_explicit_prompt_root_override(self) -> None:
        """PUSHPALS_PROMPTS_ROOT_OVERRIDE wins over the prompts under PUSHPALS_REPO_PATH."""
        with tempfile.TemporaryDirectory(prefix="pushpals-openhands-prompts-") as root:
            sandbox = Path(root)
            repo_root = sandbox / "repo"
            runtime_root = sandbox / "runtime"
            relative_prompt = Path(
                "prompts", "workerpals", "openhands_strict_tool_use_message.md"
            )
            repo_prompt = repo_root / relative_prompt
            runtime_prompt = runtime_root / relative_prompt

            # Different contents so the loaded text identifies its origin.
            for prompt_path, text in (
                (repo_prompt, "repo prompt"),
                (runtime_prompt, "runtime prompt"),
            ):
                prompt_path.parent.mkdir(parents=True, exist_ok=True)
                prompt_path.write_text(text, encoding="utf-8")

            overrides = {
                "PUSHPALS_REPO_PATH": str(repo_root),
                "PUSHPALS_PROMPTS_ROOT_OVERRIDE": str(runtime_root),
            }
            saved_env = {name: os.environ.get(name) for name in overrides}
            saved_cache = dict(_PROMPT_TEMPLATE_CACHE)
            try:
                os.environ.update(overrides)
                # Start from an empty template cache so the file is re-read.
                _PROMPT_TEMPLATE_CACHE.clear()

                resolved = _resolve_prompt_file("workerpals/openhands_strict_tool_use_message.md")
                self.assertEqual(resolved, runtime_prompt)
                self.assertEqual(
                    _load_prompt_template("workerpals/openhands_strict_tool_use_message.md"),
                    "runtime prompt",
                )
            finally:
                # Put the shared cache and environment back as they were.
                _PROMPT_TEMPLATE_CACHE.clear()
                _PROMPT_TEMPLATE_CACHE.update(saved_cache)
                for name, original in saved_env.items():
                    if original is None:
                        os.environ.pop(name, None)
                    else:
                        os.environ[name] = original
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
if __name__ == "__main__":
|
|
57
|
+
unittest.main()
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
import { existsSync } from "fs";
|
|
10
10
|
import { resolve } from "path";
|
|
11
|
-
import type { JobResult } from "../common/types.js";
|
|
11
|
+
import type { JobResult, JobTokenUsage } from "../common/types.js";
|
|
12
12
|
import type { WorkerpalsRuntimeConfig } from "../common/executor_backend.js";
|
|
13
13
|
import {
|
|
14
14
|
truncate,
|
|
@@ -24,6 +24,83 @@ const OPENHANDS_SCRIPT_PATH = resolve(import.meta.dir, "openhands", "openhands_e
|
|
|
24
24
|
|
|
25
25
|
// ---- OpenHands-specific helpers ----------------------------------------------
|
|
26
26
|
|
|
27
|
+
/**
 * Approximates a token count from raw text as one token per three
 * characters, rounded up.
 *
 * @param text - Text to measure; `null`/`undefined` count as empty.
 * @returns A non-negative integer estimate.
 */
function estimateTokensFromText(text: string): number {
  const normalized = String(text ?? "");
  const approximate = Math.ceil(normalized.length / 3);
  return Math.max(0, approximate);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Builds an estimated usage record for a job that reported no token counts.
 *
 * The prompt side is sized from the JSON-serialized `params`; if
 * serialization throws, the `instruction` or `prompt` field is used instead.
 * The completion side is sized from the non-empty pieces of `summary`,
 * `stdout` and `stderr` joined by blank lines.
 *
 * @param runtimeConfig - Supplies the model id (`workerpals.llm.model`).
 * @param params - Job parameters sent to the executor.
 * @param summary - Result summary text, may be empty.
 * @param stdout - Captured standard output.
 * @param stderr - Captured standard error.
 * @returns A usage record flagged `estimated: true` with backend "openhands".
 */
function estimateJobTokenUsage(
  runtimeConfig: WorkerpalsRuntimeConfig,
  params: Record<string, unknown>,
  summary: string,
  stdout: string,
  stderr: string,
): JobTokenUsage {
  let promptSource: string;
  try {
    promptSource = JSON.stringify(params);
  } catch {
    // JSON.stringify throws on e.g. circular structures.
    promptSource = String(params?.instruction ?? params?.prompt ?? "");
  }

  const completionParts = [summary, stdout, stderr].filter(Boolean);
  const promptTokens = estimateTokensFromText(promptSource);
  const completionTokens = estimateTokensFromText(completionParts.join("\n\n"));

  return {
    promptTokens,
    completionTokens,
    totalTokens: promptTokens + completionTokens,
    estimated: true,
    backend: "openhands",
    modelId: runtimeConfig.workerpals.llm.model.trim(),
  };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Normalizes a usage object reported by an executor into a `JobTokenUsage`.
 *
 * Counters are read under camelCase and snake_case names. Only finite,
 * non-negative numbers (or non-blank numeric strings) count as present, so
 * `null`, `""` and booleans are treated as missing rather than as `0`/`1`.
 * Missing counters are derived from the ones that are present, then from
 * `fallback`. The returned total is never below prompt + completion.
 *
 * @param value - Untrusted usage payload from the structured job result.
 * @param fallback - Usage record returned as-is when `value` carries no
 *   usable counter, and the source for fields `value` omits.
 * @returns The normalized usage record.
 */
function coerceJobTokenUsage(
  value: unknown,
  fallback: JobTokenUsage,
): JobTokenUsage {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return fallback;
  }
  const raw = value as Record<string, unknown>;

  // Returns the first alias holding a valid count, rounded to an integer.
  const readCount = (...aliases: string[]): number | undefined => {
    for (const alias of aliases) {
      const candidate = raw[alias];
      let parsed = Number.NaN;
      if (typeof candidate === "number") {
        parsed = candidate;
      } else if (typeof candidate === "string" && candidate.trim().length > 0) {
        parsed = Number(candidate);
      }
      if (Number.isFinite(parsed) && parsed >= 0) {
        return Math.round(parsed);
      }
    }
    return undefined;
  };

  const prompt = readCount("promptTokens", "prompt_tokens");
  const completion = readCount("completionTokens", "completion_tokens");
  const total = readCount("totalTokens", "total_tokens");
  if (prompt === undefined && completion === undefined && total === undefined) {
    return fallback;
  }

  // Prefer the reported completion when back-filling the prompt from the
  // total; the fallback estimate is used only when neither is available.
  const normalizedPrompt =
    prompt ??
    (total !== undefined
      ? Math.max(0, total - (completion ?? fallback.completionTokens))
      : fallback.promptTokens);
  const normalizedCompletion =
    completion ??
    (total !== undefined
      ? Math.max(0, total - normalizedPrompt)
      : fallback.completionTokens);
  const partsSum = normalizedPrompt + normalizedCompletion;
  const normalizedTotal = total !== undefined ? Math.max(total, partsSum) : partsSum;

  return {
    promptTokens: normalizedPrompt,
    completionTokens: normalizedCompletion,
    totalTokens: normalizedTotal,
    estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
    backend:
      typeof raw.backend === "string" && raw.backend.trim().length > 0
        ? raw.backend.trim()
        : fallback.backend,
    modelId:
      typeof raw.modelId === "string" && raw.modelId.trim().length > 0
        ? raw.modelId.trim()
        : fallback.modelId,
  };
}
|
|
103
|
+
|
|
27
104
|
function classifyShellCommand(cmd: string): "explore" | "progress" {
|
|
28
105
|
const trimmed = cmd.trim().toLowerCase();
|
|
29
106
|
if (!trimmed) return "explore";
|
|
@@ -450,6 +527,7 @@ export async function executeWithOpenHands(
|
|
|
450
527
|
|
|
451
528
|
const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
|
|
452
529
|
const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
|
|
530
|
+
const fallbackUsage = estimateJobTokenUsage(runtimeConfig, params, "", filteredStdout, stderr);
|
|
453
531
|
|
|
454
532
|
if (!parsed) {
|
|
455
533
|
if (timedOut) {
|
|
@@ -464,6 +542,7 @@ export async function executeWithOpenHands(
|
|
|
464
542
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
465
543
|
stderr: truncate(stderr, outputPolicy),
|
|
466
544
|
exitCode: exitCode === 0 ? 124 : exitCode,
|
|
545
|
+
usage: fallbackUsage,
|
|
467
546
|
};
|
|
468
547
|
}
|
|
469
548
|
return {
|
|
@@ -472,6 +551,7 @@ export async function executeWithOpenHands(
|
|
|
472
551
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
473
552
|
stderr: truncate(stderr, outputPolicy),
|
|
474
553
|
exitCode,
|
|
554
|
+
usage: fallbackUsage,
|
|
475
555
|
};
|
|
476
556
|
}
|
|
477
557
|
|
|
@@ -483,6 +563,10 @@ export async function executeWithOpenHands(
|
|
|
483
563
|
: `${kind} failed via OpenHands (exit ${exitCode})`;
|
|
484
564
|
const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
|
|
485
565
|
const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
|
|
566
|
+
const usage = coerceJobTokenUsage(
|
|
567
|
+
parsed.usage,
|
|
568
|
+
estimateJobTokenUsage(runtimeConfig, params, summary, parsedStdout, parsedStderr),
|
|
569
|
+
);
|
|
486
570
|
const parsedExitCode =
|
|
487
571
|
typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
|
|
488
572
|
? parsed.exitCode
|
|
@@ -502,6 +586,7 @@ export async function executeWithOpenHands(
|
|
|
502
586
|
stdout: truncate(filteredStdout || String(parsedStdout ?? ""), outputPolicy),
|
|
503
587
|
stderr: truncate(`Clarification needed: ${clarificationQuestion}`, outputPolicy),
|
|
504
588
|
exitCode: 0,
|
|
589
|
+
usage,
|
|
505
590
|
};
|
|
506
591
|
}
|
|
507
592
|
}
|
|
@@ -512,12 +597,20 @@ export async function executeWithOpenHands(
|
|
|
512
597
|
stdout: truncate(parsedStdout ?? "", outputPolicy),
|
|
513
598
|
stderr: truncate(parsedStderr ?? "", outputPolicy),
|
|
514
599
|
exitCode: parsedExitCode,
|
|
600
|
+
usage,
|
|
515
601
|
};
|
|
516
602
|
} catch (err) {
|
|
517
603
|
return {
|
|
518
604
|
ok: false,
|
|
519
605
|
summary: `OpenHands wrapper execution error for ${kind}: ${String(err)}`,
|
|
520
606
|
exitCode: 1,
|
|
607
|
+
usage: estimateJobTokenUsage(
|
|
608
|
+
runtimeConfig,
|
|
609
|
+
params,
|
|
610
|
+
`OpenHands wrapper execution error for ${kind}: ${String(err)}`,
|
|
611
|
+
"",
|
|
612
|
+
"",
|
|
613
|
+
),
|
|
521
614
|
};
|
|
522
615
|
} finally {
|
|
523
616
|
if (warningTimer) {
|
|
@@ -203,6 +203,24 @@ def repo_root_for_runtime_config() -> Path:
|
|
|
203
203
|
return Path(__file__).resolve().parents[3]
|
|
204
204
|
|
|
205
205
|
|
|
206
|
+
def config_dir_for_runtime_config() -> Path:
    """Return the directory the runtime TOML config files are read from.

    A non-blank ``PUSHPALS_CONFIG_DIR_OVERRIDE`` environment variable wins;
    otherwise the ``configs`` directory under the runtime repo root is used.
    """
    override = os.environ.get("PUSHPALS_CONFIG_DIR_OVERRIDE")
    override = override.strip() if override else ""
    if not override:
        return repo_root_for_runtime_config() / "configs"
    return Path(override)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def prompts_root_for_runtime_assets() -> Path:
    """Return the directory that contains the ``prompts`` folder.

    A non-blank ``PUSHPALS_PROMPTS_ROOT_OVERRIDE`` environment variable wins.
    Otherwise the ancestors of this file are searched from nearest to farthest
    and the first one holding a ``prompts`` directory is returned (the
    ancestor itself, not ``prompts``). If none qualifies, the runtime repo
    root is returned.
    """
    override = (os.environ.get("PUSHPALS_PROMPTS_ROOT_OVERRIDE") or "").strip()
    if override:
        return Path(override)
    here = Path(__file__).resolve()
    nearest = next(
        (ancestor for ancestor in here.parents if (ancestor / "prompts").is_dir()),
        None,
    )
    if nearest is not None:
        return nearest
    return repo_root_for_runtime_config()
|
|
222
|
+
|
|
223
|
+
|
|
206
224
|
def _parse_toml_file(path: Path) -> Dict[str, Any]:
|
|
207
225
|
if not path.exists() or not tomllib:
|
|
208
226
|
return {}
|
|
@@ -217,12 +235,7 @@ def runtime_config() -> Dict[str, Any]:
|
|
|
217
235
|
global _CONFIG_CACHE
|
|
218
236
|
if _CONFIG_CACHE is not None:
|
|
219
237
|
return _CONFIG_CACHE
|
|
220
|
-
|
|
221
|
-
legacy_config_dir = repo_root / "config"
|
|
222
|
-
config_dir = repo_root / "configs"
|
|
223
|
-
if not (config_dir / "default.toml").exists():
|
|
224
|
-
if (legacy_config_dir / "default.toml").exists():
|
|
225
|
-
config_dir = legacy_config_dir
|
|
238
|
+
config_dir = config_dir_for_runtime_config()
|
|
226
239
|
default_cfg = _parse_toml_file(config_dir / "default.toml")
|
|
227
240
|
profile = (
|
|
228
241
|
(os.environ.get("PUSHPALS_PROFILE") or "").strip()
|
|
@@ -231,12 +244,6 @@ def runtime_config() -> Dict[str, Any]:
|
|
|
231
244
|
)
|
|
232
245
|
profile_cfg = _parse_toml_file(config_dir / f"{profile}.toml")
|
|
233
246
|
local_cfg = _parse_toml_file(config_dir / "local.toml")
|
|
234
|
-
if (
|
|
235
|
-
not local_cfg
|
|
236
|
-
and config_dir != legacy_config_dir
|
|
237
|
-
and (legacy_config_dir / "local.toml").exists()
|
|
238
|
-
):
|
|
239
|
-
local_cfg = _parse_toml_file(legacy_config_dir / "local.toml")
|
|
240
247
|
_CONFIG_CACHE = _deep_merge(_deep_merge(default_cfg, profile_cfg), local_cfg)
|
|
241
248
|
return _CONFIG_CACHE
|
|
242
249
|
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import { existsSync } from "fs";
|
|
11
11
|
import { resolve } from "path";
|
|
12
|
-
import type { JobResult } from "./types.js";
|
|
12
|
+
import type { JobResult, JobTokenUsage } from "./types.js";
|
|
13
13
|
import type { WorkerpalsRuntimeConfig } from "./executor_backend.js";
|
|
14
14
|
import type { BackendTaskExecutor } from "../backends/types.js";
|
|
15
15
|
import {
|
|
@@ -26,6 +26,84 @@ interface GenericPythonExecutorConfig {
|
|
|
26
26
|
timeoutConfigKey: string;
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
/**
 * Rough token estimate for a piece of text: one token per three UTF-16 code
 * units, rounded up. Nullish input is treated as the empty string.
 */
function estimateTokensFromText(text: string): number {
  const normalized = String(text ?? "");
  const approxTokens = Math.ceil(normalized.length / 3);
  return Math.max(0, approxTokens);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Builds a heuristic `JobTokenUsage` for a job when no exact usage is known.
 *
 * The prompt side is estimated from the JSON-serialized `params` (falling back
 * to the `instruction` / `prompt` field if serialization throws); the
 * completion side is estimated from the non-empty parts of `summary`, `stdout`
 * and `stderr` joined by blank lines. The result is always flagged
 * `estimated: true`.
 */
function estimateJobTokenUsage(
  backendName: string,
  modelId: string,
  params: Record<string, unknown>,
  summary: string,
  stdout: string,
  stderr: string,
): JobTokenUsage {
  let promptSource: string;
  try {
    promptSource = JSON.stringify(params);
  } catch {
    // e.g. circular structures or BigInt values cannot be serialized
    promptSource = String(params?.instruction ?? params?.prompt ?? "");
  }

  const completionParts = [summary, stdout, stderr].filter((part) => Boolean(part));
  const completionSource = completionParts.join("\n\n");

  const promptTokens = estimateTokensFromText(promptSource);
  const completionTokens = estimateTokensFromText(completionSource);
  const totalTokens = promptTokens + completionTokens;

  return {
    promptTokens,
    completionTokens,
    totalTokens,
    estimated: true,
    backend: backendName,
    modelId,
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Turns the `usage` object emitted by a Python executor into a `JobTokenUsage`.
 *
 * Each counter is looked up under its camelCase key, then its snake_case key.
 * Only finite, non-negative numbers and non-empty numeric strings are accepted
 * as reported counts. `null`, booleans and empty strings are ignored rather
 * than coerced to 0/1, so they cannot silently replace the fallback estimate
 * with zeros.
 *
 * @param value - Raw `usage` value from the parsed structured result.
 * @param fallback - Estimated usage; returned as-is when `value` carries no
 *   usable counter, and used to fill any gap otherwise.
 * @returns The fallback, or a new record with rounded counters.
 */
function coerceJobTokenUsage(
  value: unknown,
  fallback: JobTokenUsage,
): JobTokenUsage {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return fallback;
  }
  const raw = value as Record<string, unknown>;

  // Rounded count, or undefined when the field does not hold a usable counter.
  const toCount = (field: unknown): number | undefined => {
    let numeric = Number.NaN;
    if (typeof field === "number") {
      numeric = field;
    } else if (typeof field === "string" && field.trim().length > 0) {
      numeric = Number(field);
    }
    return Number.isFinite(numeric) && numeric >= 0 ? Math.round(numeric) : undefined;
  };

  const prompt = toCount(raw.promptTokens ?? raw.prompt_tokens);
  const completion = toCount(raw.completionTokens ?? raw.completion_tokens);
  const total = toCount(raw.totalTokens ?? raw.total_tokens);
  if (prompt === undefined && completion === undefined && total === undefined) {
    return fallback;
  }

  // A missing prompt count is derived from the total minus the *reported*
  // completion count when available; the estimate is used only as a last
  // resort so the three counters remain mutually consistent.
  let normalizedPrompt: number;
  if (prompt !== undefined) {
    normalizedPrompt = prompt;
  } else if (total !== undefined) {
    normalizedPrompt = Math.max(0, total - (completion ?? fallback.completionTokens));
  } else {
    normalizedPrompt = fallback.promptTokens;
  }

  let normalizedCompletion: number;
  if (completion !== undefined) {
    normalizedCompletion = completion;
  } else if (total !== undefined) {
    normalizedCompletion = Math.max(0, total - normalizedPrompt);
  } else {
    normalizedCompletion = fallback.completionTokens;
  }

  const trimmedOr = (field: unknown, otherwise: string | undefined): string | undefined =>
    typeof field === "string" && field.trim().length > 0 ? field.trim() : otherwise;

  return {
    promptTokens: normalizedPrompt,
    completionTokens: normalizedCompletion,
    totalTokens: total ?? normalizedPrompt + normalizedCompletion,
    estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
    backend: trimmedOr(raw.backend, fallback.backend),
    modelId: trimmedOr(raw.modelId, fallback.modelId),
  };
}
|
|
106
|
+
|
|
29
107
|
function resolveRuntimeSettings(
|
|
30
108
|
config: GenericPythonExecutorConfig,
|
|
31
109
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
@@ -69,6 +147,7 @@ export function createGenericPythonExecutor(
|
|
|
69
147
|
config,
|
|
70
148
|
runtimeConfig,
|
|
71
149
|
);
|
|
150
|
+
const modelId = runtimeConfig.workerpals.llm.model.trim();
|
|
72
151
|
const executionBudgetMs =
|
|
73
152
|
typeof budgets?.executionBudgetMs === "number" && Number.isFinite(budgets.executionBudgetMs)
|
|
74
153
|
? Math.max(10_000, Math.floor(budgets.executionBudgetMs))
|
|
@@ -158,6 +237,14 @@ export function createGenericPythonExecutor(
|
|
|
158
237
|
|
|
159
238
|
const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
|
|
160
239
|
const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
|
|
240
|
+
const fallbackUsage = estimateJobTokenUsage(
|
|
241
|
+
backendName,
|
|
242
|
+
modelId,
|
|
243
|
+
params,
|
|
244
|
+
"",
|
|
245
|
+
filteredStdout,
|
|
246
|
+
stderr,
|
|
247
|
+
);
|
|
161
248
|
|
|
162
249
|
if (!parsed) {
|
|
163
250
|
if (timedOut) {
|
|
@@ -167,6 +254,7 @@ export function createGenericPythonExecutor(
|
|
|
167
254
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
168
255
|
stderr: truncate(stderr, outputPolicy),
|
|
169
256
|
exitCode: exitCode === 0 ? 124 : exitCode,
|
|
257
|
+
usage: fallbackUsage,
|
|
170
258
|
};
|
|
171
259
|
}
|
|
172
260
|
return {
|
|
@@ -175,35 +263,47 @@ export function createGenericPythonExecutor(
|
|
|
175
263
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
176
264
|
stderr: truncate(stderr, outputPolicy),
|
|
177
265
|
exitCode,
|
|
266
|
+
usage: fallbackUsage,
|
|
178
267
|
};
|
|
179
268
|
}
|
|
180
269
|
|
|
270
|
+
const summary =
|
|
271
|
+
typeof parsed.summary === "string"
|
|
272
|
+
? parsed.summary
|
|
273
|
+
: exitCode === 0
|
|
274
|
+
? `${kind} passed via ${backendName}`
|
|
275
|
+
: `${kind} failed via ${backendName} (exit ${exitCode})`;
|
|
276
|
+
const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
|
|
277
|
+
const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
|
|
278
|
+
const usage = coerceJobTokenUsage(
|
|
279
|
+
parsed.usage,
|
|
280
|
+
estimateJobTokenUsage(backendName, modelId, params, summary, parsedStdout, parsedStderr),
|
|
281
|
+
);
|
|
282
|
+
|
|
181
283
|
return {
|
|
182
284
|
ok: typeof parsed.ok === "boolean" ? parsed.ok : exitCode === 0,
|
|
183
|
-
summary
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
: exitCode === 0
|
|
187
|
-
? `${kind} passed via ${backendName}`
|
|
188
|
-
: `${kind} failed via ${backendName} (exit ${exitCode})`,
|
|
189
|
-
stdout: truncate(
|
|
190
|
-
typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout,
|
|
191
|
-
outputPolicy,
|
|
192
|
-
),
|
|
193
|
-
stderr: truncate(
|
|
194
|
-
typeof parsed.stderr === "string" ? parsed.stderr : stderr,
|
|
195
|
-
outputPolicy,
|
|
196
|
-
),
|
|
285
|
+
summary,
|
|
286
|
+
stdout: truncate(parsedStdout, outputPolicy),
|
|
287
|
+
stderr: truncate(parsedStderr, outputPolicy),
|
|
197
288
|
exitCode:
|
|
198
289
|
typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
|
|
199
290
|
? parsed.exitCode
|
|
200
291
|
: exitCode,
|
|
292
|
+
usage,
|
|
201
293
|
};
|
|
202
294
|
} catch (err) {
|
|
203
295
|
return {
|
|
204
296
|
ok: false,
|
|
205
297
|
summary: `${backendName} wrapper execution error for ${kind}: ${String(err)}`,
|
|
206
298
|
exitCode: 1,
|
|
299
|
+
usage: estimateJobTokenUsage(
|
|
300
|
+
backendName,
|
|
301
|
+
runtimeConfig.workerpals.llm.model.trim(),
|
|
302
|
+
params,
|
|
303
|
+
`${backendName} wrapper execution error for ${kind}: ${String(err)}`,
|
|
304
|
+
"",
|
|
305
|
+
"",
|
|
306
|
+
),
|
|
207
307
|
};
|
|
208
308
|
}
|
|
209
309
|
};
|
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
export type ExecutorBackend = string;
|
|
2
2
|
|
|
3
|
+
/** Token accounting attached to a job result. */
export interface JobTokenUsage {
  /** Tokens attributed to the job's input. */
  promptTokens: number;
  /** Tokens attributed to the job's output. */
  completionTokens: number;
  /** Overall token count, when known. */
  totalTokens?: number;
  /** True when the counts are a heuristic estimate rather than reported values. */
  estimated?: boolean;
  /** Name of the executor backend that produced the usage. */
  backend?: string;
  /** Identifier of the model the usage is attributed to. */
  modelId?: string;
}
|
|
11
|
+
|
|
3
12
|
/** Outcome of a single executed job, as returned by a backend executor. */
export interface JobResult {
  /** Whether the job succeeded. */
  ok: boolean;
  /** Short human-readable description of the outcome. */
  summary: string;
  /** Captured standard output, if any. */
  stdout?: string;
  /** Captured standard error, if any. */
  stderr?: string;
  /** Exit code associated with the job, if any. */
  exitCode?: number;
  /** Token usage attributed to the job, reported or estimated. */
  usage?: JobTokenUsage;
}
|
|
@@ -2022,14 +2022,29 @@ export function shouldUseCodexCliForExecutor(executor: string): boolean {
|
|
|
2022
2022
|
return executor.trim().toLowerCase() === "openai_codex";
|
|
2023
2023
|
}
|
|
2024
2024
|
|
|
2025
|
-
function normalizeCodexReasoningEffort(
|
|
2025
|
+
/**
 * Maps an arbitrary config value onto a Codex reasoning-effort level.
 *
 * The value is stringified, trimmed and lower-cased. The four canonical
 * levels pass through unchanged; the spelled-out "extra high" variants map to
 * `"xhigh"`; anything else (including nullish input) also resolves to
 * `"xhigh"`.
 */
function normalizeCodexReasoningEffort(
  value: unknown,
): "low" | "medium" | "high" | "xhigh" {
  const normalized = String(value ?? "")
    .trim()
    .toLowerCase();
  switch (normalized) {
    case "low":
    case "medium":
    case "high":
    case "xhigh":
      return normalized;
    case "extra high":
    case "extra-high":
    case "extrahigh":
    case "x-high":
      return "xhigh";
    default:
      return "xhigh";
  }
}
|
|
2034
2049
|
|
|
2035
2050
|
async function generateCommitMessageFromDiff(
|
|
@@ -91,6 +91,99 @@ function workerLlmConfig(runtimeConfig: ReturnType<typeof loadPushPalsConfig>):
|
|
|
91
91
|
};
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
+
/**
 * Approximates a token count from text length, assuming roughly three UTF-16
 * code units per token (rounded up). Nullish input counts as empty text.
 */
function estimateTokensFromText(text: string): number {
  const codeUnits = String(text ?? "").length;
  const estimate = Math.ceil(codeUnits / 3);
  return Math.max(0, estimate);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Builds the telemetry payload describing the LLM usage of a finished job.
 *
 * Returns `null` when no session id is available from the job or `CONFIG`.
 * When the result carries usage with finite, non-negative prompt and
 * completion counts, those (rounded) values are reported; otherwise the
 * counts are estimated from the serialized job and the result's
 * summary/stdout/stderr, and the event is flagged `estimated: true`.
 */
function buildWorkerLlmUsageEvent(
  job: {
    kind: string;
    sessionId?: string | null;
    params?: Record<string, unknown> | null;
  },
  result: WorkerJobResult,
): Record<string, unknown> | null {
  const sessionId = String(job.sessionId ?? CONFIG.sessionId ?? "").trim();
  if (sessionId.length === 0) return null;

  const llmConfig = workerLlmConfig(CONFIG);
  const reported = result.usage;
  const isCount = (n: unknown): n is number =>
    typeof n === "number" && Number.isFinite(n) && n >= 0;

  if (reported && isCount(reported.promptTokens) && isCount(reported.completionTokens)) {
    const promptTokens = Math.round(reported.promptTokens);
    const completionTokens = Math.round(reported.completionTokens);
    // Trust a valid reported total; otherwise sum the two sides.
    const totalTokens = isCount(reported.totalTokens)
      ? Math.round(reported.totalTokens)
      : promptTokens + completionTokens;
    const backend =
      String(reported.backend ?? resolveExecutor(CONFIG)).trim() || resolveExecutor(CONFIG);
    const modelId = String(reported.modelId ?? llmConfig.model).trim() || llmConfig.model;
    return {
      service: "workerpals",
      sessionId,
      backend,
      modelId,
      promptTokens,
      completionTokens,
      totalTokens,
      estimated: reported.estimated === true,
    };
  }

  // No usable usage on the result: estimate from the job input and output text.
  let promptSource: string;
  try {
    promptSource = JSON.stringify({
      kind: job.kind,
      params: job.params ?? {},
    });
  } catch {
    promptSource =
      `${job.kind}\n${String(job.params?.instruction ?? job.params?.prompt ?? "")}`.trim();
  }
  const completionSource = [result.summary, result.stdout ?? "", result.stderr ?? ""]
    .filter((part) => Boolean(part))
    .join("\n\n");

  const promptTokens = estimateTokensFromText(promptSource);
  const completionTokens = estimateTokensFromText(completionSource);
  return {
    service: "workerpals",
    sessionId,
    backend: resolveExecutor(CONFIG),
    modelId: llmConfig.model,
    promptTokens,
    completionTokens,
    totalTokens: promptTokens + completionTokens,
    estimated: true,
  };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Posts the job's LLM usage event to `${server}/telemetry/llm-usage`.
 *
 * Does nothing when no usage event can be built for the job. Throws an
 * `Error` carrying the HTTP status (and response body, when readable) if the
 * server answers with a non-OK status.
 */
async function reportWorkerLlmUsage(
  server: string,
  headers: Record<string, string>,
  job: {
    kind: string;
    sessionId?: string | null;
    params?: Record<string, unknown> | null;
  },
  result: WorkerJobResult,
): Promise<void> {
  const event = buildWorkerLlmUsageEvent(job, result);
  if (event === null) return;

  const response = await fetch(`${server}/telemetry/llm-usage`, {
    method: "POST",
    headers,
    body: JSON.stringify(event),
  });
  if (response.ok) return;

  // Reading the body is best-effort; an unreadable body just yields no detail.
  const detail = await response.text().catch(() => "");
  const suffix = detail ? `: ${detail.trim()}` : "";
  throw new Error(`usage telemetry rejected (${response.status})${suffix}`);
}
|
|
186
|
+
|
|
94
187
|
function integrationBranchName(): string {
|
|
95
188
|
const configuredBaseRef = CONFIG.workerpals.baseRef.trim();
|
|
96
189
|
if (!configuredBaseRef) return "main_agents";
|
|
@@ -1047,6 +1140,15 @@ async function workerLoop(
|
|
|
1047
1140
|
const jobDurationMs = Math.max(0, Date.now() - jobStartedAtMs);
|
|
1048
1141
|
|
|
1049
1142
|
await logChain;
|
|
1143
|
+
try {
|
|
1144
|
+
await reportWorkerLlmUsage(opts.server, headers, jobData, result);
|
|
1145
|
+
} catch (err) {
|
|
1146
|
+
console.warn(
|
|
1147
|
+
`[WorkerPals] Failed to report LLM usage for job ${job.id}: ${
|
|
1148
|
+
err instanceof Error ? err.message : String(err)
|
|
1149
|
+
}`,
|
|
1150
|
+
);
|
|
1151
|
+
}
|
|
1050
1152
|
|
|
1051
1153
|
let completionCommit: CommitRef | null = null;
|
|
1052
1154
|
if (result.ok && shouldCommit(job.kind, CONFIG)) {
|
|
@@ -21,6 +21,7 @@ port = 3001
|
|
|
21
21
|
debug_http = false
|
|
22
22
|
stale_claim_ttl_ms = 120000
|
|
23
23
|
stale_claim_sweep_interval_ms = 5000
|
|
24
|
+
session_token_budget = 2000000
|
|
24
25
|
|
|
25
26
|
[localbuddy]
|
|
26
27
|
enabled = false
|
|
@@ -197,7 +198,7 @@ session_id = "workerpals-dev"
|
|
|
197
198
|
[workerpals.openai_codex]
|
|
198
199
|
timeout_ms = 7200000
|
|
199
200
|
progress_log_interval_s = 30
|
|
200
|
-
reasoning_effort = "
|
|
201
|
+
reasoning_effort = "xhigh"
|
|
201
202
|
approval_policy = "never"
|
|
202
203
|
sandbox = "workspace-write"
|
|
203
204
|
color = "never"
|
|
@@ -8,19 +8,19 @@
|
|
|
8
8
|
|
|
9
9
|
[localbuddy.llm]
|
|
10
10
|
backend = "openai_codex"
|
|
11
|
-
model = "gpt-5
|
|
11
|
+
model = "gpt-5.4"
|
|
12
12
|
codex_auth_mode = "chatgpt"
|
|
13
13
|
codex_bin = "bun x --yes @openai/codex"
|
|
14
14
|
codex_timeout_ms = 120000
|
|
15
|
-
reasoning_effort = "
|
|
15
|
+
reasoning_effort = "xhigh"
|
|
16
16
|
|
|
17
17
|
[remotebuddy.llm]
|
|
18
18
|
backend = "openai_codex"
|
|
19
|
-
model = "gpt-5
|
|
19
|
+
model = "gpt-5.4"
|
|
20
20
|
codex_auth_mode = "chatgpt"
|
|
21
21
|
codex_bin = "bun x --yes @openai/codex"
|
|
22
22
|
codex_timeout_ms = 120000
|
|
23
|
-
reasoning_effort = "
|
|
23
|
+
reasoning_effort = "xhigh"
|
|
24
24
|
|
|
25
25
|
[remotebuddy]
|
|
26
26
|
max_workerpals = 10
|
|
@@ -42,11 +42,11 @@ retention_days = 30
|
|
|
42
42
|
|
|
43
43
|
[workerpals.llm]
|
|
44
44
|
backend = "openai_codex"
|
|
45
|
-
model = "gpt-5
|
|
45
|
+
model = "gpt-5.4"
|
|
46
46
|
codex_auth_mode = "chatgpt"
|
|
47
47
|
codex_bin = "bun x --yes @openai/codex"
|
|
48
48
|
codex_timeout_ms = 120000
|
|
49
|
-
reasoning_effort = "
|
|
49
|
+
reasoning_effort = "xhigh"
|
|
50
50
|
|
|
51
51
|
[workerpals]
|
|
52
52
|
executor = "openai_codex"
|
|
@@ -92,7 +92,7 @@ bin = "bun x --yes @openai/codex"
|
|
|
92
92
|
timeout_ms = 7200000
|
|
93
93
|
progress_log_interval_s = 30
|
|
94
94
|
# timeout_s = 120 # optional; if set, overrides timeout_ms
|
|
95
|
-
reasoning_effort = "
|
|
95
|
+
reasoning_effort = "xhigh"
|
|
96
96
|
approval_policy = "never"
|
|
97
97
|
sandbox = "workspace-write"
|
|
98
98
|
color = "never"
|
|
@@ -82,6 +82,8 @@ export interface PushPalsConfig {
|
|
|
82
82
|
debugHttp: boolean;
|
|
83
83
|
staleClaimTtlMs: number;
|
|
84
84
|
staleClaimSweepIntervalMs: number;
|
|
85
|
+
sessionTokenBudget: number;
|
|
86
|
+
sessionTokenBudgetAction: "pause";
|
|
85
87
|
};
|
|
86
88
|
localbuddy: {
|
|
87
89
|
enabled: boolean;
|
|
@@ -682,6 +684,14 @@ export function loadPushPalsConfig(options: LoadOptions = {}): PushPalsConfig {
|
|
|
682
684
|
5_000,
|
|
683
685
|
),
|
|
684
686
|
);
|
|
687
|
+
const sessionTokenBudget = Math.max(
|
|
688
|
+
0,
|
|
689
|
+
asInt(
|
|
690
|
+
parseIntEnv("PUSHPALS_SESSION_TOKEN_BUDGET") ?? serverNode.session_token_budget,
|
|
691
|
+
1_000_000,
|
|
692
|
+
),
|
|
693
|
+
);
|
|
694
|
+
const sessionTokenBudgetAction: "pause" = "pause";
|
|
685
695
|
|
|
686
696
|
const globalStatusHeartbeatMs = parseIntEnv("PUSHPALS_STATUS_HEARTBEAT_MS");
|
|
687
697
|
|
|
@@ -1467,6 +1477,8 @@ export function loadPushPalsConfig(options: LoadOptions = {}): PushPalsConfig {
|
|
|
1467
1477
|
debugHttp,
|
|
1468
1478
|
staleClaimTtlMs,
|
|
1469
1479
|
staleClaimSweepIntervalMs,
|
|
1480
|
+
sessionTokenBudget,
|
|
1481
|
+
sessionTokenBudgetAction,
|
|
1470
1482
|
},
|
|
1471
1483
|
localbuddy: {
|
|
1472
1484
|
enabled: localEnabled,
|