@pushpalsdev/cli 1.0.22 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +103 -26
- package/package.json +1 -1
- package/runtime/configs/default.toml +1 -0
- package/runtime/configs/local.example.toml +3 -3
- package/runtime/sandbox/apps/workerpals/src/backends/miniswe/miniswe_executor.py +2 -5
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +172 -11
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +93 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openhands/openhands_executor.py +2 -2
- package/runtime/sandbox/apps/workerpals/src/backends/openhands/test_openhands_runtime_paths.py +57 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openhands_task_execute.ts +94 -1
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +19 -12
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +115 -15
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +10 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +24 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +21 -3
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +102 -0
- package/runtime/sandbox/configs/default.toml +1 -0
- package/runtime/sandbox/configs/local.example.toml +3 -3
- package/runtime/sandbox/packages/shared/src/config.ts +12 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import sys
|
|
2
3
|
import unittest
|
|
4
|
+
import tempfile
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
|
|
5
7
|
_HERE = Path(__file__).resolve().parent
|
|
@@ -8,13 +10,16 @@ for path in (_HERE, _SHARED):
|
|
|
8
10
|
if str(path) not in sys.path:
|
|
9
11
|
sys.path.insert(0, str(path))
|
|
10
12
|
|
|
11
|
-
from executor_base import SettingsResolver
|
|
13
|
+
from executor_base import SettingsResolver, config_dir_for_runtime_config, runtime_config
|
|
12
14
|
from openai_codex_executor import (
|
|
13
15
|
OpenAICodexRuntimeConfig,
|
|
16
|
+
_resolve_reasoning_effort,
|
|
14
17
|
_build_instruction,
|
|
15
18
|
_detect_codex_workaround_signal,
|
|
19
|
+
_extract_usage_counts,
|
|
16
20
|
_load_prompt_template,
|
|
17
21
|
_repo_root_for_prompt_loading,
|
|
22
|
+
_usage_from_trace_or_estimate,
|
|
18
23
|
)
|
|
19
24
|
|
|
20
25
|
|
|
@@ -55,8 +60,69 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
55
60
|
self.assertEqual(cfg.approval_policy, "never")
|
|
56
61
|
self.assertEqual(cfg.sandbox, "workspace-write")
|
|
57
62
|
self.assertEqual(cfg.color, "never")
|
|
63
|
+
self.assertEqual(cfg.reasoning_effort, "high")
|
|
58
64
|
self.assertFalse(cfg.json_output)
|
|
59
65
|
|
|
66
|
+
def test_reasoning_effort_caps_extra_high_for_gpt_5_4(self) -> None:
    """An "extra high" request resolves to "high" when no model is passed.

    NOTE(review): the test name implies the default model is gpt-5.4;
    confirm against ``_resolve_reasoning_effort``.
    """
    env_only = SettingsResolver(
        env={"WORKERPALS_OPENAI_CODEX_REASONING_EFFORT": "extra high"},
        config_loader=lambda: {},
    )
    runtime_cfg = OpenAICodexRuntimeConfig.from_sources(env_only)

    resolved = _resolve_reasoning_effort(runtime_cfg)

    self.assertEqual(resolved, "high")
|
|
74
|
+
|
|
75
|
+
def test_reasoning_effort_preserves_extra_high_for_future_models(self) -> None:
    """An "extra high" request resolves to "xhigh" for model "gpt-6-preview"."""
    env_only = SettingsResolver(
        env={"WORKERPALS_OPENAI_CODEX_REASONING_EFFORT": "extra high"},
        config_loader=lambda: {},
    )
    runtime_cfg = OpenAICodexRuntimeConfig.from_sources(env_only)

    resolved = _resolve_reasoning_effort(runtime_cfg, model="gpt-6-preview")

    self.assertEqual(resolved, "xhigh")
|
|
83
|
+
|
|
84
|
+
def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
    """runtime_config() loads from PUSHPALS_CONFIG_DIR_OVERRIDE, not <repo>/configs."""
    import executor_base

    with tempfile.TemporaryDirectory(prefix="pushpals-openai-codex-config-") as tmp:
        base = Path(tmp)
        repo_root = base / "repo"
        runtime_config_dir = base / "runtime" / "configs"
        # The two default.toml files disagree on `json`, so the assertion
        # below shows which directory was actually read.
        fixtures = (
            (
                runtime_config_dir,
                'profile = "dev"\n[workerpals.openai_codex]\njson = true\n',
            ),
            (
                repo_root / "configs",
                'profile = "dev"\n[workerpals.openai_codex]\njson = false\n',
            ),
        )
        for directory, toml_text in fixtures:
            directory.mkdir(parents=True, exist_ok=True)
            (directory / "default.toml").write_text(toml_text, encoding="utf-8")

        overrides = {
            "PUSHPALS_REPO_PATH": str(repo_root),
            "PUSHPALS_CONFIG_DIR_OVERRIDE": str(runtime_config_dir),
            "PUSHPALS_PROFILE": "dev",
        }
        saved_env = {name: os.environ.get(name) for name in overrides}
        saved_cache = executor_base._CONFIG_CACHE
        try:
            os.environ.update(overrides)
            # A non-None cache short-circuits runtime_config(); clear it so
            # the files written above are actually parsed.
            executor_base._CONFIG_CACHE = None

            self.assertEqual(config_dir_for_runtime_config(), runtime_config_dir)
            loaded = runtime_config()
            self.assertTrue(loaded["workerpals"]["openai_codex"]["json"])
        finally:
            # Restore global state so other tests are unaffected.
            executor_base._CONFIG_CACHE = saved_cache
            for name, old_value in saved_env.items():
                if old_value is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = old_value
|
|
125
|
+
|
|
60
126
|
def test_build_instruction_includes_codex_runtime_invariants(self) -> None:
|
|
61
127
|
prompt = _build_instruction("Add two tests for localbuddy", [])
|
|
62
128
|
self.assertIn("Codex CLI is required infrastructure", prompt)
|
|
@@ -105,6 +171,32 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
105
171
|
template = _load_prompt_template("workerpals/openai_codex_task_execute_system_prompt.md")
|
|
106
172
|
self.assertIn("Codex CLI is required infrastructure", template)
|
|
107
173
|
|
|
174
|
+
def test_extracts_usage_counts_from_nested_json_event(self) -> None:
    """Usage nested under "response" is found and mapped to prompt/completion keys."""
    event = {
        "type": "response.completed",
        "response": {
            "usage": {
                "input_tokens": 120,
                "output_tokens": 30,
                "total_tokens": 150,
            }
        },
    }
    expected = {"prompt_tokens": 120, "completion_tokens": 30, "total_tokens": 150}

    self.assertEqual(_extract_usage_counts(event), expected)
|
|
191
|
+
|
|
192
|
+
def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
    """An empty trace yields an estimated usage record tagged with backend and model."""
    empty_trace = {}
    result = _usage_from_trace_or_estimate(
        empty_trace,
        "abc" * 30,
        "done",
        model="gpt-5.4",
    )

    self.assertTrue(result["estimated"])
    self.assertEqual(result["backend"], "openai_codex")
    self.assertEqual(result["modelId"], "gpt-5.4")
    self.assertGreater(result["promptTokens"], 0)
    # The total must exceed the completion share, i.e. include the prompt.
    self.assertGreater(result["totalTokens"], result["completionTokens"])
|
|
199
|
+
|
|
108
200
|
|
|
109
201
|
# Run the test cases when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -28,7 +28,7 @@ from executor_base import (
|
|
|
28
28
|
log_git_status,
|
|
29
29
|
looks_local_base_url,
|
|
30
30
|
parse_task_execute_payload,
|
|
31
|
-
|
|
31
|
+
prompts_root_for_runtime_assets,
|
|
32
32
|
resolve_llm_config,
|
|
33
33
|
setting_int,
|
|
34
34
|
setting_str,
|
|
@@ -78,7 +78,7 @@ def _session_hint_headers(session_user: str) -> Dict[str, str]:
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
def _repo_root_for_prompt_loading() -> Path:
    """Return the root directory used when locating prompt files.

    Thin delegate to ``prompts_root_for_runtime_assets``.
    """
    prompts_root = prompts_root_for_runtime_assets()
    return prompts_root
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
def _resolve_prompt_file(relative_path: str) -> Path:
|
package/runtime/sandbox/apps/workerpals/src/backends/openhands/test_openhands_runtime_paths.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import tempfile
|
|
4
|
+
import unittest
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
_HERE = Path(__file__).resolve().parent
|
|
8
|
+
_SHARED = _HERE.parent / "shared"
|
|
9
|
+
for path in (_HERE, _SHARED):
|
|
10
|
+
if str(path) not in sys.path:
|
|
11
|
+
sys.path.insert(0, str(path))
|
|
12
|
+
|
|
13
|
+
from openhands_executor import _PROMPT_TEMPLATE_CACHE, _load_prompt_template, _resolve_prompt_file
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class OpenHandsRuntimePathTests(unittest.TestCase):
    """Checks where the OpenHands executor looks for prompt files."""

    def test_prompt_resolution_prefers_explicit_prompt_root_override(self) -> None:
        """PUSHPALS_PROMPTS_ROOT_OVERRIDE wins over PUSHPALS_REPO_PATH for prompt lookup."""
        relative_name = "workerpals/openhands_strict_tool_use_message.md"
        with tempfile.TemporaryDirectory(prefix="pushpals-openhands-prompts-") as tmp:
            base = Path(tmp)
            repo_root = base / "repo"
            runtime_root = base / "runtime"
            repo_prompt = repo_root.joinpath(
                "prompts", "workerpals", "openhands_strict_tool_use_message.md"
            )
            runtime_prompt = runtime_root.joinpath(
                "prompts", "workerpals", "openhands_strict_tool_use_message.md"
            )
            # Same relative file under both roots, with different contents,
            # so the loaded text reveals which root was used.
            for prompt_path, body in (
                (repo_prompt, "repo prompt"),
                (runtime_prompt, "runtime prompt"),
            ):
                prompt_path.parent.mkdir(parents=True, exist_ok=True)
                prompt_path.write_text(body, encoding="utf-8")

            overrides = {
                "PUSHPALS_REPO_PATH": str(repo_root),
                "PUSHPALS_PROMPTS_ROOT_OVERRIDE": str(runtime_root),
            }
            saved_env = {name: os.environ.get(name) for name in overrides}
            saved_cache = dict(_PROMPT_TEMPLATE_CACHE)
            try:
                os.environ.update(overrides)
                _PROMPT_TEMPLATE_CACHE.clear()

                self.assertEqual(_resolve_prompt_file(relative_name), runtime_prompt)
                self.assertEqual(_load_prompt_template(relative_name), "runtime prompt")
            finally:
                # Put the shared cache and the environment back as they were.
                _PROMPT_TEMPLATE_CACHE.clear()
                _PROMPT_TEMPLATE_CACHE.update(saved_cache)
                for name, old_value in saved_env.items():
                    if old_value is None:
                        os.environ.pop(name, None)
                    else:
                        os.environ[name] = old_value
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Run the test cases when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
import { existsSync } from "fs";
|
|
10
10
|
import { resolve } from "path";
|
|
11
|
-
import type { JobResult } from "../common/types.js";
|
|
11
|
+
import type { JobResult, JobTokenUsage } from "../common/types.js";
|
|
12
12
|
import type { WorkerpalsRuntimeConfig } from "../common/executor_backend.js";
|
|
13
13
|
import {
|
|
14
14
|
truncate,
|
|
@@ -24,6 +24,83 @@ const OPENHANDS_SCRIPT_PATH = resolve(import.meta.dir, "openhands", "openhands_e
|
|
|
24
24
|
|
|
25
25
|
// ---- OpenHands-specific helpers ----------------------------------------------
|
|
26
26
|
|
|
27
|
+
/**
 * Rough token estimate for a piece of text: one token per three
 * characters, rounded up. Nullish input counts as empty text.
 */
function estimateTokensFromText(text: string): number {
  const charCount = String(text ?? "").length;
  const approxTokens = Math.ceil(charCount / 3);
  return Math.max(0, approxTokens);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Build a heuristic token-usage record for an OpenHands job.
 *
 * The prompt side is estimated from the JSON-serialized params, falling
 * back to the `instruction` or `prompt` field if serialization throws.
 * The completion side is estimated from the non-empty summary, stdout and
 * stderr joined by blank lines. The result is always marked `estimated`.
 */
function estimateJobTokenUsage(
  runtimeConfig: WorkerpalsRuntimeConfig,
  params: Record<string, unknown>,
  summary: string,
  stdout: string,
  stderr: string,
): JobTokenUsage {
  let promptText: string;
  try {
    promptText = JSON.stringify(params);
  } catch {
    promptText = String(params?.instruction ?? params?.prompt ?? "");
  }

  const outputParts = [summary, stdout, stderr].filter((part) => Boolean(part));
  const completionText = outputParts.join("\n\n");

  const promptTokens = estimateTokensFromText(promptText);
  const completionTokens = estimateTokensFromText(completionText);
  const totalTokens = promptTokens + completionTokens;

  return {
    promptTokens,
    completionTokens,
    totalTokens,
    estimated: true,
    backend: "openhands",
    modelId: runtimeConfig.workerpals.llm.model.trim(),
  };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Normalize an executor-reported usage payload into a `JobTokenUsage`.
 *
 * Accepts camelCase or snake_case count keys. Returns `fallback` unchanged
 * when `value` is not a plain object or carries no usable count. Missing
 * counts are derived from the reported total where possible, otherwise
 * taken from `fallback`.
 *
 * Only real numbers and non-empty numeric strings count as reported values.
 * `null`, `""`, booleans and other types are treated as absent; passing
 * them through `Number()` would yield 0 or 1 and mask a missing count as a
 * genuine reading.
 *
 * @param value    Raw `usage` field from the parsed executor result.
 * @param fallback Estimate to use for anything the payload does not supply.
 * @returns Normalized usage; `estimated` is false unless the payload says otherwise.
 */
function coerceJobTokenUsage(
  value: unknown,
  fallback: JobTokenUsage,
): JobTokenUsage {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return fallback;
  }
  const raw = value as Record<string, unknown>;

  // Read a count under either key style; NaN signals "not reported".
  const readCount = (camelKey: string, snakeKey: string): number => {
    const candidate = raw[camelKey] ?? raw[snakeKey];
    if (typeof candidate === "number") return candidate;
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return Number(candidate);
    }
    return Number.NaN;
  };
  const isUsable = (count: number): boolean => Number.isFinite(count) && count >= 0;

  const promptTokens = readCount("promptTokens", "prompt_tokens");
  const completionTokens = readCount("completionTokens", "completion_tokens");
  const totalTokens = readCount("totalTokens", "total_tokens");
  const hasPrompt = isUsable(promptTokens);
  const hasCompletion = isUsable(completionTokens);
  const hasTotal = isUsable(totalTokens);
  if (!hasPrompt && !hasCompletion && !hasTotal) {
    return fallback;
  }

  // With only a total, split it using the fallback's completion estimate.
  const normalizedPrompt = hasPrompt
    ? Math.round(promptTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - fallback.completionTokens)
      : fallback.promptTokens;
  const normalizedCompletion = hasCompletion
    ? Math.round(completionTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - normalizedPrompt)
      : fallback.completionTokens;
  const normalizedTotal = hasTotal
    ? Math.round(totalTokens)
    : normalizedPrompt + normalizedCompletion;

  return {
    promptTokens: normalizedPrompt,
    completionTokens: normalizedCompletion,
    totalTokens: normalizedTotal,
    estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
    backend:
      typeof raw.backend === "string" && raw.backend.trim().length > 0
        ? raw.backend.trim()
        : fallback.backend,
    modelId:
      typeof raw.modelId === "string" && raw.modelId.trim().length > 0
        ? raw.modelId.trim()
        : fallback.modelId,
  };
}
|
|
103
|
+
|
|
27
104
|
function classifyShellCommand(cmd: string): "explore" | "progress" {
|
|
28
105
|
const trimmed = cmd.trim().toLowerCase();
|
|
29
106
|
if (!trimmed) return "explore";
|
|
@@ -450,6 +527,7 @@ export async function executeWithOpenHands(
|
|
|
450
527
|
|
|
451
528
|
const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
|
|
452
529
|
const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
|
|
530
|
+
const fallbackUsage = estimateJobTokenUsage(runtimeConfig, params, "", filteredStdout, stderr);
|
|
453
531
|
|
|
454
532
|
if (!parsed) {
|
|
455
533
|
if (timedOut) {
|
|
@@ -464,6 +542,7 @@ export async function executeWithOpenHands(
|
|
|
464
542
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
465
543
|
stderr: truncate(stderr, outputPolicy),
|
|
466
544
|
exitCode: exitCode === 0 ? 124 : exitCode,
|
|
545
|
+
usage: fallbackUsage,
|
|
467
546
|
};
|
|
468
547
|
}
|
|
469
548
|
return {
|
|
@@ -472,6 +551,7 @@ export async function executeWithOpenHands(
|
|
|
472
551
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
473
552
|
stderr: truncate(stderr, outputPolicy),
|
|
474
553
|
exitCode,
|
|
554
|
+
usage: fallbackUsage,
|
|
475
555
|
};
|
|
476
556
|
}
|
|
477
557
|
|
|
@@ -483,6 +563,10 @@ export async function executeWithOpenHands(
|
|
|
483
563
|
: `${kind} failed via OpenHands (exit ${exitCode})`;
|
|
484
564
|
const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
|
|
485
565
|
const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
|
|
566
|
+
const usage = coerceJobTokenUsage(
|
|
567
|
+
parsed.usage,
|
|
568
|
+
estimateJobTokenUsage(runtimeConfig, params, summary, parsedStdout, parsedStderr),
|
|
569
|
+
);
|
|
486
570
|
const parsedExitCode =
|
|
487
571
|
typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
|
|
488
572
|
? parsed.exitCode
|
|
@@ -502,6 +586,7 @@ export async function executeWithOpenHands(
|
|
|
502
586
|
stdout: truncate(filteredStdout || String(parsedStdout ?? ""), outputPolicy),
|
|
503
587
|
stderr: truncate(`Clarification needed: ${clarificationQuestion}`, outputPolicy),
|
|
504
588
|
exitCode: 0,
|
|
589
|
+
usage,
|
|
505
590
|
};
|
|
506
591
|
}
|
|
507
592
|
}
|
|
@@ -512,12 +597,20 @@ export async function executeWithOpenHands(
|
|
|
512
597
|
stdout: truncate(parsedStdout ?? "", outputPolicy),
|
|
513
598
|
stderr: truncate(parsedStderr ?? "", outputPolicy),
|
|
514
599
|
exitCode: parsedExitCode,
|
|
600
|
+
usage,
|
|
515
601
|
};
|
|
516
602
|
} catch (err) {
|
|
517
603
|
return {
|
|
518
604
|
ok: false,
|
|
519
605
|
summary: `OpenHands wrapper execution error for ${kind}: ${String(err)}`,
|
|
520
606
|
exitCode: 1,
|
|
607
|
+
usage: estimateJobTokenUsage(
|
|
608
|
+
runtimeConfig,
|
|
609
|
+
params,
|
|
610
|
+
`OpenHands wrapper execution error for ${kind}: ${String(err)}`,
|
|
611
|
+
"",
|
|
612
|
+
"",
|
|
613
|
+
),
|
|
521
614
|
};
|
|
522
615
|
} finally {
|
|
523
616
|
if (warningTimer) {
|
|
@@ -203,6 +203,24 @@ def repo_root_for_runtime_config() -> Path:
|
|
|
203
203
|
return Path(__file__).resolve().parents[3]
|
|
204
204
|
|
|
205
205
|
|
|
206
|
+
def config_dir_for_runtime_config() -> Path:
    """Return the directory that runtime TOML config files are read from.

    A non-blank ``PUSHPALS_CONFIG_DIR_OVERRIDE`` environment variable takes
    precedence (surrounding whitespace is stripped); otherwise the
    ``configs`` directory under the runtime-config repo root is used.
    """
    override = os.environ.get("PUSHPALS_CONFIG_DIR_OVERRIDE")
    override = override.strip() if override else ""
    if not override:
        return repo_root_for_runtime_config() / "configs"
    return Path(override)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def prompts_root_for_runtime_assets() -> Path:
    """Return the directory whose ``prompts`` subdirectory holds prompt files.

    Resolution order:
      1. A non-blank ``PUSHPALS_PROMPTS_ROOT_OVERRIDE`` environment variable
         (whitespace stripped), used as-is.
      2. The nearest ancestor of this file that contains a ``prompts``
         directory.
      3. The runtime-config repo root.
    """
    override = os.environ.get("PUSHPALS_PROMPTS_ROOT_OVERRIDE")
    override = override.strip() if override else ""
    if override:
        return Path(override)

    this_file = Path(__file__).resolve()
    nearest = next(
        (ancestor for ancestor in this_file.parents if (ancestor / "prompts").is_dir()),
        None,
    )
    if nearest is not None:
        return nearest
    return repo_root_for_runtime_config()
|
|
222
|
+
|
|
223
|
+
|
|
206
224
|
def _parse_toml_file(path: Path) -> Dict[str, Any]:
|
|
207
225
|
if not path.exists() or not tomllib:
|
|
208
226
|
return {}
|
|
@@ -217,12 +235,7 @@ def runtime_config() -> Dict[str, Any]:
|
|
|
217
235
|
global _CONFIG_CACHE
|
|
218
236
|
if _CONFIG_CACHE is not None:
|
|
219
237
|
return _CONFIG_CACHE
|
|
220
|
-
|
|
221
|
-
legacy_config_dir = repo_root / "config"
|
|
222
|
-
config_dir = repo_root / "configs"
|
|
223
|
-
if not (config_dir / "default.toml").exists():
|
|
224
|
-
if (legacy_config_dir / "default.toml").exists():
|
|
225
|
-
config_dir = legacy_config_dir
|
|
238
|
+
config_dir = config_dir_for_runtime_config()
|
|
226
239
|
default_cfg = _parse_toml_file(config_dir / "default.toml")
|
|
227
240
|
profile = (
|
|
228
241
|
(os.environ.get("PUSHPALS_PROFILE") or "").strip()
|
|
@@ -231,12 +244,6 @@ def runtime_config() -> Dict[str, Any]:
|
|
|
231
244
|
)
|
|
232
245
|
profile_cfg = _parse_toml_file(config_dir / f"{profile}.toml")
|
|
233
246
|
local_cfg = _parse_toml_file(config_dir / "local.toml")
|
|
234
|
-
if (
|
|
235
|
-
not local_cfg
|
|
236
|
-
and config_dir != legacy_config_dir
|
|
237
|
-
and (legacy_config_dir / "local.toml").exists()
|
|
238
|
-
):
|
|
239
|
-
local_cfg = _parse_toml_file(legacy_config_dir / "local.toml")
|
|
240
247
|
_CONFIG_CACHE = _deep_merge(_deep_merge(default_cfg, profile_cfg), local_cfg)
|
|
241
248
|
return _CONFIG_CACHE
|
|
242
249
|
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import { existsSync } from "fs";
|
|
11
11
|
import { resolve } from "path";
|
|
12
|
-
import type { JobResult } from "./types.js";
|
|
12
|
+
import type { JobResult, JobTokenUsage } from "./types.js";
|
|
13
13
|
import type { WorkerpalsRuntimeConfig } from "./executor_backend.js";
|
|
14
14
|
import type { BackendTaskExecutor } from "../backends/types.js";
|
|
15
15
|
import {
|
|
@@ -26,6 +26,84 @@ interface GenericPythonExecutorConfig {
|
|
|
26
26
|
timeoutConfigKey: string;
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
/**
 * Approximate the token count of `text` as ceil(length / 3).
 * Nullish input is treated as the empty string.
 */
function estimateTokensFromText(text: string): number {
  const normalized = String(text ?? "");
  const estimate = Math.ceil(normalized.length / 3);
  return Math.max(0, estimate);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Build a heuristic token-usage record for a generic Python-executor job.
 *
 * The prompt side is estimated from the JSON-serialized params, falling
 * back to the `instruction` or `prompt` field if serialization throws.
 * The completion side is estimated from the non-empty summary, stdout and
 * stderr joined by blank lines. The result is always marked `estimated`
 * and carries the given backend name and model id.
 */
function estimateJobTokenUsage(
  backendName: string,
  modelId: string,
  params: Record<string, unknown>,
  summary: string,
  stdout: string,
  stderr: string,
): JobTokenUsage {
  let promptText: string;
  try {
    promptText = JSON.stringify(params);
  } catch {
    promptText = String(params?.instruction ?? params?.prompt ?? "");
  }

  const outputParts = [summary, stdout, stderr].filter((part) => Boolean(part));
  const completionText = outputParts.join("\n\n");

  const promptTokens = estimateTokensFromText(promptText);
  const completionTokens = estimateTokensFromText(completionText);
  const totalTokens = promptTokens + completionTokens;

  return {
    promptTokens,
    completionTokens,
    totalTokens,
    estimated: true,
    backend: backendName,
    modelId,
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Normalize an executor-reported usage payload into a `JobTokenUsage`.
 *
 * Accepts camelCase or snake_case count keys. Returns `fallback` unchanged
 * when `value` is not a plain object or carries no usable count. Missing
 * counts are derived from the reported total where possible, otherwise
 * taken from `fallback`.
 *
 * Only real numbers and non-empty numeric strings count as reported values.
 * `null`, `""`, booleans and other types are treated as absent; passing
 * them through `Number()` would yield 0 or 1 and mask a missing count as a
 * genuine reading.
 *
 * @param value    Raw `usage` field from the parsed executor result.
 * @param fallback Estimate to use for anything the payload does not supply.
 * @returns Normalized usage; `estimated` is false unless the payload says otherwise.
 */
function coerceJobTokenUsage(
  value: unknown,
  fallback: JobTokenUsage,
): JobTokenUsage {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return fallback;
  }
  const raw = value as Record<string, unknown>;

  // Read a count under either key style; NaN signals "not reported".
  const readCount = (camelKey: string, snakeKey: string): number => {
    const candidate = raw[camelKey] ?? raw[snakeKey];
    if (typeof candidate === "number") return candidate;
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return Number(candidate);
    }
    return Number.NaN;
  };
  const isUsable = (count: number): boolean => Number.isFinite(count) && count >= 0;

  const promptTokens = readCount("promptTokens", "prompt_tokens");
  const completionTokens = readCount("completionTokens", "completion_tokens");
  const totalTokens = readCount("totalTokens", "total_tokens");
  const hasPrompt = isUsable(promptTokens);
  const hasCompletion = isUsable(completionTokens);
  const hasTotal = isUsable(totalTokens);
  if (!hasPrompt && !hasCompletion && !hasTotal) {
    return fallback;
  }

  // With only a total, split it using the fallback's completion estimate.
  const normalizedPrompt = hasPrompt
    ? Math.round(promptTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - fallback.completionTokens)
      : fallback.promptTokens;
  const normalizedCompletion = hasCompletion
    ? Math.round(completionTokens)
    : hasTotal
      ? Math.max(0, Math.round(totalTokens) - normalizedPrompt)
      : fallback.completionTokens;
  const normalizedTotal = hasTotal
    ? Math.round(totalTokens)
    : normalizedPrompt + normalizedCompletion;

  return {
    promptTokens: normalizedPrompt,
    completionTokens: normalizedCompletion,
    totalTokens: normalizedTotal,
    estimated: typeof raw.estimated === "boolean" ? raw.estimated : false,
    backend:
      typeof raw.backend === "string" && raw.backend.trim().length > 0
        ? raw.backend.trim()
        : fallback.backend,
    modelId:
      typeof raw.modelId === "string" && raw.modelId.trim().length > 0
        ? raw.modelId.trim()
        : fallback.modelId,
  };
}
|
|
106
|
+
|
|
29
107
|
function resolveRuntimeSettings(
|
|
30
108
|
config: GenericPythonExecutorConfig,
|
|
31
109
|
runtimeConfig: WorkerpalsRuntimeConfig,
|
|
@@ -69,6 +147,7 @@ export function createGenericPythonExecutor(
|
|
|
69
147
|
config,
|
|
70
148
|
runtimeConfig,
|
|
71
149
|
);
|
|
150
|
+
const modelId = runtimeConfig.workerpals.llm.model.trim();
|
|
72
151
|
const executionBudgetMs =
|
|
73
152
|
typeof budgets?.executionBudgetMs === "number" && Number.isFinite(budgets.executionBudgetMs)
|
|
74
153
|
? Math.max(10_000, Math.floor(budgets.executionBudgetMs))
|
|
@@ -158,6 +237,14 @@ export function createGenericPythonExecutor(
|
|
|
158
237
|
|
|
159
238
|
const parsed = parseStructuredResult(stdout, outputPolicy.executorResultPrefix);
|
|
160
239
|
const filteredStdout = filterResultLines(stdout, outputPolicy.executorResultPrefix);
|
|
240
|
+
const fallbackUsage = estimateJobTokenUsage(
|
|
241
|
+
backendName,
|
|
242
|
+
modelId,
|
|
243
|
+
params,
|
|
244
|
+
"",
|
|
245
|
+
filteredStdout,
|
|
246
|
+
stderr,
|
|
247
|
+
);
|
|
161
248
|
|
|
162
249
|
if (!parsed) {
|
|
163
250
|
if (timedOut) {
|
|
@@ -167,6 +254,7 @@ export function createGenericPythonExecutor(
|
|
|
167
254
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
168
255
|
stderr: truncate(stderr, outputPolicy),
|
|
169
256
|
exitCode: exitCode === 0 ? 124 : exitCode,
|
|
257
|
+
usage: fallbackUsage,
|
|
170
258
|
};
|
|
171
259
|
}
|
|
172
260
|
return {
|
|
@@ -175,35 +263,47 @@ export function createGenericPythonExecutor(
|
|
|
175
263
|
stdout: truncate(filteredStdout, outputPolicy),
|
|
176
264
|
stderr: truncate(stderr, outputPolicy),
|
|
177
265
|
exitCode,
|
|
266
|
+
usage: fallbackUsage,
|
|
178
267
|
};
|
|
179
268
|
}
|
|
180
269
|
|
|
270
|
+
const summary =
|
|
271
|
+
typeof parsed.summary === "string"
|
|
272
|
+
? parsed.summary
|
|
273
|
+
: exitCode === 0
|
|
274
|
+
? `${kind} passed via ${backendName}`
|
|
275
|
+
: `${kind} failed via ${backendName} (exit ${exitCode})`;
|
|
276
|
+
const parsedStdout = typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout;
|
|
277
|
+
const parsedStderr = typeof parsed.stderr === "string" ? parsed.stderr : stderr;
|
|
278
|
+
const usage = coerceJobTokenUsage(
|
|
279
|
+
parsed.usage,
|
|
280
|
+
estimateJobTokenUsage(backendName, modelId, params, summary, parsedStdout, parsedStderr),
|
|
281
|
+
);
|
|
282
|
+
|
|
181
283
|
return {
|
|
182
284
|
ok: typeof parsed.ok === "boolean" ? parsed.ok : exitCode === 0,
|
|
183
|
-
summary
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
: exitCode === 0
|
|
187
|
-
? `${kind} passed via ${backendName}`
|
|
188
|
-
: `${kind} failed via ${backendName} (exit ${exitCode})`,
|
|
189
|
-
stdout: truncate(
|
|
190
|
-
typeof parsed.stdout === "string" ? parsed.stdout : filteredStdout,
|
|
191
|
-
outputPolicy,
|
|
192
|
-
),
|
|
193
|
-
stderr: truncate(
|
|
194
|
-
typeof parsed.stderr === "string" ? parsed.stderr : stderr,
|
|
195
|
-
outputPolicy,
|
|
196
|
-
),
|
|
285
|
+
summary,
|
|
286
|
+
stdout: truncate(parsedStdout, outputPolicy),
|
|
287
|
+
stderr: truncate(parsedStderr, outputPolicy),
|
|
197
288
|
exitCode:
|
|
198
289
|
typeof parsed.exitCode === "number" && Number.isFinite(parsed.exitCode)
|
|
199
290
|
? parsed.exitCode
|
|
200
291
|
: exitCode,
|
|
292
|
+
usage,
|
|
201
293
|
};
|
|
202
294
|
} catch (err) {
|
|
203
295
|
return {
|
|
204
296
|
ok: false,
|
|
205
297
|
summary: `${backendName} wrapper execution error for ${kind}: ${String(err)}`,
|
|
206
298
|
exitCode: 1,
|
|
299
|
+
usage: estimateJobTokenUsage(
|
|
300
|
+
backendName,
|
|
301
|
+
runtimeConfig.workerpals.llm.model.trim(),
|
|
302
|
+
params,
|
|
303
|
+
`${backendName} wrapper execution error for ${kind}: ${String(err)}`,
|
|
304
|
+
"",
|
|
305
|
+
"",
|
|
306
|
+
),
|
|
207
307
|
};
|
|
208
308
|
}
|
|
209
309
|
};
|
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
export type ExecutorBackend = string;
|
|
2
2
|
|
|
3
|
+
/** Token accounting attached to a job result. */
export interface JobTokenUsage {
  /** Tokens attributed to the prompt (input) side. */
  promptTokens: number;
  /** Tokens attributed to the completion (output) side. */
  completionTokens: number;
  /** Overall token count, when available. */
  totalTokens?: number;
  /** True when the counts are a heuristic estimate rather than reported values. */
  estimated?: boolean;
  /** Name of the backend the usage is attributed to (e.g. "openhands"). */
  backend?: string;
  /** Identifier of the model the usage is attributed to. */
  modelId?: string;
}
|
|
11
|
+
|
|
3
12
|
/** Outcome of a job as returned by an executor backend. */
export interface JobResult {
  /** Whether the job succeeded. */
  ok: boolean;
  /** Short human-readable description of the outcome. */
  summary: string;
  /** Captured standard output, if any. */
  stdout?: string;
  /** Captured standard error, if any. */
  stderr?: string;
  /** Exit code associated with the job, if any. */
  exitCode?: number;
  /** Token usage for the job, reported or estimated. */
  usage?: JobTokenUsage;
}
|
|
@@ -1048,6 +1048,7 @@ export class DockerExecutor {
|
|
|
1048
1048
|
|
|
1049
1049
|
const worktreeRelPath = relative(this.options.repo, worktreePath).replace(/\\/g, "/");
|
|
1050
1050
|
const containerWorktreePath = `/repo/${worktreeRelPath}`;
|
|
1051
|
+
await this.waitForWorktreePathInWarmContainer(containerWorktreePath);
|
|
1051
1052
|
|
|
1052
1053
|
const args: string[] = [
|
|
1053
1054
|
"exec",
|
|
@@ -1124,6 +1125,26 @@ export class DockerExecutor {
|
|
|
1124
1125
|
return result;
|
|
1125
1126
|
}
|
|
1126
1127
|
|
|
1128
|
+
/**
 * Poll the warm container until `containerWorktreePath` exists as a
 * directory, probing with `test -d` and pausing 100 ms between attempts.
 *
 * @param containerWorktreePath Path to check inside the container.
 * @param timeoutMs Polling window in milliseconds (default 5000).
 * @throws Error when the path is still not visible once the window has
 *   elapsed; the message includes the last probe's output, if any.
 */
private async waitForWorktreePathInWarmContainer(
  containerWorktreePath: string,
  timeoutMs = 5_000,
): Promise<void> {
  const giveUpAt = Date.now() + timeoutMs;
  const probe = `test -d ${shellSingleQuote(containerWorktreePath)}`;
  let lastOutput = "";

  for (let now = Date.now(); now < giveUpAt; now = Date.now()) {
    const attempt = await this.runWarmShell(probe);
    if (attempt.ok) {
      return;
    }
    // Keep the latest failure output for the error message.
    const pieces = [attempt.stdout, attempt.stderr].filter((piece) => Boolean(piece));
    lastOutput = pieces.join("\n").trim();
    await this.sleep(100);
  }

  const suffix = lastOutput ? ` (${lastOutput})` : "";
  throw new Error(
    `worktree path not visible inside warm container after ${timeoutMs}ms: ${containerWorktreePath}${suffix}`,
  );
}
|
|
1147
|
+
|
|
1127
1148
|
private normalizeProvider(raw: string): string {
|
|
1128
1149
|
const value = raw.trim().toLowerCase();
|
|
1129
1150
|
if (!value) return "auto";
|
|
@@ -1450,6 +1471,9 @@ export class DockerExecutor {
|
|
|
1450
1471
|
/\btemporary failure\b/i,
|
|
1451
1472
|
/\bopenhands wrapper timed out\b/i,
|
|
1452
1473
|
/\bjob timed out in docker executor\b/i,
|
|
1474
|
+
/\bworktree path not visible inside warm container\b/i,
|
|
1475
|
+
/\bchdir to cwd\b/i,
|
|
1476
|
+
/\bunable to start container process\b/i,
|
|
1453
1477
|
];
|
|
1454
1478
|
return transientPatterns.some((pattern) => pattern.test(text));
|
|
1455
1479
|
}
|