@pushpalsdev/cli 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2022,12 +2022,29 @@ export function shouldUseCodexCliForExecutor(executor: string): boolean {
2022
2022
  return executor.trim().toLowerCase() === "openai_codex";
2023
2023
  }
2024
2024
 
2025
/**
 * Model-id prefixes for which an "xhigh" request is downgraded to "high".
 * Matches ids that are exactly `gpt-5.4` / `codex-1p` or start with one of
 * those followed by a hyphen (case-insensitive).
 *
 * NOTE(review): this list is taken as-is from the previous inline regex —
 * confirm against the Codex CLI's current model capabilities.
 */
const CODEX_MODELS_WITHOUT_XHIGH = /^(gpt-5\.4(?:$|-)|codex-1p(?:$|-))/i;

/**
 * Coerces an arbitrary config value into a reasoning-effort level.
 *
 * The value is stringified, trimmed and lower-cased before matching.
 * "low", "medium" and "high" are returned unchanged. "xhigh" and its
 * spelled-out aliases ("extra high", "extra-high", "extra_high", "x-high",
 * "x_high", "x high", "extrahigh") resolve to "xhigh", unless `model` matches
 * {@link CODEX_MODELS_WITHOUT_XHIGH}, in which case they resolve to "high".
 *
 * @param value - Raw setting; `null`/`undefined` are treated as empty.
 * @param model - Model id used to decide whether "xhigh" is allowed.
 *   Defaults to the empty string, which allows "xhigh".
 * @returns The normalized level; "high" for anything unrecognized.
 */
function normalizeCodexReasoningEffort(
  value: unknown,
  model = "",
): "low" | "medium" | "high" | "xhigh" {
  const normalized = String(value ?? "")
    .trim()
    .toLowerCase();
  if (normalized === "low" || normalized === "medium" || normalized === "high") {
    return normalized;
  }

  // Collapse whitespace, hyphens and underscores so every spelling of the
  // extra-high alias ("extra high", "x-high", "extra_high", ...) compares equal.
  const compact = normalized.replace(/[\s_-]+/g, "");
  if (compact === "xhigh" || compact === "extrahigh") {
    const supportsExtraHigh = !CODEX_MODELS_WITHOUT_XHIGH.test(String(model ?? "").trim());
    return supportsExtraHigh ? "xhigh" : "high";
  }

  // Unknown or empty values fall back to the default level.
  return "high";
}
@@ -2090,6 +2107,7 @@ async function generateCommitMessageFromDiffViaCodex(
2090
2107
  })();
2091
2108
  const reasoningEffort = normalizeCodexReasoningEffort(
2092
2109
  runtimeConfig.workerpals.llm.reasoningEffort,
2110
+ model,
2093
2111
  );
2094
2112
  const tmpOutputPath = resolve(
2095
2113
  Bun.env.TEMP || Bun.env.TMP || Bun.env.TMPDIR || "/tmp",
@@ -91,6 +91,99 @@ function workerLlmConfig(runtimeConfig: ReturnType<typeof loadPushPalsConfig>):
91
91
  };
92
92
  }
93
93
 
94
/**
 * Rough token estimate derived purely from string length.
 *
 * @param text - Text to measure; `null`/`undefined` are treated as empty.
 * @param charsPerToken - Assumed average number of UTF-16 code units per
 *   token. Defaults to 3. Non-finite or non-positive values fall back to the
 *   default so the result is always a finite, non-negative integer.
 * @returns `ceil(length / charsPerToken)`; 0 for empty input.
 */
function estimateTokensFromText(text: string, charsPerToken = 3): number {
  const divisor = Number.isFinite(charsPerToken) && charsPerToken > 0 ? charsPerToken : 3;
  return Math.ceil(String(text ?? "").length / divisor);
}
97
+
98
/**
 * Builds the usage-telemetry payload for a finished worker job.
 *
 * The session id comes from the job, falling back to `CONFIG.sessionId`; when
 * neither yields a non-blank string no payload is produced. If the job result
 * carries usage with finite, non-negative prompt and completion counts, those
 * are rounded and reported (the total falls back to their sum when the
 * reported total is missing or invalid). Otherwise token counts are estimated
 * from the serialized job input and the result's summary/stdout/stderr, and
 * the payload is flagged `estimated: true`.
 *
 * @param job - Job descriptor (kind, optional session id and params).
 * @param result - Outcome of the job, optionally carrying usage.
 * @returns The telemetry payload, or `null` when there is no session id.
 */
function buildWorkerLlmUsageEvent(
  job: {
    kind: string;
    sessionId?: string | null;
    params?: Record<string, unknown> | null;
  },
  result: WorkerJobResult,
): Record<string, unknown> | null {
  const sessionId = String(job.sessionId ?? CONFIG.sessionId ?? "").trim();
  if (sessionId === "") {
    return null;
  }

  const llmConfig = workerLlmConfig(CONFIG);
  const reported = result.usage;

  // A usable count is a finite number that is not negative.
  const isTokenCount = (candidate: unknown): candidate is number =>
    typeof candidate === "number" && Number.isFinite(candidate) && candidate >= 0;

  if (
    reported &&
    isTokenCount(reported.promptTokens) &&
    isTokenCount(reported.completionTokens)
  ) {
    const promptTokens = Math.round(reported.promptTokens);
    const completionTokens = Math.round(reported.completionTokens);
    const summedTokens = promptTokens + completionTokens;
    const totalTokens = isTokenCount(reported.totalTokens)
      ? Math.round(reported.totalTokens)
      : summedTokens;
    // Blank backend/model strings fall back to the configured values.
    const backend =
      String(reported.backend ?? resolveExecutor(CONFIG)).trim() || resolveExecutor(CONFIG);
    const modelId = String(reported.modelId ?? llmConfig.model).trim() || llmConfig.model;
    return {
      service: "workerpals",
      sessionId,
      backend,
      modelId,
      promptTokens,
      completionTokens,
      totalTokens,
      estimated: reported.estimated === true,
    };
  }

  // No usable usage on the result: estimate from the job input and output text.
  let promptSource: string;
  try {
    promptSource = JSON.stringify({
      kind: job.kind,
      params: job.params ?? {},
    });
  } catch {
    // Params could not be serialized; use the instruction/prompt text instead.
    const hint = job.params?.instruction ?? job.params?.prompt ?? "";
    promptSource = `${job.kind}\n${String(hint)}`.trim();
  }

  const outputParts: string[] = [];
  for (const part of [result.summary, result.stdout ?? "", result.stderr ?? ""]) {
    if (part) {
      outputParts.push(part);
    }
  }
  const completionSource = outputParts.join("\n\n");

  const promptTokens = estimateTokensFromText(promptSource);
  const completionTokens = estimateTokensFromText(completionSource);
  return {
    service: "workerpals",
    sessionId,
    backend: resolveExecutor(CONFIG),
    modelId: llmConfig.model,
    promptTokens,
    completionTokens,
    totalTokens: promptTokens + completionTokens,
    estimated: true,
  };
}
161
+
162
/**
 * Posts the LLM usage payload for a finished job to the server's
 * `/telemetry/llm-usage` endpoint.
 *
 * Does nothing when `buildWorkerLlmUsageEvent` yields no payload. The request
 * is aborted after `timeoutMs` so that an unresponsive telemetry endpoint
 * cannot stall the caller, which awaits this function.
 *
 * @param server - Base URL of the server (no trailing slash is added or removed).
 * @param headers - Headers sent with the request, passed through unchanged.
 * @param job - Job descriptor forwarded to `buildWorkerLlmUsageEvent`.
 * @param result - Job result forwarded to `buildWorkerLlmUsageEvent`.
 * @param timeoutMs - Upper bound for the whole request, including reading an
 *   error body. Defaults to 10 seconds.
 * @throws Error when the server answers with a non-2xx status, when the
 *   request times out, or when `fetch` itself rejects.
 */
async function reportWorkerLlmUsage(
  server: string,
  headers: Record<string, string>,
  job: {
    kind: string;
    sessionId?: string | null;
    params?: Record<string, unknown> | null;
  },
  result: WorkerJobResult,
  timeoutMs = 10_000,
): Promise<void> {
  const payload = buildWorkerLlmUsageEvent(job, result);
  if (!payload) return;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(`${server}/telemetry/llm-usage`, {
      method: "POST",
      headers,
      body: JSON.stringify(payload),
      signal: controller.signal,
    });
    if (!response.ok) {
      const detail = await response.text().catch(() => "");
      throw new Error(
        `usage telemetry rejected (${response.status})${detail ? `: ${detail.trim()}` : ""}`,
      );
    }
  } catch (err) {
    // Surface a timeout as a descriptive error instead of a bare abort error.
    if (controller.signal.aborted) {
      throw new Error(`usage telemetry timed out after ${timeoutMs}ms`);
    }
    throw err;
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer);
  }
}
186
+
94
187
  function integrationBranchName(): string {
95
188
  const configuredBaseRef = CONFIG.workerpals.baseRef.trim();
96
189
  if (!configuredBaseRef) return "main_agents";
@@ -1047,6 +1140,15 @@ async function workerLoop(
1047
1140
  const jobDurationMs = Math.max(0, Date.now() - jobStartedAtMs);
1048
1141
 
1049
1142
  await logChain;
1143
+ try {
1144
+ await reportWorkerLlmUsage(opts.server, headers, jobData, result);
1145
+ } catch (err) {
1146
+ console.warn(
1147
+ `[WorkerPals] Failed to report LLM usage for job ${job.id}: ${
1148
+ err instanceof Error ? err.message : String(err)
1149
+ }`,
1150
+ );
1151
+ }
1050
1152
 
1051
1153
  let completionCommit: CommitRef | null = null;
1052
1154
  if (result.ok && shouldCommit(job.kind, CONFIG)) {
@@ -21,6 +21,7 @@ port = 3001
21
21
  debug_http = false
22
22
  stale_claim_ttl_ms = 120000
23
23
  stale_claim_sweep_interval_ms = 5000
24
+ session_token_budget = 2000000
24
25
 
25
26
  [localbuddy]
26
27
  enabled = false
@@ -8,7 +8,7 @@
8
8
 
9
9
  [localbuddy.llm]
10
10
  backend = "openai_codex"
11
- model = "gpt-5-codex"
11
+ model = "gpt-5.4"
12
12
  codex_auth_mode = "chatgpt"
13
13
  codex_bin = "bun x --yes @openai/codex"
14
14
  codex_timeout_ms = 120000
@@ -16,7 +16,7 @@ reasoning_effort = "high"
16
16
 
17
17
  [remotebuddy.llm]
18
18
  backend = "openai_codex"
19
- model = "gpt-5-codex"
19
+ model = "gpt-5.4"
20
20
  codex_auth_mode = "chatgpt"
21
21
  codex_bin = "bun x --yes @openai/codex"
22
22
  codex_timeout_ms = 120000
@@ -42,7 +42,7 @@ retention_days = 30
42
42
 
43
43
  [workerpals.llm]
44
44
  backend = "openai_codex"
45
- model = "gpt-5-codex"
45
+ model = "gpt-5.4"
46
46
  codex_auth_mode = "chatgpt"
47
47
  codex_bin = "bun x --yes @openai/codex"
48
48
  codex_timeout_ms = 120000
@@ -82,6 +82,8 @@ export interface PushPalsConfig {
82
82
  debugHttp: boolean;
83
83
  staleClaimTtlMs: number;
84
84
  staleClaimSweepIntervalMs: number;
85
+ sessionTokenBudget: number;
86
+ sessionTokenBudgetAction: "pause";
85
87
  };
86
88
  localbuddy: {
87
89
  enabled: boolean;
@@ -682,6 +684,14 @@ export function loadPushPalsConfig(options: LoadOptions = {}): PushPalsConfig {
682
684
  5_000,
683
685
  ),
684
686
  );
687
+ const sessionTokenBudget = Math.max(
688
+ 0,
689
+ asInt(
690
+ parseIntEnv("PUSHPALS_SESSION_TOKEN_BUDGET") ?? serverNode.session_token_budget,
691
+ 1_000_000,
692
+ ),
693
+ );
694
+ const sessionTokenBudgetAction: "pause" = "pause";
685
695
 
686
696
  const globalStatusHeartbeatMs = parseIntEnv("PUSHPALS_STATUS_HEARTBEAT_MS");
687
697
 
@@ -1467,6 +1477,8 @@ export function loadPushPalsConfig(options: LoadOptions = {}): PushPalsConfig {
1467
1477
  debugHttp,
1468
1478
  staleClaimTtlMs,
1469
1479
  staleClaimSweepIntervalMs,
1480
+ sessionTokenBudget,
1481
+ sessionTokenBudgetAction,
1470
1482
  },
1471
1483
  localbuddy: {
1472
1484
  enabled: localEnabled,