@mono-agent/agent-runtime 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +219 -0
- package/LICENSE +674 -0
- package/README.md +430 -0
- package/package.json +46 -0
- package/src/agent/allowlists.js +49 -0
- package/src/agent/approval.js +211 -0
- package/src/agent/compaction.js +752 -0
- package/src/agent/index.js +40 -0
- package/src/agent/prompt/skill-index.js +66 -0
- package/src/agent/tool-bloat.js +164 -0
- package/src/agent/tools/bash.js +156 -0
- package/src/agent/tools/edit.js +15 -0
- package/src/agent/tools/glob.js +71 -0
- package/src/agent/tools/grep.js +84 -0
- package/src/agent/tools/index.js +17 -0
- package/src/agent/tools/pi-bridge.js +638 -0
- package/src/agent/tools/read.js +39 -0
- package/src/agent/tools/shared/constants.js +21 -0
- package/src/agent/tools/shared/dedup.js +31 -0
- package/src/agent/tools/shared/output-truncation.js +54 -0
- package/src/agent/tools/shared/path-resolver.js +156 -0
- package/src/agent/tools/shared/ripgrep.js +130 -0
- package/src/agent/tools/shared/runtime-context.js +69 -0
- package/src/agent/tools/web-fetch.js +59 -0
- package/src/agent/tools/web-search.js +21 -0
- package/src/agent/tools/write.js +14 -0
- package/src/agent/transcript.js +227 -0
- package/src/ai/backend.js +17 -0
- package/src/ai/cost.js +164 -0
- package/src/ai/failure.js +165 -0
- package/src/ai/file-change-stats.js +234 -0
- package/src/ai/index.js +16 -0
- package/src/ai/live-input-prompt.js +15 -0
- package/src/ai/observer.js +233 -0
- package/src/ai/providers/claude-cli.js +694 -0
- package/src/ai/providers/claude-sdk.js +864 -0
- package/src/ai/providers/claude-subagents.js +67 -0
- package/src/ai/providers/codex-app.js +1045 -0
- package/src/ai/providers/opencode-app.js +356 -0
- package/src/ai/providers/opencode-discovery.js +39 -0
- package/src/ai/providers/pi-events.js +62 -0
- package/src/ai/providers/pi-messages.js +68 -0
- package/src/ai/providers/pi-models.js +111 -0
- package/src/ai/providers/pi-sdk.js +1310 -0
- package/src/ai/registry.js +5 -0
- package/src/ai/runtime/capabilities-used.js +56 -0
- package/src/ai/runtime/capabilities.js +44 -0
- package/src/ai/runtime/context-windows.js +38 -0
- package/src/ai/runtime/fast-mode.js +8 -0
- package/src/ai/runtime/model-refs.js +144 -0
- package/src/ai/runtime/registry.js +57 -0
- package/src/ai/runtime/router.js +214 -0
- package/src/ai/runtime/sessions.js +126 -0
- package/src/ai/streaming/codex-events.js +139 -0
- package/src/ai/streaming/opencode-events.js +54 -0
- package/src/ai/types.js +70 -0
- package/src/index.js +23 -0
- package/src/pi-auth.js +80 -0
- package/src/runtime-brand.js +32 -0
- package/src/runtime.js +104 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
// Snapshot/render helpers for preserving partial agent progress across
|
|
2
|
+
// continuations. The worker writes a bounded "transcript tail" when a run
|
|
3
|
+
// terminates with usable progress but no structured result, and the coordinator
|
|
4
|
+
// passes it through `diagnosticsSeed.resume_snapshot` so the next worker can
|
|
5
|
+
// prepend it to the system prompt.
|
|
6
|
+
|
|
7
|
+
import { readRuntimeBrand } from "./tools/shared/runtime-context.js";
|
|
8
|
+
|
|
9
|
+
// intelligence-ramp Phase 5.3: keep more turns but reserve verbatim slots for
|
|
10
|
+
// the most recent few. Older turns ride along as one-paragraph summaries so
|
|
11
|
+
// the SDK-mode agent can reason about the whole arc without paying the full
|
|
12
|
+
// payload cost (CLI agents get true session resume; this is the SDK fallback).
|
|
13
|
+
const DEFAULT_MAX_TURNS = 12;
|
|
14
|
+
const DEFAULT_VERBATIM_TURNS = 3;
|
|
15
|
+
const DEFAULT_MAX_CHARS = 24_000;
|
|
16
|
+
const DEFAULT_TOOL_RESULT_CHARS = 2_400;
|
|
17
|
+
const DEFAULT_ASSISTANT_TEXT_CHARS = 4_000;
|
|
18
|
+
const DEFAULT_TURN_SUMMARY_CHARS = 320;
|
|
19
|
+
|
|
20
|
+
// Clip `text` to at most `limit` characters, ending the clipped result with
// `suffix`. Nullish input is treated as "". A non-finite or non-positive
// `limit` turns clipping off and the stringified input is returned as-is.
function truncate(text, limit, suffix = "…") {
  const str = String(text ?? "");
  const clippingEnabled = Number.isFinite(limit) && limit > 0;
  if (!clippingEnabled || str.length <= limit) {
    return str;
  }
  // Reserve room for the suffix; never ask slice() for a negative length.
  const keep = Math.max(0, limit - suffix.length);
  const head = str.slice(0, keep);
  return `${head}${suffix}`;
}
|
|
25
|
+
|
|
26
|
+
// Reduce a message `content` value to plain text. A string is returned
// unchanged; anything that is neither a string nor an array yields "". For an
// array, the text of `text` / `thinking` blocks and the (recursively
// flattened) content of `tool_result` blocks are concatenated with no
// separator; every other entry is ignored.
function flattenContent(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  let flattened = "";
  for (const block of content) {
    if (!block || typeof block !== "object") continue;
    const kind = block.type;
    if (kind === "text" || kind === "thinking") {
      if (typeof block.text === "string") flattened += block.text;
    } else if (kind === "tool_result") {
      const inner = block.content;
      if (typeof inner === "string") {
        flattened += inner;
      } else if (Array.isArray(inner)) {
        flattened += flattenContent(inner);
      }
    }
  }
  return flattened;
}
|
|
45
|
+
|
|
46
|
+
// Build the compact record stored for one `tool_use` block: its id (or null),
// its name (or ""), and the JSON-serialized input clipped to
// DEFAULT_TOOL_RESULT_CHARS. Returns null for anything that is not a
// `tool_use` block.
function describeToolUse(block) {
  const isToolUse = Boolean(block) && block.type === "tool_use";
  if (!isToolUse) return null;

  const serializedInput = JSON.stringify(block.input || {});
  return {
    id: block.id || null,
    name: block.name || "",
    input_summary: truncate(serializedInput, DEFAULT_TOOL_RESULT_CHARS),
  };
}
|
|
54
|
+
|
|
55
|
+
// Build the compact record stored for one `tool_result` block: the id of the
// tool call it answers (or null), whether it was an error, and its flattened
// text clipped to `toolResultChars` (DEFAULT_TOOL_RESULT_CHARS when that is
// falsy). Returns null for anything that is not a `tool_result` block.
function describeToolResult(block, { toolResultChars }) {
  if (block?.type !== "tool_result") return null;

  const limit = toolResultChars || DEFAULT_TOOL_RESULT_CHARS;
  const text = flattenContent(block.content);
  return {
    tool_use_id: block.tool_use_id || null,
    is_error: Boolean(block.is_error),
    content: truncate(text, limit),
  };
}
|
|
63
|
+
|
|
64
|
+
// Collapse one turn into a single short paragraph for older turns that are
// kept for continuity but are too expensive to keep verbatim. The paragraph
// joins, with "; ":
//   - the first non-blank line of the assistant text (clipped to 60% of
//     `maxChars`),
//   - up to five tool names ("…" appended when there were more),
//   - the number of tool results flagged as errors.
// Falls back to "no narrative" when none of those are present; the whole
// paragraph is clipped to `maxChars`.
function summarizeTurn(turn, { maxChars = DEFAULT_TURN_SUMMARY_CHARS } = {}) {
  const fragments = [];

  const narrative = turn.assistant_text;
  if (narrative) {
    const headline = narrative.split(/\r?\n/).find((line) => line.trim());
    if (headline) {
      const headlineBudget = Math.floor(maxChars * 0.6);
      fragments.push(truncate(headline.trim(), headlineBudget));
    }
  }

  const names = (turn.tool_uses || []).flatMap((use) => (use.name ? [use.name] : []));
  if (names.length > 0) {
    const overflowMark = names.length > 5 ? "…" : "";
    fragments.push(`tools: ${names.slice(0, 5).join(", ")}${overflowMark}`);
  }

  const failures = (turn.tool_results || []).reduce(
    (count, result) => (result.is_error ? count + 1 : count),
    0,
  );
  if (failures > 0) {
    const plural = failures === 1 ? "" : "s";
    fragments.push(`${failures} tool error${plural}`);
  }

  return truncate(fragments.join("; ") || "no narrative", maxChars);
}
|
|
81
|
+
|
|
82
|
+
// Walk the captured event log in order, group it into turns of (assistant
// text, thinking, tool calls, tool results), and snapshot the most recent
// `maxTurns` of them. Returns null when there is nothing usable to resume
// from. Only the trailing `verbatimTurns` keep their (per-field clipped)
// assistant text + tool detail; older turns inside the window are reduced to
// one-paragraph summaries.
//
// Options:
//   maxTurns           - size of the tail window (summaries + verbatim), min 1.
//   verbatimTurns      - how many trailing turns stay verbatim, min 1.
//   maxChars           - target ceiling for the snapshot's JSON length.
//   toolResultChars    - clip limit applied to each tool result.
//   assistantTextChars - clip limit for a turn's assistant text and thinking.
//   turnSummaryChars   - clip limit for each summarized turn.
//
// When the JSON exceeds `maxChars`, the oldest verbatim turns are demoted to
// summaries one at a time and the snapshot is flagged `truncated: true`.
// Demoting (instead of dropping) keeps the turn arc contiguous: previously a
// trimmed turn disappeared entirely, leaving a gap between the summaries and
// the remaining verbatim turns. The final verbatim turn is always kept, so a
// single oversized turn can still push the snapshot past `maxChars`.
export function buildTranscriptTailSnapshot(events, {
  maxTurns = DEFAULT_MAX_TURNS,
  verbatimTurns = DEFAULT_VERBATIM_TURNS,
  maxChars = DEFAULT_MAX_CHARS,
  toolResultChars = DEFAULT_TOOL_RESULT_CHARS,
  assistantTextChars = DEFAULT_ASSISTANT_TEXT_CHARS,
  turnSummaryChars = DEFAULT_TURN_SUMMARY_CHARS,
} = {}) {
  if (!Array.isArray(events) || events.length === 0) return null;
  const turns = [];
  let currentAssistantText = "";
  let currentThinking = "";
  let currentToolUses = [];
  let currentToolResults = [];

  // Close the turn being accumulated (if it holds anything) and reset the
  // accumulators for the next one.
  function flushTurn() {
    if (!currentAssistantText && !currentThinking && currentToolUses.length === 0 && currentToolResults.length === 0) {
      return;
    }
    turns.push({
      assistant_text: truncate(currentAssistantText.trim(), assistantTextChars) || null,
      thinking: truncate(currentThinking.trim(), assistantTextChars) || null,
      tool_uses: currentToolUses,
      tool_results: currentToolResults,
    });
    currentAssistantText = "";
    currentThinking = "";
    currentToolUses = [];
    currentToolResults = [];
  }

  for (const event of events) {
    if (!event || typeof event !== "object") continue;
    if (event.type === "assistant" && Array.isArray(event.message?.content)) {
      for (const block of event.message.content) {
        if (block?.type === "text" && typeof block.text === "string") {
          currentAssistantText += block.text;
        } else if (block?.type === "thinking" && typeof block.text === "string") {
          // NOTE(review): thinking is read from `block.text`; confirm the event
          // producer normalizes thinking blocks to that field.
          currentThinking += block.text;
        } else if (block?.type === "tool_use") {
          const summary = describeToolUse(block);
          if (summary) currentToolUses.push(summary);
        }
      }
    } else if (event.type === "user" && Array.isArray(event.message?.content)) {
      for (const block of event.message.content) {
        if (block?.type === "tool_result") {
          const summary = describeToolResult(block, { toolResultChars });
          if (summary) currentToolResults.push(summary);
        }
      }
      // A user message carrying tool results ends the current turn.
      if (currentToolResults.length > 0) flushTurn();
    } else if (event.type === "final" || event.type === "error" || event.type === "cancelled") {
      flushTurn();
    }
  }
  // Keep whatever was still accumulating when the log ended.
  flushTurn();

  if (turns.length === 0) return null;
  const totalCap = Math.max(1, Number(maxTurns) || DEFAULT_MAX_TURNS);
  const verbatimCap = Math.max(1, Number(verbatimTurns) || DEFAULT_VERBATIM_TURNS);
  const tailWindow = turns.slice(-totalCap);
  const verbatimSlice = tailWindow.slice(-Math.min(verbatimCap, tailWindow.length));
  const summarizedSlice = tailWindow.slice(0, Math.max(0, tailWindow.length - verbatimSlice.length));

  // Each summarized entry retains its 1-based turn index (so
  // renderResumeSnapshot labels it correctly) and a one-paragraph description.
  const toSummaryEntry = (turn, offsetInWindow) => ({
    turn_index: turns.length - tailWindow.length + offsetInWindow + 1,
    summary: summarizeTurn(turn, { maxChars: turnSummaryChars }),
  });
  const earlierTurnSummaries = summarizedSlice.map((turn, idx) => toSummaryEntry(turn, idx));

  const brand = readRuntimeBrand();
  const snapshot = {
    schema: `${brand.schemaPrefix}.transcript-tail.v1`,
    captured_at: Date.now(),
    turn_count: turns.length,
    earlier_turn_summaries: earlierTurnSummaries,
    turns: verbatimSlice,
  };
  const json = JSON.stringify(snapshot);
  if (json.length <= maxChars) return snapshot;

  // Over budget: demote the oldest verbatim turn to a summary until the JSON
  // fits or only one verbatim turn remains.
  let keptTurns = verbatimSlice;
  let summaries = earlierTurnSummaries;
  const fits = () => JSON.stringify({
    ...snapshot,
    earlier_turn_summaries: summaries,
    turns: keptTurns,
  }).length <= maxChars;
  while (keptTurns.length > 1 && !fits()) {
    const offsetInWindow = tailWindow.length - keptTurns.length;
    summaries = [...summaries, toSummaryEntry(keptTurns[0], offsetInWindow)];
    keptTurns = keptTurns.slice(1);
  }
  return { ...snapshot, earlier_turn_summaries: summaries, turns: keptTurns, truncated: true };
}
|
|
175
|
+
|
|
176
|
+
// Render a transcript-tail snapshot as a `<resume_context>` text block.
// Returns "" when the snapshot has no verbatim turns. Earlier-turn summaries
// (when present) are listed first so the reader gets the full arc, followed
// by the verbatim turns numbered so that the last one is turn `turn_count`.
export function renderResumeSnapshot(snapshot) {
  const turns = snapshot?.turns;
  if (!Array.isArray(turns) || turns.length === 0) return "";

  const totalTurns = snapshot.turn_count || turns.length;
  const out = [
    "<resume_context>",
    `A previous attempt at this task ran ${totalTurns} turn(s) before the provider connection dropped.`,
    "Below is the trail of recent work so you can continue from where it left off rather than starting over. Tool results are abbreviated; re-read files as needed if you require the full content.",
    "",
  ];

  const summaries = snapshot.earlier_turn_summaries;
  if (Array.isArray(summaries) && summaries.length > 0) {
    out.push("### Earlier turns (summarized)");
    for (const { turn_index: turnIndex, summary } of summaries) {
      out.push(`- Turn ${turnIndex}: ${summary}`);
    }
    out.push("");
  }

  // Lines for one verbatim turn: heading, thinking, assistant text, tool
  // calls, tool results, then a blank separator.
  const renderTurn = (turn, turnNumber) => {
    const section = [`### Turn ${turnNumber}`];
    if (turn.thinking) section.push(`Thinking: ${turn.thinking}`);
    if (turn.assistant_text) section.push(`Assistant: ${turn.assistant_text}`);
    if (Array.isArray(turn.tool_uses)) {
      for (const call of turn.tool_uses) {
        section.push(`Tool call: ${call.name}(${call.input_summary || ""})`);
      }
    }
    if (Array.isArray(turn.tool_results)) {
      for (const outcome of turn.tool_results) {
        const heading = outcome.is_error ? "Tool result (error)" : "Tool result";
        section.push(`${heading}: ${outcome.content || ""}`);
      }
    }
    section.push("");
    return section;
  };

  const firstVerbatimNumber = totalTurns - turns.length + 1;
  turns.forEach((turn, offset) => {
    out.push(...renderTurn(turn, firstVerbatimNumber + offset));
  });

  out.push("</resume_context>");
  return out.join("\n");
}
|
|
219
|
+
|
|
220
|
+
export const RESUME_SNAPSHOT_DEFAULTS = {
|
|
221
|
+
maxTurns: DEFAULT_MAX_TURNS,
|
|
222
|
+
verbatimTurns: DEFAULT_VERBATIM_TURNS,
|
|
223
|
+
maxChars: DEFAULT_MAX_CHARS,
|
|
224
|
+
toolResultChars: DEFAULT_TOOL_RESULT_CHARS,
|
|
225
|
+
assistantTextChars: DEFAULT_ASSISTANT_TEXT_CHARS,
|
|
226
|
+
turnSummaryChars: DEFAULT_TURN_SUMMARY_CHARS,
|
|
227
|
+
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { RUNTIME_CAPABILITIES, runtimeCapabilities } from "./runtime/capabilities.js";
|
|
2
|
+
|
|
3
|
+
// Back-compat export name for callers that still ask for backend capabilities.
|
|
4
|
+
// The canonical source is the runtime bridge registry.
|
|
5
|
+
export const BACKEND_CAPABILITIES = RUNTIME_CAPABILITIES;
|
|
6
|
+
|
|
7
|
+
// Back-compat wrapper: forwards the lookup unchanged to `runtimeCapabilities`
// and returns whatever it returns.
export function backendCapabilities(sdkOrModel) {
  const capabilities = runtimeCapabilities(sdkOrModel);
  return capabilities;
}
|
|
10
|
+
|
|
11
|
+
// True when the RUNTIME_CAPABILITIES entry for `sdk` has a truthy
// `native_runtime_config`; false when it is falsy or the entry is missing.
export function backendUsesExecenvConfig(sdk) {
  const entry = RUNTIME_CAPABILITIES[sdk];
  return Boolean(entry?.native_runtime_config);
}
|
|
14
|
+
|
|
15
|
+
// True when the RUNTIME_CAPABILITIES entry for `sdk` has a truthy
// `supports_session_resume`; false when it is falsy or the entry is missing.
export function backendSupportsSessionResume(sdk) {
  const entry = RUNTIME_CAPABILITIES[sdk];
  return Boolean(entry?.supports_session_resume);
}
|
package/src/ai/cost.js
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { getModel as getPiModel } from "@earendil-works/pi-ai";
|
|
2
|
+
|
|
3
|
+
const CLAUDE_PRICING = {
|
|
4
|
+
"claude-haiku-4-5-20251001": { input: 1.0, cacheRead: 0.1, cacheWrite: 1.25, output: 5.0 },
|
|
5
|
+
"claude-haiku-4-5": { input: 1.0, cacheRead: 0.1, cacheWrite: 1.25, output: 5.0 },
|
|
6
|
+
"claude-sonnet-4-6": { input: 3.0, cacheRead: 0.3, cacheWrite: 3.75, output: 15.0 },
|
|
7
|
+
"claude-sonnet-4-5": { input: 3.0, cacheRead: 0.3, cacheWrite: 3.75, output: 15.0 },
|
|
8
|
+
"claude-sonnet-4": { input: 3.0, cacheRead: 0.3, cacheWrite: 3.75, output: 15.0 },
|
|
9
|
+
"claude-opus-4-7": { input: 5.0, cacheRead: 0.5, cacheWrite: 6.25, output: 25.0 },
|
|
10
|
+
"claude-opus-4-6": { input: 5.0, cacheRead: 0.5, cacheWrite: 6.25, output: 25.0 },
|
|
11
|
+
"claude-opus-4-5": { input: 5.0, cacheRead: 0.5, cacheWrite: 6.25, output: 25.0 },
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
// Coerce `value` to a finite, non-negative number. Returns null for nullish
// input, the empty string, and anything that does not coerce to a finite
// number >= 0.
function finiteOrNull(value) {
  if (value === null || value === undefined || value === "") return null;
  const parsed = Number(value);
  if (!Number.isFinite(parsed) || parsed < 0) return null;
  return parsed;
}
|
|
19
|
+
|
|
20
|
+
// Return `value` as a finite non-negative number, or `fallback` when
// `finiteOrNull` rejects it.
function rate(value, fallback = 0) {
  return finiteOrNull(value) ?? fallback;
}
|
|
24
|
+
|
|
25
|
+
// Convert a pricing record into the normalized
// `{ input, cacheRead, cacheWrite, output, source, priced }` shape. Accepts
// either camelCase keys (`input`, `cacheRead` / `cachedInput`, `cacheWrite`,
// `output`) or the `*_per_million` keys. Returns null when `pricing` is not
// an object.
//
// A missing or invalid rate becomes `missing`, except the cache-read rate,
// which falls back to the resolved input rate.
function normalizePricing(pricing, { source, priced = true, missing = 0 } = {}) {
  if (!pricing || typeof pricing !== "object") return null;

  const rawInput = pricing.input ?? pricing.input_per_million;
  const rawCacheRead = pricing.cacheRead
    ?? pricing.cachedInput
    ?? pricing.cached_input_per_million;
  const rawCacheWrite = pricing.cacheWrite
    ?? pricing.cache_write_per_million
    ?? pricing.cache_creation_per_million;
  const rawOutput = pricing.output ?? pricing.output_per_million;

  const input = rate(rawInput, missing);
  return {
    input,
    cacheRead: rate(rawCacheRead, input),
    cacheWrite: rate(rawCacheWrite, missing),
    output: rate(rawOutput, missing),
    source,
    priced,
  };
}
|
|
43
|
+
|
|
44
|
+
// Normalized pricing whose four rates are all 0, marked as priced and
// attributed to `source`.
function zeroPricing(source) {
  const rates = { input: 0, cacheRead: 0, cacheWrite: 0, output: 0 };
  return { ...rates, source, priced: true };
}
|
|
47
|
+
|
|
48
|
+
// Placeholder pricing for a model with no known rates: every rate is null,
// the source is "unknown", and `priced` is false.
function unknownPricing() {
  const rates = { input: null, cacheRead: null, cacheWrite: null, output: null };
  return { ...rates, source: "unknown", priced: false };
}
|
|
51
|
+
|
|
52
|
+
// Split a model reference string into its parts. Returns null for a
// non-string, a blank string, or a malformed `vercel:` / `pi:` reference.
//
//   "vercel:<provider>:<model>" and "pi:<provider>:<model>"
//       -> { sdk: "pi", provider, model }   (null when provider is missing)
//   "codex:<model>"  -> { sdk: "pi", provider: "openai-codex", model }
//   "openai:<model>" -> { sdk: "pi", provider: "openai", model }
//   "<sdk>:<model>"  -> { sdk, model }
//   anything else    -> { sdk: null, model: reference }
function parseReference(reference) {
  if (typeof reference !== "string" || !reference.trim()) return null;

  // "<provider>:<model>" -> pi reference; the provider must be non-empty.
  const splitProviderModel = (rest) => {
    const sep = rest.indexOf(":");
    if (sep <= 0) return null;
    return { sdk: "pi", provider: rest.slice(0, sep), model: rest.slice(sep + 1) };
  };

  for (const prefix of ["vercel:", "pi:"]) {
    if (reference.startsWith(prefix)) {
      return splitProviderModel(reference.slice(prefix.length));
    }
  }

  // Prefixes that imply a fixed provider.
  const fixedProviders = [
    ["codex:", "openai-codex"],
    ["openai:", "openai"],
  ];
  for (const [prefix, provider] of fixedProviders) {
    if (reference.startsWith(prefix)) {
      return { sdk: "pi", provider, model: reference.slice(prefix.length) };
    }
  }

  const sep = reference.indexOf(":");
  return sep <= 0
    ? { sdk: null, model: reference }
    : { sdk: reference.slice(0, sep), model: reference.slice(sep + 1) };
}
|
|
74
|
+
|
|
75
|
+
// Report whether `baseUrl` points at a local / private-network host.
// Returns false when `baseUrl` cannot be parsed as a URL.
//
// Recognized as private:
//   - the names "localhost" and "host.docker.internal"
//   - the IPv6 loopback "::1"
//   - IPv4 literals in 127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16,
//     172.16.0.0/12 and 100.64.0.0/10
//
// The IPv4 ranges are only applied to a full dotted-quad literal. Matching on
// the string prefix alone (as this function used to) also classified public
// DNS names such as "10.example.com" or "192.168.evil.net" as private.
function isPrivateHost(baseUrl) {
  let host;
  try {
    // URL.hostname keeps the brackets around IPv6 literals; strip them.
    host = new URL(baseUrl).hostname.toLowerCase().replace(/^\[|\]$/g, "");
  } catch {
    return false;
  }

  if (host === "localhost" || host === "host.docker.internal" || host === "::1") {
    return true;
  }

  const quad = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  if (!quad) return false;
  const first = Number(quad[1]);
  const second = Number(quad[2]);

  return first === 127
    || first === 10
    || (first === 192 && second === 168)
    || (first === 172 && second >= 16 && second <= 31)
    || (first === 100 && second >= 64 && second <= 127);
}
|
|
90
|
+
|
|
91
|
+
// True when at least one of the four `*_per_million` rate fields on `pricing`
// holds a finite, non-negative number (per `finiteOrNull`).
function pricingHasRates(pricing = {}) {
  const rateFields = [
    "input_per_million",
    "cached_input_per_million",
    "cache_write_per_million",
    "output_per_million",
  ];
  for (const field of rateFields) {
    if (finiteOrNull(pricing[field]) != null) return true;
  }
  return false;
}
|
|
99
|
+
|
|
100
|
+
// Look up pricing for a pi reference via `getPiModel`. Returns null when the
// reference is not a complete pi reference (sdk "pi" with provider and model),
// when the lookup throws, or when the returned model has no `cost`.
function piCatalogPricing(parsed) {
  const isPiReference = parsed?.sdk === "pi"
    && Boolean(parsed.provider)
    && Boolean(parsed.model);
  if (!isPiReference) return null;

  try {
    const cost = getPiModel(parsed.provider, parsed.model)?.cost;
    if (!cost) return null;
    return normalizePricing(cost, { source: "pi-catalog" });
  } catch {
    // Best-effort lookup: a failure here just means "no catalog price".
    return null;
  }
}
|
|
109
|
+
|
|
110
|
+
// Look up pricing for a `claude` reference in CLAUDE_PRICING. Returns null
// for any other sdk, or when the model has no row in the table.
function claudePricing(parsed) {
  if (parsed?.sdk === "claude") {
    const row = CLAUDE_PRICING[parsed.model];
    return normalizePricing(row, { source: "claude-table" });
  }
  return null;
}
|
|
114
|
+
|
|
115
|
+
// `resolveCustomPricing(parsed) -> NormalizedPricing | null` lets a host plug
// in user-defined pricing tables. Hosts query their custom model/provider
// stores (src/core/custom-pricing.js) and pass the closure in via
// `generateResponse`. The pricing helpers (`normalizePricing`, `zeroPricing`,
// `unknownPricing`, `pricingHasRates`, `isPrivateHost`, `parseReference`) are
// exported so hosts can build their own resolvers without re-implementing the
// row-shape conversion.
//
// Resolution order: host-supplied custom pricing, then the pi catalog, then
// the built-in Claude table. Falls back to `unknownPricing()` when nothing
// matches or `model` cannot be parsed.
export function resolvePricing({ resolveCustomPricing, model } = {}) {
  const parsed = parseReference(model);
  if (!parsed) return unknownPricing();

  if (typeof resolveCustomPricing === "function") {
    const custom = resolveCustomPricing(parsed);
    if (custom) return custom;
  }

  return piCatalogPricing(parsed) || claudePricing(parsed) || unknownPricing();
}
|
|
132
|
+
|
|
133
|
+
export { normalizePricing, zeroPricing, unknownPricing, pricingHasRates, isPrivateHost, parseReference };
|
|
134
|
+
|
|
135
|
+
// Estimate the cost of one request from its token counts and the rates
// resolved by `resolvePricing`, which are applied per million tokens.
//
// Parameters (all optional):
//   resolveCustomPricing - host pricing resolver, forwarded to resolvePricing.
//   model                - model reference string.
//   inputTokens / outputTokens / cachedTokens - token counts; negative or
//                          non-numeric values count as 0.
//   cacheWriteTokens     - cache-write token count.
//   cacheCreationTokens  - alias for cacheWriteTokens, used when
//                          cacheWriteTokens is absent or zero.
//
// Returns the estimated cost as a number, or null when the model is unpriced
// or a token class with a positive count has no usable rate.
export function estimateCost({
  resolveCustomPricing,
  model,
  inputTokens = 0,
  outputTokens = 0,
  cachedTokens = 0,
  cacheWriteTokens = 0,
  cacheCreationTokens = 0,
} = {}) {
  const pricing = resolvePricing({ resolveCustomPricing, model });
  if (!pricing?.priced) return null;

  const tokenCount = (value) => Math.max(0, Number(value) || 0);

  const cacheRead = tokenCount(cachedTokens);
  // `cacheWriteTokens` defaults to 0, so `cacheWriteTokens ?? cacheCreationTokens`
  // could never reach the alias when a caller supplied only
  // `cacheCreationTokens`. Fall back whenever the primary count is not a
  // positive number.
  const cacheWrite = tokenCount(cacheWriteTokens) || tokenCount(cacheCreationTokens);
  const input = tokenCount(inputTokens);
  const output = tokenCount(outputTokens);

  const parts = [
    [input, pricing.input],
    [cacheRead, pricing.cacheRead],
    [cacheWrite, pricing.cacheWrite],
    [output, pricing.output],
  ];
  let total = 0;
  for (const [tokens, price] of parts) {
    // A missing rate only matters for token classes that were actually used.
    if (tokens <= 0) continue;
    const priceNumber = finiteOrNull(price);
    if (priceNumber == null) return null;
    total += (tokens / 1_000_000) * priceNumber;
  }
  return total;
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
export const FAILURE_KINDS = [
|
|
2
|
+
"spawn",
|
|
3
|
+
"timeout",
|
|
4
|
+
"stall",
|
|
5
|
+
"usage_limit",
|
|
6
|
+
"invalid_result",
|
|
7
|
+
"invalid_delegation",
|
|
8
|
+
"tool_failure",
|
|
9
|
+
"provider_unavailable",
|
|
10
|
+
"provider_unavailable_exhausted",
|
|
11
|
+
"child_failed",
|
|
12
|
+
"budget_exceeded",
|
|
13
|
+
"cancelled",
|
|
14
|
+
"cancelled_user",
|
|
15
|
+
"cancelled_stale",
|
|
16
|
+
"cancelled_shutdown",
|
|
17
|
+
"cancelled_signal",
|
|
18
|
+
"abandoned",
|
|
19
|
+
// v33: planner delegated to an agent outside the effective team's roster
|
|
20
|
+
// (lead + members). Replaces the retired delegation_agent_not_allowed kind.
|
|
21
|
+
"delegation_agent_not_in_team",
|
|
22
|
+
"delegation_team_roster_empty",
|
|
23
|
+
// Provider session resume: the host asked to resume a provider session that
|
|
24
|
+
// is no longer live (expired, evicted, or process died) or that is still
|
|
25
|
+
// executing another turn. Both are non-retryable at the router level; the
|
|
26
|
+
// host retries once without the session (replaying history) instead.
|
|
27
|
+
"session_not_found",
|
|
28
|
+
"session_busy",
|
|
29
|
+
];
|
|
30
|
+
|
|
31
|
+
const USAGE_LIMIT_RE = /(rate limit|usage limit|max tokens|max turns|context length|too many tokens)/i;
|
|
32
|
+
const PROVIDER_UNAVAILABLE_RE = /(econn|enotfound|etimedout|service unavailable|503|502|gateway|fetch failed|network|websocket)/i;
|
|
33
|
+
const TOOL_FAILURE_RE = /(tool .* failed|mcp tool|permission denied|EACCES|read-only file system)/i;
|
|
34
|
+
const NON_RETRYABLE_PROVIDER_RE = /(invalid[_ ]request|unknown parameter|invalid api key|incorrect api key|authentication|authorization|not authorized|forbidden|billing|insufficient[_ ]quota|quota exceeded|model[_ ]not[_ ]found|unsupported model|permission denied|bad request|401|403|404)/i;
|
|
35
|
+
const RETRYABLE_PROVIDER_RE = /(currently overloaded|server(?:s)? (?:is |are )?overloaded|try again later|retry your request|request id|service unavailable|temporar(?:y|ily)|timed? ?out|stream disconnected|fetch failed|econnreset|econnrefused|eai_again|enotfound|etimedout|network|429|too many requests|500|502|503|504|gateway|internal server error)/i;
|
|
36
|
+
export const PROVIDER_ABORT_RE = /\b(?:terminated|aborted before final output|aborted before final|stream aborted|stream was aborted|stream disconnected|websocket (?:error|disconnected|closed)|socket hang up|und_err_socket|econnreset|premature close)\b/i;
|
|
37
|
+
|
|
38
|
+
// Extract a provider request id from free-form error text. Looks for
// "request id" / "req id" (with `_`, `-`, or a space, optionally followed by
// ":" or "#") and captures the following token of at least 8 id characters.
// Trailing punctuation is stripped. Returns null when nothing matches.
function requestIdFromText(text) {
  const pattern = /\b(?:request[_ -]?id|req[_ -]?id)\s*[:#]?\s*([A-Za-z0-9._:-]{8,})/i;
  const found = pattern.exec(text || "");
  const token = found?.[1];
  if (!token) return null;
  const cleaned = token.replace(/[.,;:]+$/, "");
  return cleaned || null;
}
|
|
42
|
+
|
|
43
|
+
// Map provider error text to a retryable sub-kind. The rules are checked in
// order and the first matching pattern wins; returns null when none match.
function retryableProviderSubkind(text) {
  const rules = [
    ["overloaded", /overloaded/i],
    ["rate_limited", /429|too many requests|rate limit/i],
    ["timeout", /timed? ?out|etimedout/i],
    ["network", /stream disconnected|fetch failed|econnreset|econnrefused|eai_again|enotfound|network/i],
    ["server_error", /500|502|503|504|service unavailable|gateway|internal server error/i],
    ["retryable_request", /retry your request|try again later|request id|processing your request/i],
  ];
  for (const [subkind, pattern] of rules) {
    if (pattern.test(text)) return subkind;
  }
  return null;
}
|
|
52
|
+
|
|
53
|
+
// Decide whether a provider failure is worth retrying, based on the combined
// error text and stderr tail. Returns `{ retryable, subkind, requestId }`.
//
//   - A `failureKind` other than "provider_unavailable" is never retryable.
//   - Empty text is never retryable.
//   - Text matching NON_RETRYABLE_PROVIDER_RE is reported as "non_retryable".
//   - For "provider_unavailable", text matching PROVIDER_ABORT_RE is
//     "terminated"; otherwise the sub-kind comes from
//     `retryableProviderSubkind`.
//   - With no sub-kind, text matching RETRYABLE_PROVIDER_RE is still
//     retryable, as "retryable_request".
export function retryableProviderFailureInfo({
  errorText = "",
  stderrTail = "",
  failureKind = null,
} = {}) {
  const notRetryable = (subkind = null, requestId = null) => ({
    retryable: false,
    subkind,
    requestId,
  });

  if (failureKind && failureKind !== "provider_unavailable") return notRetryable();

  const haystack = `${errorText || ""}\n${stderrTail || ""}`.trim();
  if (!haystack) return notRetryable();

  const requestId = requestIdFromText(haystack);
  if (NON_RETRYABLE_PROVIDER_RE.test(haystack)) {
    return notRetryable("non_retryable", requestId);
  }

  let subkind;
  if (failureKind === "provider_unavailable" && PROVIDER_ABORT_RE.test(haystack)) {
    subkind = "terminated";
  } else {
    subkind = retryableProviderSubkind(haystack);
  }
  if (subkind) return { retryable: true, subkind, requestId };

  const genericallyRetryable = RETRYABLE_PROVIDER_RE.test(haystack);
  return {
    retryable: genericallyRetryable,
    subkind: genericallyRetryable ? "retryable_request" : null,
    requestId,
  };
}
|
|
76
|
+
|
|
77
|
+
// classifyFailure is the single source of truth for mapping the disparate
// inputs the coordinator sees on a worker exit (process code, signal, error
// text, stderr tail, timeout flag, cancellation flag, mcp init result, parse
// errors) into one of FAILURE_KINDS. Every adapter / spawn-worker / watcher
// path should funnel through this so the values in `task_runs.failure_kind`
// stay coherent.
//
// Precedence, highest first: explicit flags (budget, child, parse, timeout),
// cancellation, exit signal, a valid `hint`, then text heuristics. Returns
// null only for a clean exit (code 0 with no error text); anything left
// unclassified is "spawn".
export function classifyFailure({
  exitCode = null,
  signal = null,
  errorText = "",
  stderrTail = "",
  timedOut = false,
  cancelRequested = false,
  cancelInitiator = null,
  resultParseError = false,
  mcpInitFailed = false,
  budgetExceeded = false,
  childFailed = false,
  hint = null,
} = {}) {
  if (budgetExceeded) return "budget_exceeded";
  if (childFailed) return "child_failed";
  if (resultParseError) return "invalid_result";
  if (timedOut) return "timeout";

  if (cancelRequested) {
    // R5: distinguish a clean coordinator shutdown from a stale-run reconcile.
    // Both the audit and the operator care which one: a coordinator_shutdown
    // means "we asked you to stop", and the work is reconciliation-eligible
    // on the next boot. A stale_reconcile means the run was already orphaned
    // (no live coordinator to ask). Mapping both to cancelled_stale hid the
    // difference and confused the audit-period reports.
    switch (cancelInitiator) {
      case "coordinator_shutdown":
        return "cancelled_shutdown";
      case "stale_reconcile":
        return "cancelled_stale";
      case "worker_signal":
        return "cancelled_signal";
      case "user":
      case "api_cancel":
        return "cancelled_user";
      case "budget":
        // An in-flight run cancelled by the settings-backed turn guardrail
        // reuses budget_exceeded so dashboards / reports don't have to learn a
        // new label.
        return "budget_exceeded";
      default:
        return "cancelled";
    }
  }

  const interrupted = exitCode === 130 || signal === "SIGTERM" || signal === "SIGINT";
  if (interrupted) return "cancelled_signal";
  if (signal === "SIGKILL" && !exitCode) return "abandoned";

  // A caller-supplied hint is honored only when it names a known kind.
  if (hint && FAILURE_KINDS.includes(hint)) return hint;

  const haystack = `${errorText || ""}\n${stderrTail || ""}`;
  const textRules = [
    [USAGE_LIMIT_RE, "usage_limit"],
    [TOOL_FAILURE_RE, "tool_failure"],
    [PROVIDER_UNAVAILABLE_RE, "provider_unavailable"],
  ];
  for (const [pattern, kind] of textRules) {
    if (pattern.test(haystack)) return kind;
  }
  if (mcpInitFailed && haystack.toLowerCase().includes("mcp")) return "tool_failure";

  return exitCode === 0 && !errorText ? null : "spawn";
}
|
|
131
|
+
|
|
132
|
+
// Bounded tail buffer for stderr. CLI providers can produce 100s of MB of
// stderr; we only want the last few KB for diagnostics. The buffer keeps at
// most `limit` characters of the most recently pushed text (measured with
// string length, not encoded bytes) and counts everything it discards.
//
// Returned object:
//   push(chunk)   - append a string, or anything with toString() (e.g. a
//                   Buffer); empty / nullish chunks are ignored.
//   toString()    - the retained tail, prefixed with a
//                   `[truncated N earlier bytes]` line when anything was
//                   dropped (the prefix is in addition to `limit`).
//   bytesDropped  - how many characters have been discarded so far.
export function createStderrTail({ limit = 8 * 1024 } = {}) {
  let tail = "";
  let droppedCount = 0;

  return {
    push(chunk) {
      const incoming = typeof chunk === "string" ? chunk : chunk?.toString?.() || "";
      if (!incoming) return;

      // A chunk at least as large as the window replaces the tail outright.
      if (incoming.length >= limit) {
        droppedCount += tail.length + (incoming.length - limit);
        tail = incoming.slice(incoming.length - limit);
        return;
      }

      const joined = tail + incoming;
      const excess = joined.length - limit;
      if (excess <= 0) {
        tail = joined;
        return;
      }
      droppedCount += Math.min(tail.length, excess);
      tail = joined.slice(excess);
    },
    toString() {
      return droppedCount
        ? `[truncated ${droppedCount} earlier bytes]\n${tail}`
        : tail;
    },
    get bytesDropped() {
      return droppedCount;
    },
  };
}
|