auggy 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/LICENSE +201 -0
- package/README.md +161 -0
- package/package.json +76 -0
- package/src/agent-card.ts +39 -0
- package/src/agent.ts +283 -0
- package/src/agentmail-client.ts +138 -0
- package/src/augments/bash/index.ts +463 -0
- package/src/augments/bash/skill/SKILL.md +156 -0
- package/src/augments/budgets/budget-store.ts +513 -0
- package/src/augments/budgets/index.ts +134 -0
- package/src/augments/budgets/preamble.ts +93 -0
- package/src/augments/budgets/types.ts +89 -0
- package/src/augments/file-memory/index.ts +71 -0
- package/src/augments/filesystem/index.ts +533 -0
- package/src/augments/filesystem/skill/SKILL.md +142 -0
- package/src/augments/filesystem/skill/references/mount-permissions.md +81 -0
- package/src/augments/layered-memory/extractor/buffer.ts +56 -0
- package/src/augments/layered-memory/extractor/frequency.ts +79 -0
- package/src/augments/layered-memory/extractor/inject-handler.ts +103 -0
- package/src/augments/layered-memory/extractor/parse.ts +75 -0
- package/src/augments/layered-memory/extractor/prompt.md +26 -0
- package/src/augments/layered-memory/index.ts +757 -0
- package/src/augments/layered-memory/skill/SKILL.md +153 -0
- package/src/augments/layered-memory/storage/migrations/README.md +16 -0
- package/src/augments/layered-memory/storage/migrations/supabase-add-fact-fields.sql +9 -0
- package/src/augments/layered-memory/storage/sqlite-store.ts +352 -0
- package/src/augments/layered-memory/storage/supabase-store.ts +263 -0
- package/src/augments/layered-memory/storage/types.ts +98 -0
- package/src/augments/link/index.ts +489 -0
- package/src/augments/link/translate.ts +261 -0
- package/src/augments/notify/adapters/agentmail.ts +70 -0
- package/src/augments/notify/adapters/telegram.ts +60 -0
- package/src/augments/notify/adapters/webhook.ts +55 -0
- package/src/augments/notify/index.ts +284 -0
- package/src/augments/notify/skill/SKILL.md +150 -0
- package/src/augments/org-context/index.ts +721 -0
- package/src/augments/org-context/skill/SKILL.md +96 -0
- package/src/augments/skills/index.ts +103 -0
- package/src/augments/supabase-memory/index.ts +151 -0
- package/src/augments/telegram-transport/index.ts +312 -0
- package/src/augments/telegram-transport/polling.ts +55 -0
- package/src/augments/telegram-transport/webhook.ts +56 -0
- package/src/augments/turn-control/index.ts +61 -0
- package/src/augments/turn-control/skill/SKILL.md +155 -0
- package/src/augments/visitor-auth/email-validation.ts +66 -0
- package/src/augments/visitor-auth/index.ts +779 -0
- package/src/augments/visitor-auth/rate-limiter.ts +90 -0
- package/src/augments/visitor-auth/skill/SKILL.md +55 -0
- package/src/augments/visitor-auth/storage/sqlite-store.ts +398 -0
- package/src/augments/visitor-auth/storage/types.ts +164 -0
- package/src/augments/visitor-auth/types.ts +123 -0
- package/src/augments/visitor-auth/verify-page.ts +179 -0
- package/src/augments/web-fetch/index.ts +331 -0
- package/src/augments/web-fetch/skill/SKILL.md +100 -0
- package/src/cli/agent-index.ts +289 -0
- package/src/cli/augment-catalog.ts +320 -0
- package/src/cli/augment-resolver.ts +597 -0
- package/src/cli/commands/add-skill.ts +194 -0
- package/src/cli/commands/add.ts +87 -0
- package/src/cli/commands/chat.ts +207 -0
- package/src/cli/commands/create.ts +462 -0
- package/src/cli/commands/dev.ts +139 -0
- package/src/cli/commands/eval.ts +180 -0
- package/src/cli/commands/ls.ts +66 -0
- package/src/cli/commands/remove.ts +95 -0
- package/src/cli/commands/restart.ts +40 -0
- package/src/cli/commands/start.ts +123 -0
- package/src/cli/commands/status.ts +104 -0
- package/src/cli/commands/stop.ts +84 -0
- package/src/cli/commands/visitors-revoke.ts +155 -0
- package/src/cli/commands/visitors.ts +101 -0
- package/src/cli/config-parser.ts +1034 -0
- package/src/cli/engine-resolver.ts +68 -0
- package/src/cli/index.ts +178 -0
- package/src/cli/model-picker.ts +89 -0
- package/src/cli/pid-registry.ts +146 -0
- package/src/cli/plist-generator.ts +117 -0
- package/src/cli/resolve-config.ts +56 -0
- package/src/cli/scaffold-skills.ts +158 -0
- package/src/cli/scaffold.ts +291 -0
- package/src/cli/skill-frontmatter.ts +51 -0
- package/src/cli/skill-validator.ts +151 -0
- package/src/cli/types.ts +228 -0
- package/src/cli/yaml-helpers.ts +66 -0
- package/src/engines/_shared/cost.ts +55 -0
- package/src/engines/_shared/schema-normalize.ts +75 -0
- package/src/engines/anthropic/pricing.ts +117 -0
- package/src/engines/anthropic.ts +483 -0
- package/src/engines/openai/pricing.ts +67 -0
- package/src/engines/openai.ts +446 -0
- package/src/engines/openrouter/pricing.ts +83 -0
- package/src/engines/openrouter.ts +185 -0
- package/src/helpers.ts +24 -0
- package/src/http.ts +387 -0
- package/src/index.ts +165 -0
- package/src/kernel/capability-table.ts +172 -0
- package/src/kernel/context-allocator.ts +161 -0
- package/src/kernel/history-manager.ts +198 -0
- package/src/kernel/lifecycle-manager.ts +106 -0
- package/src/kernel/output-validator.ts +35 -0
- package/src/kernel/preamble.ts +23 -0
- package/src/kernel/route-collector.ts +97 -0
- package/src/kernel/timeout.ts +21 -0
- package/src/kernel/tool-selector.ts +47 -0
- package/src/kernel/trace-emitter.ts +66 -0
- package/src/kernel/transport-queue.ts +147 -0
- package/src/kernel/turn-loop.ts +1148 -0
- package/src/memory/context-synthesis.ts +83 -0
- package/src/memory/memory-bus.ts +61 -0
- package/src/memory/registry.ts +80 -0
- package/src/memory/tools.ts +320 -0
- package/src/memory/types.ts +8 -0
- package/src/parts.ts +30 -0
- package/src/scaffold-templates/identity.md +31 -0
- package/src/telegram-client.ts +145 -0
- package/src/tokenizer.ts +14 -0
- package/src/transports/ag-ui-events.ts +253 -0
- package/src/transports/visitor-token.ts +82 -0
- package/src/transports/web-transport.ts +948 -0
- package/src/types.ts +1009 -0
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
import { lookup, getFreshness, priceAnthropicResponse } from "./anthropic/pricing";
|
|
3
|
+
import type {
|
|
4
|
+
AssembledPrompt,
|
|
5
|
+
Message,
|
|
6
|
+
ModelClient,
|
|
7
|
+
ModelDelta,
|
|
8
|
+
ModelResponse,
|
|
9
|
+
ToolDefinition,
|
|
10
|
+
} from "../types";
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Anthropic engine — a ModelClient adapter that drives the agent's reasoning
|
|
14
|
+
* via Anthropic's Messages API.
|
|
15
|
+
*
|
|
16
|
+
* Responsibilities:
|
|
17
|
+
* - Translate AssembledPrompt into the Messages API request shape
|
|
18
|
+
* (system text, conversation messages, tools)
|
|
19
|
+
* - Run the API call
|
|
20
|
+
* - Translate the response back into ModelResponse
|
|
21
|
+
*
|
|
22
|
+
* This engine is stateless beyond the underlying HTTP client. Retries,
|
|
23
|
+
* timeouts, and rate limit handling live in the SDK; everything above
|
|
24
|
+
* (queue, history, context budgeting) is the kernel's job.
|
|
25
|
+
*
|
|
26
|
+
* Token counting uses a character/4 approximation rather than Anthropic's
|
|
27
|
+
* async token-counting endpoint, because Auggy's ModelClient interface
|
|
28
|
+
* wants a synchronous countTokens and the accuracy gain isn't worth the
|
|
29
|
+
* extra round trip on every budget computation.
|
|
30
|
+
*/
|
|
31
|
+
/** Configuration for {@link createAnthropicEngine}. All fields except `model` are optional. */
export interface AnthropicEngineOptions {
  /** API key. Defaults to the ANTHROPIC_API_KEY environment variable. */
  apiKey?: string;
  /** Model ID (e.g. "claude-sonnet-4-6", "claude-opus-4-6", "claude-haiku-4-5-20251001"). */
  model: string;
  /** Total context window in tokens for this model. Defaults to 200_000. */
  maxContextTokens?: number;
  /** Per-turn output cap. Defaults to 4096. */
  maxTokens?: number;
  /** Optional base URL override (for proxying or compatible providers). */
  baseURL?: string;
  /**
   * Override pricing for cost estimation. If set, the adapter uses these rates
   * instead of the built-in pricing table. Useful for unknown models or custom
   * pricing arrangements. USD per million tokens.
   *
   * Accepts the full Pricing shape (input + output + optional cache write/read).
   * Legacy 2-field overrides still typecheck — cache rates are optional and
   * default to undefined (no cache cost contribution). Anthropic operators with
   * cache-heavy workloads should set both `cacheWriteUsdPerMtok` and
   * `cacheReadUsdPerMtok` to avoid under-reporting cached responses.
   */
  costOverride?: import("./_shared/cost").Pricing;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Build a ModelClient backed by Anthropic's Messages API.
 *
 * The returned client is stateless beyond the underlying HTTP client:
 * `countTokens` is a synchronous chars/4 approximation, and `complete`
 * translates one AssembledPrompt into one Messages API call (streamed
 * when the caller supplies an onDelta callback, non-streaming otherwise).
 */
export function createAnthropicEngine(opts: AnthropicEngineOptions): ModelClient {
  // SDK client: apiKey undefined falls back to ANTHROPIC_API_KEY inside the
  // SDK; baseURL override supports proxies / compatible providers.
  const client = new Anthropic({
    apiKey: opts.apiKey,
    baseURL: opts.baseURL,
  });

  const maxContextTokens = opts.maxContextTokens ?? 200_000;
  const maxOutputTokens = opts.maxTokens ?? 4096;

  // Pricing freshness + availability warning at startup. Cost estimation
  // is advisory; this surfaces gaps so the operator isn't surprised when
  // budgets enforce against fabricated zeros. Fires once at factory time,
  // not per-turn.
  if (!opts.costOverride) {
    const rates = lookup(opts.model);
    if (!rates) {
      // eslint-disable-next-line no-console
      console.warn(
        `[engines/anthropic] No pricing entry for model "${opts.model}" and no costOverride configured. ` +
          `costUsd will be undefined; dailyBudgetUsd cannot enforce against this model. ` +
          `Add the model to src/engines/anthropic/pricing.ts or configure engine.costOverride in agent.yaml.`,
      );
    } else {
      const f = getFreshness();
      if (f.stale) {
        // eslint-disable-next-line no-console
        console.warn(
          `[engines/anthropic] Pricing table verifiedAt ${f.verifiedAt} is more than 90 days old. ` +
            `Cost estimates may be drifting from actual billing. Verify rates and update src/engines/anthropic/pricing.ts.`,
        );
      }
    }
  }

  return {
    maxContextTokens,

    countTokens(text: string): number {
      // Rough approximation matching Auggy's default tokenizer. Anthropic
      // does expose an async countTokens endpoint, but the ModelClient
      // interface is sync and an extra round trip per budget computation
      // is not worth the accuracy.
      return Math.ceil(text.length / 4);
    },

    async complete(
      prompt: AssembledPrompt,
      opts2?: { onDelta?: (delta: ModelDelta) => void },
    ): Promise<ModelResponse> {
      const system = assembleSystemText(prompt);
      const messages = convertMessages(prompt.messages);
      const tools = convertTools(prompt.tools);
      // Map Auggy's toolChoice onto Anthropic's shape: "any" forces some
      // tool call, "auto"/unset lets the model decide, and an object pins
      // a specific tool by name.
      const toolChoice =
        prompt.toolChoice === "any"
          ? { type: "any" as const }
          : prompt.toolChoice === "auto" || !prompt.toolChoice
            ? { type: "auto" as const }
            : { type: "tool" as const, name: prompt.toolChoice.name };

      // tool_choice is only meaningful alongside tools, so the pair is
      // spread in together (and both omitted when there are no tools).
      const params = {
        model: opts.model,
        max_tokens: maxOutputTokens,
        system,
        messages,
        ...(tools.length > 0 ? { tools, tool_choice: toolChoice } : {}),
      };

      // Attach cost metadata (costUsd, or unpricedReason when pricing is
      // unavailable) to a ModelResponse from the raw API usage counters.
      const withCost = (r: ModelResponse, rawUsage: Anthropic.Messages.Usage): ModelResponse => {
        const result = priceAnthropicResponse(opts.model, opts.costOverride, {
          input_tokens: rawUsage.input_tokens,
          output_tokens: rawUsage.output_tokens,
          cache_creation_input_tokens: rawUsage.cache_creation_input_tokens ?? null,
          cache_read_input_tokens: rawUsage.cache_read_input_tokens ?? null,
          // cache_creation (TTL breakdown) and service_tier are new fields not yet
          // in the Anthropic SDK type; cast defensively via unknown.
          cache_creation: (rawUsage as unknown as Record<string, unknown>).cache_creation as
            | { ephemeral_5m_input_tokens?: number; ephemeral_1h_input_tokens?: number }
            | null
            | undefined,
          service_tier: (rawUsage as unknown as Record<string, unknown>).service_tier as
            | string
            | null
            | undefined,
        });
        return result.priced
          ? { ...r, costUsd: result.costUsd }
          : { ...r, costUsd: undefined, unpricedReason: result.reason };
      };

      try {
        if (opts2?.onDelta) {
          // Streaming path: emit text deltas as they arrive from the model.
          // Tool-use blocks are NOT streamed in v1 — they arrive in the
          // finalMessage. This is intentional: text streaming is the latency
          // win; tool args are small.
          const stream = client.messages.stream(params);
          stream.on("text", (text) => {
            opts2.onDelta!({ kind: "text_delta", text });
          });
          const finalMessage = await stream.finalMessage();
          return withCost(buildModelResponse(finalMessage), finalMessage.usage);
        }

        // Non-streaming path (backward compat for tests, other consumers)
        const response = await client.messages.create(params);
        return withCost(buildModelResponse(response), response.usage);
      } catch (err) {
        // Never returns: rewraps provider spend-cap errors into an
        // operator-actionable message, rethrows everything else.
        rewrapCostCapError(err);
      }
    },
  };
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Anthropic SDK errors that indicate the operator's provider-side spend cap
|
|
171
|
+
* has been reached get rewrapped with a clear, operator-actionable message.
|
|
172
|
+
* Other errors are re-thrown unchanged.
|
|
173
|
+
*
|
|
174
|
+
* Per ADR-024, provider-side spend caps are the v1.0 hard limit on agent
|
|
175
|
+
* spend. When they fire, Anthropic returns a 402 (Payment Required) or a
|
|
176
|
+
* 429 with cap-related text in the message body. We surface a concise
|
|
177
|
+
* pointer to the console rather than the raw SDK error string, so an
|
|
178
|
+
* operator who sees this in logs / `auggy dev` output knows exactly where
|
|
179
|
+
* to go.
|
|
180
|
+
*
|
|
181
|
+
* Detected by structural shape (`status` field on the thrown object) rather
|
|
182
|
+
* than `instanceof Anthropic.APIError` — keeps the helper testable without
|
|
183
|
+
* coupling to the SDK's class hierarchy.
|
|
184
|
+
*/
|
|
185
|
+
function rewrapCostCapError(err: unknown): never {
|
|
186
|
+
if (err && typeof err === "object" && "status" in err) {
|
|
187
|
+
const status = (err as { status: unknown }).status;
|
|
188
|
+
const message = String((err as { message?: unknown }).message ?? "");
|
|
189
|
+
const lower = message.toLowerCase();
|
|
190
|
+
|
|
191
|
+
const isCostCap =
|
|
192
|
+
status === 402 ||
|
|
193
|
+
(status === 429 && /credit|spend|billing|limit|quota|cap|exceed|plan/.test(lower));
|
|
194
|
+
|
|
195
|
+
if (isCostCap) {
|
|
196
|
+
throw new Error(
|
|
197
|
+
`Anthropic provider spend cap reached (HTTP ${String(status)}). ` +
|
|
198
|
+
`Increase the cap or wait for reset in your Anthropic console at ` +
|
|
199
|
+
`https://console.anthropic.com/settings/limits. ` +
|
|
200
|
+
`(Original error: ${message})`,
|
|
201
|
+
);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
throw err;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// === AssembledPrompt → Anthropic request translation ===
|
|
208
|
+
|
|
209
|
+
function assembleSystemText(prompt: AssembledPrompt): string {
|
|
210
|
+
const parts: string[] = [];
|
|
211
|
+
|
|
212
|
+
if (prompt.systemBlocks.length > 0) {
|
|
213
|
+
parts.push(prompt.systemBlocks.join("\n\n"));
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// contextBlocks are preamble-placement blocks — content that should
|
|
217
|
+
// appear before the user message. Anthropic has no "between system
|
|
218
|
+
// and user" slot, so these fold into system.
|
|
219
|
+
if (prompt.contextBlocks.length > 0) {
|
|
220
|
+
parts.push(prompt.contextBlocks.join("\n\n"));
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// assistantPreamble is typically used for personality reinforcement.
|
|
224
|
+
// v1 puts it in system too rather than using Anthropic's assistant
|
|
225
|
+
// prefill (which would force the model to continue from that text
|
|
226
|
+
// instead of treating it as background).
|
|
227
|
+
if (prompt.assistantPreamble && prompt.assistantPreamble.length > 0) {
|
|
228
|
+
parts.push(prompt.assistantPreamble.join("\n\n"));
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
return parts.join("\n\n");
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
type MessageParam = Anthropic.Messages.MessageParam;
|
|
235
|
+
type ContentBlockParam = Anthropic.Messages.ContentBlockParam;
|
|
236
|
+
|
|
237
|
+
/**
 * Walk Auggy's flat message list and produce Anthropic's grouped format.
 * Auggy stores tool_use and tool_result as separate flat messages with
 * matching toolCallIds. Anthropic wants:
 * - tool_use blocks folded into the preceding assistant message
 * - tool_result blocks folded into a user-role message
 *
 * Consecutive tool_results collapse into a single user message so the
 * conversation alternates strictly between user and assistant roles.
 *
 * NOTE: a tool_use whose content does not parse as {name, arguments} JSON,
 * or that lacks a toolCallId, is dropped silently (likewise a tool_result
 * without a toolCallId) — the rest of the conversation is still emitted.
 */
function convertMessages(messages: Message[]): MessageParam[] {
  const result: MessageParam[] = [];
  // Manual index: the inner loops below consume several flat messages per
  // produced message, so a simple for-of won't do.
  let i = 0;

  while (i < messages.length) {
    const msg = messages[i]!;

    if (msg.role === "user") {
      result.push({ role: "user", content: msg.content });
      i++;
      continue;
    }

    if (msg.role === "assistant") {
      const blocks: ContentBlockParam[] = [];
      if (msg.content) {
        blocks.push({ type: "text", text: msg.content });
      }
      i++;
      // Gather any consecutive tool_use messages into the same assistant
      // turn. Auggy emits them back-to-back after a text response.
      while (i < messages.length && messages[i]!.role === "tool_use") {
        const tu = messages[i]!;
        const parsed = safeParseToolCall(tu.content);
        if (parsed && tu.toolCallId) {
          blocks.push({
            type: "tool_use",
            id: tu.toolCallId,
            name: parsed.name,
            input: parsed.arguments,
          });
        }
        i++;
      }
      if (blocks.length === 0) continue; // Anthropic rejects empty assistant
      result.push({ role: "assistant", content: blocks });
      continue;
    }

    if (msg.role === "tool_use") {
      // Orphaned tool_use with no preceding assistant text — wrap alone.
      const parsed = safeParseToolCall(msg.content);
      if (parsed && msg.toolCallId) {
        result.push({
          role: "assistant",
          content: [
            {
              type: "tool_use",
              id: msg.toolCallId,
              name: parsed.name,
              input: parsed.arguments,
            },
          ],
        });
      }
      i++;
      continue;
    }

    if (msg.role === "tool_result") {
      // Collapse the whole run of consecutive tool_results into one
      // user-role message of tool_result blocks.
      const blocks: ContentBlockParam[] = [];
      while (i < messages.length && messages[i]!.role === "tool_result") {
        const tr = messages[i]!;
        if (tr.toolCallId) {
          blocks.push({
            type: "tool_result",
            tool_use_id: tr.toolCallId,
            content: tr.content,
          });
        }
        i++;
      }
      if (blocks.length > 0) {
        result.push({ role: "user", content: blocks });
      }
      continue;
    }

    // Unknown role — skip defensively.
    i++;
  }

  // Coalesce pass: Anthropic requires strict user/assistant alternation.
  // Consecutive same-role messages can appear when:
  // - tool_result (mapped to user) is followed by the next turn's user message
  // - Empty assistant content is skipped, producing adjacent user messages
  // Merge consecutive same-role messages by combining their content blocks.
  return coalesceMessages(result);
}
|
|
336
|
+
|
|
337
|
+
function coalesceMessages(messages: MessageParam[]): MessageParam[] {
|
|
338
|
+
if (messages.length <= 1) return messages;
|
|
339
|
+
|
|
340
|
+
const coalesced: MessageParam[] = [messages[0]!];
|
|
341
|
+
|
|
342
|
+
for (let i = 1; i < messages.length; i++) {
|
|
343
|
+
const prev = coalesced[coalesced.length - 1]!;
|
|
344
|
+
const curr = messages[i]!;
|
|
345
|
+
|
|
346
|
+
if (prev.role === curr.role) {
|
|
347
|
+
// Merge: combine content into an array of content blocks
|
|
348
|
+
const prevBlocks = toContentBlocks(prev.content);
|
|
349
|
+
const currBlocks = toContentBlocks(curr.content);
|
|
350
|
+
(prev as { content: ContentBlockParam[] }).content = [...prevBlocks, ...currBlocks];
|
|
351
|
+
} else {
|
|
352
|
+
coalesced.push(curr);
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
return coalesced;
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
function toContentBlocks(content: string | ContentBlockParam[]): ContentBlockParam[] {
|
|
360
|
+
if (typeof content === "string") {
|
|
361
|
+
return [{ type: "text", text: content }];
|
|
362
|
+
}
|
|
363
|
+
return content;
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
function safeParseToolCall(
|
|
367
|
+
content: string,
|
|
368
|
+
): { name: string; arguments: Record<string, unknown> } | null {
|
|
369
|
+
try {
|
|
370
|
+
const parsed = JSON.parse(content) as {
|
|
371
|
+
name?: unknown;
|
|
372
|
+
arguments?: unknown;
|
|
373
|
+
};
|
|
374
|
+
if (
|
|
375
|
+
parsed &&
|
|
376
|
+
typeof parsed.name === "string" &&
|
|
377
|
+
parsed.arguments &&
|
|
378
|
+
typeof parsed.arguments === "object"
|
|
379
|
+
) {
|
|
380
|
+
return {
|
|
381
|
+
name: parsed.name,
|
|
382
|
+
arguments: parsed.arguments as Record<string, unknown>,
|
|
383
|
+
};
|
|
384
|
+
}
|
|
385
|
+
} catch {
|
|
386
|
+
/* fall through */
|
|
387
|
+
}
|
|
388
|
+
return null;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
type AnthropicTool = Anthropic.Messages.Tool;
|
|
392
|
+
type AnthropicInputSchema = Anthropic.Messages.Tool.InputSchema;
|
|
393
|
+
|
|
394
|
+
function convertTools(toolDefs: ToolDefinition[]): AnthropicTool[] {
|
|
395
|
+
return toolDefs.map((td) => ({
|
|
396
|
+
name: td.name,
|
|
397
|
+
description: td.description,
|
|
398
|
+
input_schema: normalizeSchema(td.inputSchema),
|
|
399
|
+
}));
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
// JSON Schema keys Anthropic's API accepts for tool input schemas.
|
|
403
|
+
const ALLOWED_SCHEMA_KEYS = new Set([
|
|
404
|
+
"properties",
|
|
405
|
+
"required",
|
|
406
|
+
"description",
|
|
407
|
+
"enum",
|
|
408
|
+
"items",
|
|
409
|
+
"minItems",
|
|
410
|
+
"maxItems",
|
|
411
|
+
"minimum",
|
|
412
|
+
"maximum",
|
|
413
|
+
"pattern",
|
|
414
|
+
"format",
|
|
415
|
+
"default",
|
|
416
|
+
"anyOf",
|
|
417
|
+
"oneOf",
|
|
418
|
+
"allOf",
|
|
419
|
+
"not",
|
|
420
|
+
"additionalProperties",
|
|
421
|
+
]);
|
|
422
|
+
|
|
423
|
+
function normalizeSchema(schema: Record<string, unknown> | undefined): AnthropicInputSchema {
|
|
424
|
+
if (!schema || Object.keys(schema).length === 0) {
|
|
425
|
+
return { type: "object", properties: {} };
|
|
426
|
+
}
|
|
427
|
+
// Filter to known JSON Schema keys — strip $schema, $id, and other
|
|
428
|
+
// keys that Anthropic may reject or silently ignore.
|
|
429
|
+
const filtered: Record<string, unknown> = {};
|
|
430
|
+
for (const [key, value] of Object.entries(schema)) {
|
|
431
|
+
if (key !== "type" && ALLOWED_SCHEMA_KEYS.has(key)) {
|
|
432
|
+
filtered[key] = value;
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
return { type: "object", ...filtered } as AnthropicInputSchema;
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
// === Anthropic response → ModelResponse translation ===
|
|
439
|
+
|
|
440
|
+
function buildModelResponse(response: Anthropic.Messages.Message): ModelResponse {
|
|
441
|
+
let content = "";
|
|
442
|
+
const toolCalls: {
|
|
443
|
+
name: string;
|
|
444
|
+
arguments: Record<string, unknown>;
|
|
445
|
+
}[] = [];
|
|
446
|
+
|
|
447
|
+
for (const block of response.content) {
|
|
448
|
+
if (block.type === "text") {
|
|
449
|
+
content += block.text;
|
|
450
|
+
} else if (block.type === "tool_use") {
|
|
451
|
+
// Validate input is a plain object — the model could hallucinate
|
|
452
|
+
// a non-object value which would break downstream JSON.stringify
|
|
453
|
+
const input = block.input;
|
|
454
|
+
const args =
|
|
455
|
+
input && typeof input === "object" && !Array.isArray(input)
|
|
456
|
+
? (input as Record<string, unknown>)
|
|
457
|
+
: {};
|
|
458
|
+
toolCalls.push({ name: block.name, arguments: args });
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
const finishReason: ModelResponse["finishReason"] =
|
|
463
|
+
response.stop_reason === "tool_use"
|
|
464
|
+
? "tool_use"
|
|
465
|
+
: response.stop_reason === "max_tokens"
|
|
466
|
+
? "max_tokens"
|
|
467
|
+
: "end_turn";
|
|
468
|
+
|
|
469
|
+
// Anthropic's SDK may return null or omit cache fields when caching isn't active.
|
|
470
|
+
// Map nullish values to undefined so ModelResponse consumers can rely on undefined-checking.
|
|
471
|
+
const cacheCreationTokens = response.usage.cache_creation_input_tokens ?? undefined;
|
|
472
|
+
const cacheReadTokens = response.usage.cache_read_input_tokens ?? undefined;
|
|
473
|
+
|
|
474
|
+
return {
|
|
475
|
+
content,
|
|
476
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
477
|
+
inputTokens: response.usage.input_tokens,
|
|
478
|
+
outputTokens: response.usage.output_tokens,
|
|
479
|
+
...(cacheCreationTokens !== undefined ? { cacheCreationTokens } : {}),
|
|
480
|
+
...(cacheReadTokens !== undefined ? { cacheReadTokens } : {}),
|
|
481
|
+
finishReason,
|
|
482
|
+
};
|
|
483
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import {
|
|
2
|
+
type Pricing,
|
|
3
|
+
type CostResult,
|
|
4
|
+
type PricingFreshness,
|
|
5
|
+
computeCostUsd,
|
|
6
|
+
freshness,
|
|
7
|
+
} from "../_shared/cost";
|
|
8
|
+
|
|
9
|
+
// USD per million tokens. Update via PR when OpenAI changes pricing.
|
|
10
|
+
const TABLE: Record<string, Pricing> = {
|
|
11
|
+
"gpt-5": { inputUsdPerMtok: 5.0, outputUsdPerMtok: 20.0 },
|
|
12
|
+
"gpt-5-mini": { inputUsdPerMtok: 1.0, outputUsdPerMtok: 4.0 },
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Enumerate the model IDs in the pricing table. Used by the model picker
|
|
17
|
+
* to derive UI choices without exposing the table's internal shape.
|
|
18
|
+
*/
|
|
19
|
+
export function listModels(): string[] {
|
|
20
|
+
return Object.keys(TABLE);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
const VERIFIED_AT = "2026-04-27";
|
|
24
|
+
|
|
25
|
+
export function lookup(model: string): Pricing | null {
|
|
26
|
+
return TABLE[model] ?? null;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export function getFreshness(): PricingFreshness {
|
|
30
|
+
return freshness(VERIFIED_AT);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/** Token counts from an OpenAI Chat Completions response, as consumed by priceOpenAIResponse. */
export interface OpenAIUsage {
  /** Tokens in the request (prompt) — billed at the input rate. */
  prompt_tokens: number;
  /** Tokens generated in the response — billed at the output rate. */
  completion_tokens: number;
  /* NOTE(review): cached_tokens is declared but not read by priceOpenAIResponse in this file — confirm intended use. */
  cached_tokens?: number;
  /** Reasoning tokens are billed at the output rate for o-series / gpt-5.1. */
  reasoning_tokens?: number;
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Price an OpenAI Chat Completions response.
|
|
43
|
+
*
|
|
44
|
+
* Reasoning tokens (o-series, gpt-5.1) are folded into outputTokens because
|
|
45
|
+
* they are billed at the output rate.
|
|
46
|
+
*
|
|
47
|
+
* Returns `{ priced: false, reason }` when the model is not in the table and
|
|
48
|
+
* no override is provided.
|
|
49
|
+
*/
|
|
50
|
+
export function priceOpenAIResponse(
|
|
51
|
+
model: string,
|
|
52
|
+
override: Pricing | undefined,
|
|
53
|
+
usage: OpenAIUsage,
|
|
54
|
+
): CostResult {
|
|
55
|
+
const rates = override ?? lookup(model);
|
|
56
|
+
if (!rates) {
|
|
57
|
+
return { priced: false, reason: `openai: no pricing entry for model "${model}"` };
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Reasoning tokens are billed as output for GPT-5/o-series.
|
|
61
|
+
const outputTokens = usage.completion_tokens + (usage.reasoning_tokens ?? 0);
|
|
62
|
+
const costUsd = computeCostUsd(rates, {
|
|
63
|
+
inputTokens: usage.prompt_tokens,
|
|
64
|
+
outputTokens,
|
|
65
|
+
});
|
|
66
|
+
return { priced: true, costUsd };
|
|
67
|
+
}
|