@circuitwall/jarela 0.14.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.next/standalone/.next/BUILD_ID +1 -1
- package/.next/standalone/.next/app-path-routes-manifest.json +1 -1
- package/.next/standalone/.next/build-manifest.json +2 -2
- package/.next/standalone/.next/prerender-manifest.json +3 -3
- package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/_global-error.html +1 -1
- package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/_not-found.html +2 -2
- package/.next/standalone/.next/server/app/_not-found.rsc +2 -2
- package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/api/v1/agents/[id]/route.js +6 -1
- package/.next/standalone/.next/server/app/api/v1/agents/[id]/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/agents/route.js +6 -1
- package/.next/standalone/.next/server/app/api/v1/agents/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js +9 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/route.js +9 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js +36 -29
- package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/events/route.js +7 -1
- package/.next/standalone/.next/server/app/api/v1/events/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/extensions/route.js +3 -3
- package/.next/standalone/.next/server/app/api/v1/extensions/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js +4 -4
- package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/health/route.js +7 -1
- package/.next/standalone/.next/server/app/api/v1/health/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/mcp-servers/[name]/route.js +9 -1
- package/.next/standalone/.next/server/app/api/v1/mcp-servers/[name]/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/mcp-servers/route.js +9 -1
- package/.next/standalone/.next/server/app/api/v1/mcp-servers/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/models/route.js +6 -1
- package/.next/standalone/.next/server/app/api/v1/models/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/page-capture/route.js +7 -1
- package/.next/standalone/.next/server/app/api/v1/page-capture/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/pending-actions/[id]/approve/route.js +14 -7
- package/.next/standalone/.next/server/app/api/v1/pending-actions/[id]/approve/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/providers/[provider]/models/route.js +28 -0
- package/.next/standalone/.next/server/app/api/v1/providers/[provider]/models/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/providers/route.js +7 -1
- package/.next/standalone/.next/server/app/api/v1/providers/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/route.js +16 -2
- package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js +8 -1
- package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/threads/route.js +6 -1
- package/.next/standalone/.next/server/app/api/v1/threads/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/tools/route.js +10 -3
- package/.next/standalone/.next/server/app/api/v1/tools/route.js.map +1 -1
- package/.next/standalone/.next/server/app/index.html +2 -2
- package/.next/standalone/.next/server/app/index.rsc +3 -3
- package/.next/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/index.segments/_full.segment.rsc +3 -3
- package/.next/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/index.segments/_index.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/index.segments/_tree.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/page.js +56 -0
- package/.next/standalone/.next/server/app/page.js.map +1 -1
- package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/setup/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/setup.html +1 -1
- package/.next/standalone/.next/server/app/setup.rsc +2 -2
- package/.next/standalone/.next/server/app/setup.segments/_full.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/setup.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/setup.segments/_index.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/setup.segments/_tree.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/setup.segments/setup/__PAGE__.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/setup.segments/setup.segment.rsc +1 -1
- package/.next/standalone/.next/server/app-paths-manifest.json +1 -1
- package/.next/standalone/.next/server/chunks/1683.js +2 -2
- package/.next/standalone/.next/server/chunks/2082.js +122 -13
- package/.next/standalone/.next/server/chunks/2082.js.map +1 -1
- package/.next/standalone/.next/server/chunks/210.js +3 -3
- package/.next/standalone/.next/server/chunks/210.js.map +1 -1
- package/.next/standalone/.next/server/chunks/239.js +1902 -1487
- package/.next/standalone/.next/server/chunks/239.js.map +1 -1
- package/.next/standalone/.next/server/chunks/2447.js +9 -1
- package/.next/standalone/.next/server/chunks/2447.js.map +1 -1
- package/.next/standalone/.next/server/chunks/423.js +125 -16
- package/.next/standalone/.next/server/chunks/423.js.map +1 -1
- package/.next/standalone/.next/server/chunks/4631.js +36 -29
- package/.next/standalone/.next/server/chunks/4631.js.map +1 -1
- package/.next/standalone/.next/server/chunks/5937.js +3 -2
- package/.next/standalone/.next/server/chunks/5937.js.map +1 -1
- package/.next/standalone/.next/server/chunks/{947.js → 8866.js} +11321 -10883
- package/.next/standalone/.next/server/chunks/8866.js.map +1 -0
- package/.next/standalone/.next/server/chunks/9032.js +3 -3
- package/.next/standalone/.next/server/chunks/9032.js.map +1 -1
- package/.next/standalone/.next/server/middleware-build-manifest.js +2 -2
- package/.next/standalone/.next/server/middleware.js +122 -13
- package/.next/standalone/.next/server/pages/404.html +2 -2
- package/.next/standalone/.next/server/pages/500.html +1 -1
- package/.next/standalone/.next/server/proxy.js.map +1 -1
- package/.next/standalone/.next/server/server-reference-manifest.json +1 -1
- package/.next/standalone/.next/static/chunks/app/{page-473b39ec30c7f569.js → page-a7cae65f235e2942.js} +57 -1
- package/.next/standalone/.next/static/chunks/app/page-a7cae65f235e2942.js.map +1 -0
- package/.next/standalone/.next/static/css/{6f8b1a84bcbcd467.css → e57bdbbbb5a05779.css} +2 -2
- package/.next/standalone/.next/static/css/e57bdbbbb5a05779.css.map +1 -0
- package/.next/standalone/package.json +9 -1
- package/CHANGELOG.md +90 -0
- package/README.md +30 -2
- package/api/types.ts +8 -0
- package/app/api/v1/agents/[id]/route.ts +7 -0
- package/app/api/v1/agents/route.ts +7 -0
- package/app/api/v1/events/route.ts +8 -0
- package/app/api/v1/extensions/route.ts +2 -2
- package/app/api/v1/extensions/tools/[name]/secrets/route.ts +3 -3
- package/app/api/v1/health/route.ts +8 -0
- package/app/api/v1/models/route.ts +7 -0
- package/app/api/v1/page-capture/route.ts +8 -0
- package/app/api/v1/providers/route.ts +8 -0
- package/app/api/v1/threads/[thread_id]/route.ts +8 -0
- package/app/api/v1/threads/[thread_id]/run/route.ts +9 -0
- package/app/api/v1/threads/route.ts +7 -0
- package/app/api/v1/tools/route.ts +9 -0
- package/components/chat/ContextUsageBar.tsx +44 -0
- package/lib/agents/llm.ts +25 -2
- package/lib/agents/run-thread.ts +13 -1
- package/lib/agents/stream-collector.ts +9 -1
- package/lib/api/serializers.test.ts +15 -0
- package/lib/api/serializers.ts +8 -0
- package/lib/db/migrations.ts +15 -0
- package/lib/health/runner.test.ts +24 -2
- package/lib/mcp/registry.ts +14 -6
- package/lib/providers/anthropic.test.ts +95 -0
- package/lib/providers/anthropic.ts +106 -10
- package/lib/providers/jarela-chat-model.ts +9 -1
- package/lib/providers/known-context-windows.ts +21 -0
- package/lib/providers/types.ts +21 -1
- package/lib/stores/message-usage.test.ts +34 -0
- package/lib/stores/message-usage.ts +15 -3
- package/lib/stores/pricing.test.ts +52 -0
- package/lib/stores/pricing.ts +26 -1
- package/lib/tools/builtins.ts +4 -0
- package/lib/tools/extension-surfaces.test.ts +79 -0
- package/lib/tools/extension-surfaces.ts +153 -0
- package/lib/tools/index.ts +27 -8
- package/lib/tools/list-tools.test.ts +76 -0
- package/lib/tools/list-tools.ts +84 -0
- package/lib/tools/mcp-servers-info.test.ts +73 -0
- package/lib/tools/mcp-servers-info.ts +71 -0
- package/lib/tools/providers-info.test.ts +73 -0
- package/lib/tools/providers-info.ts +106 -0
- package/lib/tools/registry.ts +36 -25
- package/lib/tools/types.ts +13 -0
- package/package.json +9 -1
- package/.next/standalone/.next/server/chunks/947.js.map +0 -1
- package/.next/standalone/.next/static/chunks/app/page-473b39ec30c7f569.js.map +0 -1
- package/.next/standalone/.next/static/css/6f8b1a84bcbcd467.css.map +0 -1
- /package/.next/standalone/.next/static/{T0p2VVPsJPj44rwbmjaFb → d_vhp-lJqfdjRFpnLVIqZ}/_buildManifest.js +0 -0
- /package/.next/standalone/.next/static/{T0p2VVPsJPj44rwbmjaFb → d_vhp-lJqfdjRFpnLVIqZ}/_ssgManifest.js +0 -0
|
@@ -53,6 +53,80 @@ function appendServerTools(
|
|
|
53
53
|
return merged;
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
+
// Anthropic prompt-caching breakpoints. Within a multi-tool ReAct turn the
// system prompt and the tool definitions are identical on every LLM call, and
// tool_results only grow at the tail — exactly the stable prefix Anthropic's
// ephemeral cache is built for. We mark three breakpoints (system, last tool,
// last tool_result) so calls 2..N read the prefix at ~10% of the input rate.
// A prefix shorter than the minimum cacheable size is silently ignored by the
// API at no extra cost, so it is safe to mark unconditionally.
const EPHEMERAL: Anthropic.CacheControlEphemeral = { type: "ephemeral" };

/**
 * Wraps the system prompt in a single text block that carries an ephemeral
 * cache breakpoint.
 *
 * @param text - System prompt text. It is forwarded verbatim (never trimmed)
 *   so the cached prefix stays byte-stable between calls.
 * @returns A one-element block array, or `undefined` when there is no usable
 *   prompt so the caller can omit the `system` field entirely.
 */
export function withSystemCacheControl(text: string): Anthropic.TextBlockParam[] | undefined {
  // An empty prompt means "no system prompt". A whitespace-only prompt is
  // treated the same way: sending it as a text block is expected to be
  // rejected by the Messages API (text blocks must contain non-whitespace
  // text). NOTE(review): confirm against current API behavior.
  if (!text || text.trim() === "") return undefined;
  return [{ type: "text", text, cache_control: EPHEMERAL }];
}
|
|
69
|
+
|
|
70
|
+
/**
 * Places an ephemeral cache breakpoint on the final tool definition.
 *
 * @param tools - Tool definitions in the order they will be sent.
 * @returns The same array instance when it is empty; otherwise a new array
 *   whose last entry is a shallow copy carrying `cache_control`. All earlier
 *   entries are reused by reference and the input is never mutated.
 */
export function withToolsCacheControl(tools: Anthropic.Tool[]): Anthropic.Tool[] {
  const lastIndex = tools.length - 1;
  if (lastIndex < 0) return tools;
  return tools.map((tool, index) =>
    index === lastIndex ? { ...tool, cache_control: EPHEMERAL } : tool,
  );
}
|
|
75
|
+
|
|
76
|
+
/**
 * Places an ephemeral cache breakpoint on the most recent `tool_result`
 * block in the conversation.
 *
 * Messages are scanned from newest to oldest, and blocks within a message
 * from last to first; messages whose content is a plain string are skipped.
 *
 * @param messages - Conversation messages in send order.
 * @returns A copy of `messages` in which only the affected message and its
 *   content array are replaced by shallow copies, or the original array
 *   instance when no `tool_result` block exists. The input is never mutated.
 */
export function withLastToolResultCacheControl(
  messages: Anthropic.MessageParam[],
): Anthropic.MessageParam[] {
  let messageIndex = messages.length;
  while (messageIndex-- > 0) {
    const message = messages[messageIndex];
    const content = message.content;
    if (typeof content === "string") continue;

    // Walk a reversed copy so the first hit is the last tool_result.
    for (const [offset, block] of [...content].reverse().entries()) {
      if (block.type !== "tool_result") continue;

      const blockIndex = content.length - 1 - offset;
      const patchedContent = [
        ...content.slice(0, blockIndex),
        { ...block, cache_control: EPHEMERAL },
        ...content.slice(blockIndex + 1),
      ];
      return [
        ...messages.slice(0, messageIndex),
        { ...message, content: patchedContent },
        ...messages.slice(messageIndex + 1),
      ];
    }
  }
  return messages;
}
|
|
96
|
+
|
|
97
|
+
/**
 * The part of a streamed `message_start` event that this module reads:
 * the optional usage snapshot attached to the message.
 */
type AnthropicMessageStartEvent = {
  type: "message_start";
  message: { usage?: Anthropic.Usage };
};

/**
 * The part of a streamed `message_delta` event that this module reads:
 * the optional usage payload attached to the event.
 */
type AnthropicMessageDeltaEvent = {
  type: "message_delta";
  usage?: Anthropic.MessageDeltaUsage;
};
|
|
105
|
+
|
|
106
|
+
/**
 * Builds the usage event for a `message_start` stream event.
 *
 * Only the input-side counts (fresh input, cache writes, cache reads) are
 * reported here. `message_start` also carries a small non-zero
 * `output_tokens` value, but the later `message_delta` event reports the
 * final output total and the consumer keeps a running total of usage events,
 * so forwarding the start value as well would count those tokens twice.
 *
 * @param usage - Usage snapshot from the `message_start` event, if present.
 * @returns A `usage` event with `output_tokens` fixed at 0 and missing counts
 *   defaulted to 0, or `null` when the event carried no usage.
 */
function usageEventFromStart(usage: Anthropic.Usage | undefined): ProviderStreamEvent | null {
  if (!usage) return null;
  return {
    type: "usage",
    input_tokens: usage.input_tokens ?? 0,
    // Output is accounted for once, from message_delta's final count.
    output_tokens: 0,
    cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
    cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
  };
}
|
|
116
|
+
|
|
117
|
+
/**
 * Builds the usage event for a `message_delta` stream event.
 *
 * @param usage - Usage payload from the `message_delta` event, if present.
 * @returns A `usage` event carrying only the output token count (input fixed
 *   at 0), or `null` when the event carried no usage.
 */
function usageEventFromDelta(usage: Anthropic.MessageDeltaUsage | undefined): ProviderStreamEvent | null {
  // message_delta reports the *final* output_tokens. Input and cache counts
  // were already reported from message_start, so they are left out here to
  // keep the agent loop's running total from double-counting cache reads.
  return usage
    ? { type: "usage", input_tokens: 0, output_tokens: usage.output_tokens ?? 0 }
    : null;
}
|
|
129
|
+
|
|
56
130
|
export const anthropicProvider: ModelProvider = {
|
|
57
131
|
name: "anthropic",
|
|
58
132
|
|
|
@@ -80,7 +154,7 @@ export const anthropicProvider: ModelProvider = {
|
|
|
80
154
|
const stream = await client.messages.stream({
|
|
81
155
|
model: model_id,
|
|
82
156
|
max_tokens: params.max_tokens ?? 4096,
|
|
83
|
-
system: systemText
|
|
157
|
+
system: withSystemCacheControl(systemText),
|
|
84
158
|
messages: userMessages,
|
|
85
159
|
});
|
|
86
160
|
|
|
@@ -103,13 +177,18 @@ export const anthropicProvider: ModelProvider = {
|
|
|
103
177
|
});
|
|
104
178
|
|
|
105
179
|
const systemMsg = messages.find((m) => m.role === "system");
|
|
106
|
-
const
|
|
107
|
-
const
|
|
180
|
+
const systemText = typeof systemMsg?.content === "string" ? systemMsg.content : "";
|
|
181
|
+
const msgList = withLastToolResultCacheControl(
|
|
182
|
+
toAnthropicMessages(messages.filter((m) => m.role !== "system")),
|
|
183
|
+
);
|
|
184
|
+
const anthropicTools = withToolsCacheControl(
|
|
185
|
+
appendServerTools(toAnthropicTools(tools), params),
|
|
186
|
+
);
|
|
108
187
|
|
|
109
188
|
const resp = await client.messages.create({
|
|
110
189
|
model: model_id,
|
|
111
190
|
max_tokens: params.max_tokens ?? 4096,
|
|
112
|
-
system:
|
|
191
|
+
system: withSystemCacheControl(systemText),
|
|
113
192
|
messages: msgList,
|
|
114
193
|
tools: anthropicTools,
|
|
115
194
|
...(params.thinking ? { thinking: params.thinking } : {}),
|
|
@@ -141,8 +220,13 @@ export const anthropicProvider: ModelProvider = {
|
|
|
141
220
|
});
|
|
142
221
|
|
|
143
222
|
const systemMsg = messages.find((m) => m.role === "system");
|
|
144
|
-
const
|
|
145
|
-
const
|
|
223
|
+
const systemText = typeof systemMsg?.content === "string" ? systemMsg.content : "";
|
|
224
|
+
const msgList = withLastToolResultCacheControl(
|
|
225
|
+
toAnthropicMessages(messages.filter((m) => m.role !== "system")),
|
|
226
|
+
);
|
|
227
|
+
const anthropicTools = withToolsCacheControl(
|
|
228
|
+
appendServerTools(toAnthropicTools(tools), params),
|
|
229
|
+
);
|
|
146
230
|
|
|
147
231
|
const body: Anthropic.Messages.MessageStreamParams = {
|
|
148
232
|
model: model_id,
|
|
@@ -150,7 +234,8 @@ export const anthropicProvider: ModelProvider = {
|
|
|
150
234
|
messages: msgList,
|
|
151
235
|
...(pickAnthropicOptions(params) as Record<string, unknown>),
|
|
152
236
|
};
|
|
153
|
-
|
|
237
|
+
const systemParam = withSystemCacheControl(systemText);
|
|
238
|
+
if (systemParam) body.system = systemParam;
|
|
154
239
|
if (anthropicTools.length > 0) body.tools = anthropicTools;
|
|
155
240
|
if (params.thinking) {
|
|
156
241
|
(body as unknown as Record<string, unknown>).thinking = params.thinking;
|
|
@@ -161,6 +246,12 @@ export const anthropicProvider: ModelProvider = {
|
|
|
161
246
|
const blockType = new Map<number, "text" | "thinking" | "tool_use">();
|
|
162
247
|
|
|
163
248
|
for await (const event of stream) {
|
|
249
|
+
if (event.type === "message_start") {
|
|
250
|
+
const ev = event as unknown as AnthropicMessageStartEvent;
|
|
251
|
+
const u = usageEventFromStart(ev.message?.usage);
|
|
252
|
+
if (u) yield u;
|
|
253
|
+
continue;
|
|
254
|
+
}
|
|
164
255
|
if (event.type === "content_block_start") {
|
|
165
256
|
const cb = event.content_block;
|
|
166
257
|
if (cb.type === "tool_use") {
|
|
@@ -180,9 +271,14 @@ export const anthropicProvider: ModelProvider = {
|
|
|
180
271
|
} else if (d.type === "input_json_delta" && d.partial_json !== undefined) {
|
|
181
272
|
yield { type: "tool_call_chunk", index: event.index, args_delta: d.partial_json };
|
|
182
273
|
}
|
|
183
|
-
} else if (event.type === "message_delta"
|
|
184
|
-
const
|
|
185
|
-
|
|
274
|
+
} else if (event.type === "message_delta") {
|
|
275
|
+
const delta = event as unknown as AnthropicMessageDeltaEvent;
|
|
276
|
+
const u = usageEventFromDelta(delta.usage);
|
|
277
|
+
if (u) yield u;
|
|
278
|
+
if (event.delta?.stop_reason) {
|
|
279
|
+
const reason = event.delta.stop_reason;
|
|
280
|
+
yield { type: "stop", reason: reason === "tool_use" ? "tool_use" : reason === "max_tokens" ? "length" : "stop" };
|
|
281
|
+
}
|
|
186
282
|
}
|
|
187
283
|
}
|
|
188
284
|
})();
|
|
@@ -203,8 +203,13 @@ export class JarelaChatModel extends BaseChatModel {
|
|
|
203
203
|
} else if (event.type === "usage") {
|
|
204
204
|
// ADR-0041: surface real provider token counts on the final
|
|
205
205
|
// AIMessageChunk via LangChain's standard `usage_metadata` field so
|
|
206
|
-
// the agent loop can snapshot them into message_usage.
|
|
206
|
+
// the agent loop can snapshot them into message_usage. PR #181 added
|
|
207
|
+
// Anthropic prompt caching; carry the cache breakdown through
|
|
208
|
+
// `input_token_details` (LangChain's standard channel) so cost
|
|
209
|
+
// attribution downstream can apply the 1.25× / 0.1× rates.
|
|
207
210
|
emittedAny = true;
|
|
211
|
+
const cacheCreation = event.cache_creation_input_tokens ?? 0;
|
|
212
|
+
const cacheRead = event.cache_read_input_tokens ?? 0;
|
|
208
213
|
yield new ChatGenerationChunk({
|
|
209
214
|
message: new AIMessageChunk({
|
|
210
215
|
content: "",
|
|
@@ -213,6 +218,9 @@ export class JarelaChatModel extends BaseChatModel {
|
|
|
213
218
|
output_tokens: event.output_tokens ?? 0,
|
|
214
219
|
total_tokens: event.total_tokens
|
|
215
220
|
?? (event.input_tokens ?? 0) + (event.output_tokens ?? 0),
|
|
221
|
+
...(cacheCreation > 0 || cacheRead > 0
|
|
222
|
+
? { input_token_details: { cache_creation: cacheCreation, cache_read: cacheRead } }
|
|
223
|
+
: {}),
|
|
216
224
|
},
|
|
217
225
|
}),
|
|
218
226
|
text: "",
|
|
@@ -119,3 +119,24 @@ export function getKnownContextLength(provider: string, model_id: string): numbe
|
|
|
119
119
|
export function getKnownMaxOutputTokens(provider: string, model_id: string): number | null {
|
|
120
120
|
return getKnownModelLimits(provider, model_id)?.max_output_tokens ?? null;
|
|
121
121
|
}
|
|
122
|
+
|
|
123
|
+
/**
 * Flat snapshot of the static model catalog for one provider, used by
 * introspection tools so the agent can enumerate what is known statically.
 *
 * @param provider - Provider name; only `anthropic`, `gemini`, `openai` and
 *   `deepseek` have a static table.
 * @returns One entry per known model with its context length and maximum
 *   output tokens (`null` when the table has no value). Returns `[]` for
 *   providers without a static table (e.g. `langchain`, `mock`, externals).
 */
export function listKnownModels(
  provider: string,
): Array<{ model_id: string; context_length: number; max_output_tokens: number | null }> {
  const resolveTable = (): Record<string, KnownModelLimits> | null => {
    if (provider === "anthropic") return ANTHROPIC;
    if (provider === "gemini") return GEMINI;
    if (provider === "openai") return OPENAI;
    if (provider === "deepseek") return DEEPSEEK;
    return null;
  };

  const limitsByModel = resolveTable();
  if (limitsByModel === null) return [];

  const catalog: Array<{ model_id: string; context_length: number; max_output_tokens: number | null }> = [];
  for (const [model_id, limits] of Object.entries(limitsByModel)) {
    catalog.push({
      model_id,
      context_length: limits.context_length,
      max_output_tokens: limits.max_output_tokens ?? null,
    });
  }
  return catalog;
}
|
package/lib/providers/types.ts
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @public
|
|
3
|
+
*
|
|
4
|
+
* Public LLM-provider extension contract.
|
|
5
|
+
*
|
|
6
|
+
* Every type and interface in this file is part of the package's
|
|
7
|
+
* stable public surface (per `package.json#exports`). External provider
|
|
8
|
+
* adapters — both in-tree and `~/.jarela/providers/*.cjs` plugins —
|
|
9
|
+
* conform to {@link ModelProvider}. Removing or breaking any export
|
|
10
|
+
* here counts as a breaking change under the deprecation policy in
|
|
11
|
+
* CONTRIBUTING.md.
|
|
12
|
+
*/
|
|
13
|
+
|
|
1
14
|
import type { ContentPart, InvokeMessage, InvokeResult, OpenAITool } from "@/lib/tools/types";
|
|
2
15
|
export type { InvokeMessage, InvokeResult, OpenAITool };
|
|
3
16
|
|
|
@@ -44,7 +57,14 @@ export type ProviderStreamEvent =
|
|
|
44
57
|
| { type: "thinking"; delta: string }
|
|
45
58
|
| { type: "tool_call_chunk"; index: number; id?: string; name?: string; args_delta?: string }
|
|
46
59
|
| { type: "citation"; source?: string; snippet?: string; url?: string }
|
|
47
|
-
| {
|
|
60
|
+
| {
|
|
61
|
+
type: "usage";
|
|
62
|
+
input_tokens?: number;
|
|
63
|
+
output_tokens?: number;
|
|
64
|
+
total_tokens?: number;
|
|
65
|
+
cache_creation_input_tokens?: number;
|
|
66
|
+
cache_read_input_tokens?: number;
|
|
67
|
+
}
|
|
48
68
|
| { type: "audio_chunk"; mime_type: string; data_b64: string }
|
|
49
69
|
| { type: "provider_event"; name: string; payload: unknown }
|
|
50
70
|
| { type: "stop"; reason: "stop" | "tool_use" | "length" };
|
|
@@ -231,4 +231,38 @@ describe("message_usage snapshot store (ADR-0041)", () => {
|
|
|
231
231
|
const map = getMessageUsageByIds([]);
|
|
232
232
|
expect(map.size).toBe(0);
|
|
233
233
|
});
|
|
234
|
+
|
|
235
|
+
it("persists Anthropic cache_creation/cache_read token counts (PR #181 follow-up)", () => {
|
|
236
|
+
recordMessageUsage({
|
|
237
|
+
message_id: "m-cache",
|
|
238
|
+
thread_id: "t-cache",
|
|
239
|
+
agent_id: "a", agent_name: "A",
|
|
240
|
+
provider: "anthropic", model_id: "claude-sonnet-4", model_config_name: null,
|
|
241
|
+
input_tokens: 1200,
|
|
242
|
+
output_tokens: 350,
|
|
243
|
+
cache_creation_input_tokens: 4000,
|
|
244
|
+
cache_read_input_tokens: 80_000,
|
|
245
|
+
input_rate_usd_per_mtok: 3,
|
|
246
|
+
output_rate_usd_per_mtok: 15,
|
|
247
|
+
cost_usd: 0.04,
|
|
248
|
+
});
|
|
249
|
+
const row = getMessageUsage("m-cache");
|
|
250
|
+
expect(row?.cache_creation_input_tokens).toBe(4000);
|
|
251
|
+
expect(row?.cache_read_input_tokens).toBe(80_000);
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
it("stores NULL cache columns for legacy rows that omit them", () => {
|
|
255
|
+
recordMessageUsage({
|
|
256
|
+
message_id: "m-no-cache",
|
|
257
|
+
thread_id: "t-no-cache",
|
|
258
|
+
agent_id: "a", agent_name: "A",
|
|
259
|
+
provider: "openai", model_id: "gpt-5", model_config_name: null,
|
|
260
|
+
input_tokens: 10, output_tokens: 20,
|
|
261
|
+
input_rate_usd_per_mtok: null, output_rate_usd_per_mtok: null,
|
|
262
|
+
cost_usd: 0,
|
|
263
|
+
});
|
|
264
|
+
const row = getMessageUsage("m-no-cache");
|
|
265
|
+
expect(row?.cache_creation_input_tokens).toBeNull();
|
|
266
|
+
expect(row?.cache_read_input_tokens).toBeNull();
|
|
267
|
+
});
|
|
234
268
|
});
|
|
@@ -24,6 +24,13 @@ export interface MessageUsageInput {
|
|
|
24
24
|
// assembly. NULL/undefined when unknown (very old assistant turns
|
|
25
25
|
// persisted before the breakdown was wired up, or non-LLM persists).
|
|
26
26
|
tier_usage?: TierUsage | null;
|
|
27
|
+
// Anthropic prompt caching (PR #181). Both fields are disjoint from
|
|
28
|
+
// `input_tokens`: total billable input tokens =
|
|
29
|
+
// input_tokens + cache_creation_input_tokens + cache_read_input_tokens
|
|
30
|
+
// priced at 1×, 1.25×, and 0.1× the input rate respectively. NULL/zero
|
|
31
|
+
// for providers that don't expose cache counts.
|
|
32
|
+
cache_creation_input_tokens?: number | null;
|
|
33
|
+
cache_read_input_tokens?: number | null;
|
|
27
34
|
}
|
|
28
35
|
|
|
29
36
|
export interface TierUsage {
|
|
@@ -37,7 +44,7 @@ export interface TierUsage {
|
|
|
37
44
|
context_window_tokens: number;
|
|
38
45
|
}
|
|
39
46
|
|
|
40
|
-
export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage"> {
|
|
47
|
+
export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage" | "cache_creation_input_tokens" | "cache_read_input_tokens"> {
|
|
41
48
|
created_at: string;
|
|
42
49
|
hot_tokens: number | null;
|
|
43
50
|
warm_tokens: number | null;
|
|
@@ -47,6 +54,8 @@ export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage"> {
|
|
|
47
54
|
warm_budget_tokens: number | null;
|
|
48
55
|
facts_budget_tokens: number | null;
|
|
49
56
|
context_window_tokens: number | null;
|
|
57
|
+
cache_creation_input_tokens: number | null;
|
|
58
|
+
cache_read_input_tokens: number | null;
|
|
50
59
|
}
|
|
51
60
|
|
|
52
61
|
export function recordMessageUsage(input: MessageUsageInput): void {
|
|
@@ -58,8 +67,9 @@ export function recordMessageUsage(input: MessageUsageInput): void {
|
|
|
58
67
|
model_config_name, input_tokens, output_tokens,
|
|
59
68
|
input_rate_usd_per_mtok, output_rate_usd_per_mtok, cost_usd, created_at,
|
|
60
69
|
hot_tokens, warm_tokens, facts_tokens, overhead_tokens,
|
|
61
|
-
hot_budget_tokens, warm_budget_tokens, facts_budget_tokens, context_window_tokens
|
|
62
|
-
|
|
70
|
+
hot_budget_tokens, warm_budget_tokens, facts_budget_tokens, context_window_tokens,
|
|
71
|
+
cache_creation_input_tokens, cache_read_input_tokens
|
|
72
|
+
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`,
|
|
63
73
|
).run(
|
|
64
74
|
input.message_id,
|
|
65
75
|
input.thread_id,
|
|
@@ -82,6 +92,8 @@ export function recordMessageUsage(input: MessageUsageInput): void {
|
|
|
82
92
|
t?.warm_budget_tokens ?? null,
|
|
83
93
|
t?.facts_budget_tokens ?? null,
|
|
84
94
|
t?.context_window_tokens ?? null,
|
|
95
|
+
input.cache_creation_input_tokens ?? null,
|
|
96
|
+
input.cache_read_input_tokens ?? null,
|
|
85
97
|
);
|
|
86
98
|
}
|
|
87
99
|
|
|
@@ -80,6 +80,58 @@ describe("estimateCostUsd", () => {
|
|
|
80
80
|
it("scales sub-million token counts proportionally", () => {
|
|
81
81
|
expect(estimateCostUsd(500_000, 100_000, { inputPer1M: 2, outputPer1M: 10 })).toBeCloseTo(2, 6);
|
|
82
82
|
});
|
|
83
|
+
|
|
84
|
+
describe("anthropic prompt-cache pricing", () => {
|
|
85
|
+
const rates = { inputPer1M: 3, outputPer1M: 15 };
|
|
86
|
+
|
|
87
|
+
it("ignores cache breakdown when not provided", () => {
|
|
88
|
+
// Sanity: existing call signature unchanged.
|
|
89
|
+
expect(estimateCostUsd(1_000_000, 0, rates)).toBe(3);
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
it("prices cache writes at 1.25× the input rate", () => {
|
|
93
|
+
// 1M cache_creation tokens × $3/M × 1.25 = $3.75
|
|
94
|
+
expect(
|
|
95
|
+
estimateCostUsd(0, 0, rates, { cache_creation_input_tokens: 1_000_000 }),
|
|
96
|
+
).toBeCloseTo(3.75, 6);
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
it("prices cache reads at 0.1× the input rate", () => {
|
|
100
|
+
// 1M cache_read tokens × $3/M × 0.1 = $0.30
|
|
101
|
+
expect(
|
|
102
|
+
estimateCostUsd(0, 0, rates, { cache_read_input_tokens: 1_000_000 }),
|
|
103
|
+
).toBeCloseTo(0.3, 6);
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
it("sums fresh + cache_creation + cache_read + output (Anthropic-style turn)", () => {
|
|
107
|
+
// 100k fresh input ($0.30) + 50k cache_creation ($0.1875)
|
|
108
|
+
// + 800k cache_read ($0.24) + 20k output ($0.30) = $1.0275
|
|
109
|
+
const cost = estimateCostUsd(100_000, 20_000, rates, {
|
|
110
|
+
cache_creation_input_tokens: 50_000,
|
|
111
|
+
cache_read_input_tokens: 800_000,
|
|
112
|
+
});
|
|
113
|
+
expect(cost).toBeCloseTo(0.3 + 0.1875 + 0.24 + 0.3, 6);
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it("does not double-bill cache when input rate is null", () => {
|
|
117
|
+
// No input rate → cache multipliers have nothing to multiply against.
|
|
118
|
+
expect(
|
|
119
|
+
estimateCostUsd(0, 100_000, { inputPer1M: null, outputPer1M: 5 }, {
|
|
120
|
+
cache_creation_input_tokens: 1_000_000,
|
|
121
|
+
cache_read_input_tokens: 1_000_000,
|
|
122
|
+
}),
|
|
123
|
+
).toBeCloseTo(0.5, 6);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
it("treats nullish cache fields as zero", () => {
|
|
127
|
+
expect(
|
|
128
|
+
estimateCostUsd(0, 0, rates, {
|
|
129
|
+
cache_creation_input_tokens: null,
|
|
130
|
+
cache_read_input_tokens: undefined,
|
|
131
|
+
}),
|
|
132
|
+
).toBe(0);
|
|
133
|
+
});
|
|
134
|
+
});
|
|
83
135
|
});
|
|
84
136
|
|
|
85
137
|
describe("getPricingTables", () => {
|
package/lib/stores/pricing.ts
CHANGED
|
@@ -317,15 +317,40 @@ function inferRatesFromSignals(signals: string[]): {
|
|
|
317
317
|
};
|
|
318
318
|
}
|
|
319
319
|
|
|
320
|
+
// Anthropic prompt-cache multipliers, applied against the standard input
|
|
321
|
+
// rate. Cache writes are billed at 1.25× input ("cache create"); cache
|
|
322
|
+
// reads are billed at 0.1× input ("cache hit"). Source:
|
|
323
|
+
// https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
|
|
324
|
+
// We apply the same multipliers to other providers that publish a
|
|
325
|
+
// cache-token breakdown — OpenAI's prompt caching, for example, also
|
|
326
|
+
// quotes a 0.5× read multiplier, but its API surfaces only `cached_tokens`
|
|
327
|
+
// (no separate write count) and a future PR can split that out. For now
|
|
328
|
+
// any provider that emits both fields will be priced as above.
|
|
329
|
+
export const CACHE_CREATION_INPUT_RATE_MULTIPLIER = 1.25;
|
|
330
|
+
export const CACHE_READ_INPUT_RATE_MULTIPLIER = 0.1;
|
|
331
|
+
|
|
332
|
+
export interface CacheTokenBreakdown {
|
|
333
|
+
cache_creation_input_tokens?: number | null;
|
|
334
|
+
cache_read_input_tokens?: number | null;
|
|
335
|
+
}
|
|
336
|
+
|
|
320
337
|
/**
 * Estimate the USD cost of a call from its token counts and per-million rates.
 *
 * @param inputTokens  Number of input tokens, billed at `rates.inputPer1M`.
 * @param outputTokens Number of output tokens, billed at `rates.outputPer1M`.
 * @param rates        USD prices per one million tokens; a nullish rate makes
 *                     that side of the bill contribute nothing.
 * @param cache        Optional cache token counts. They are priced off the
 *                     input rate using the cache multipliers, and only when an
 *                     input rate is available.
 * @returns The summed input, output and cache cost, or `0` when neither rate
 *          is known.
 */
export function estimateCostUsd(
  inputTokens: number,
  outputTokens: number,
  rates: Pick<ProviderRates, "inputPer1M" | "outputPer1M">,
  cache?: CacheTokenBreakdown | null,
): number {
  const { inputPer1M, outputPer1M } = rates;

  // Nothing can be priced without at least one rate.
  if (inputPer1M == null && outputPer1M == null) return 0;

  // Rates are quoted per one million tokens.
  const priceFor = (tokens: number, ratePer1M: number): number =>
    (tokens / 1_000_000) * ratePer1M;

  const promptCost = inputPer1M != null ? priceFor(inputTokens, inputPer1M) : 0;
  const completionCost = outputPer1M != null ? priceFor(outputTokens, outputPer1M) : 0;

  let cachedCost = 0;
  if (inputPer1M != null && cache) {
    const written = cache.cache_creation_input_tokens ?? 0;
    const served = cache.cache_read_input_tokens ?? 0;
    // Only strictly positive counts are billed; nullish, zero and negative
    // values add nothing.
    if (written > 0) {
      cachedCost += priceFor(written, inputPer1M) * CACHE_CREATION_INPUT_RATE_MULTIPLIER;
    }
    if (served > 0) {
      cachedCost += priceFor(served, inputPer1M) * CACHE_READ_INPUT_RATE_MULTIPLIER;
    }
  }

  return promptCost + completionCost + cachedCost;
}
|
package/lib/tools/builtins.ts
CHANGED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { describe, it, expect, afterAll } from "vitest";
|
|
2
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
|
|
6
|
+
// Point the home directory and the database directory at a throwaway temp
// folder. This happens before the dynamic import below, presumably so the
// imported module only ever sees the sandboxed paths (confirm against
// ./extension-surfaces).
const tmpRoot = mkdtempSync(join(tmpdir(), "jarela-test-extension-surfaces-"));
Object.assign(process.env, {
  HOME: tmpRoot,
  USERPROFILE: tmpRoot,
  JARELA_DB_DIR: join(tmpRoot, ".jarela-dbdir"),
});

afterAll(() => {
  // Best-effort cleanup: a failure to delete the temp folder must not fail
  // the suite.
  try {
    rmSync(tmpRoot, { recursive: true, force: true });
  } catch {
    // ignored on purpose
  }
});

// Loaded dynamically so the environment overrides above are already in place.
const { describeExtensionSurfacesTool } = await import("./extension-surfaces");
|
|
15
|
+
|
|
16
|
+
/** One entry of the extension-surface catalog, as parsed from the tool's JSON output. */
interface Surface {
  /** Identifier of the surface, e.g. "builtin_tool" or "mcp_server". */
  id: string;
  name: string;
  summary: string;
  /** Expected by the tests in this file to be non-empty. */
  registration_entrypoint: string;
  /** Expected by the tests in this file to start with "docs/EXTENDING.md#". */
  doc_section: string;
  example_path?: string;
  /** When present, the name of a listing tool such as "list_tools". */
  introspection_tool?: string;
  /** Expected by the tests in this file to contain at least one entry. */
  related_adrs: string[];
}
|
|
26
|
+
|
|
27
|
+
/** Parsed payload returned by the extension-surfaces tool. */
interface Result {
  /** The catalog entries. */
  surfaces: Surface[];
  /** Expected by the tests in this file to equal `surfaces.length`. */
  count: number;
  /** Expected by the tests in this file to be "docs/EXTENDING.md". */
  guide_path: string;
  contract_paths: string[];
  notes: string[];
}
|
|
34
|
+
|
|
35
|
+
describe("describe_extension_surfaces", () => {
  // The catalog is fetched once and memoised. Every test awaits the loader
  // itself rather than reading a variable that an earlier test assigned, so
  // each test still works when run in isolation (name filter, `.only`) and a
  // failure to load surfaces as that failure instead of a TypeError on
  // `undefined`.
  let catalog: Promise<Result> | undefined;
  const loadCatalog = (): Promise<Result> => {
    if (catalog === undefined) {
      catalog = (async () =>
        JSON.parse(await describeExtensionSurfacesTool.invoke({})) as Result)();
    }
    return catalog;
  };

  it("returns the curated catalog with all required fields", async () => {
    const out = await loadCatalog();
    expect(out.count).toBe(out.surfaces.length);
    expect(out.guide_path).toBe("docs/EXTENDING.md");
    expect(out.contract_paths.length).toBeGreaterThan(0);
    expect(out.notes.length).toBeGreaterThan(0);
  });

  it("includes the core extension points", async () => {
    const { surfaces } = await loadCatalog();
    // Membership checks do not depend on order, so the ids are not sorted.
    const ids = surfaces.map((s) => s.id);
    expect(ids).toContain("llm_provider_builtin");
    expect(ids).toContain("llm_provider_external");
    expect(ids).toContain("builtin_tool");
    expect(ids).toContain("mcp_server");
    expect(ids).toContain("agent_harness");
    expect(ids).toContain("integration_manifest");
    expect(ids).toContain("brand_overlay");
  });

  it("every surface has a registration entrypoint, doc section, and at least one ADR reference", async () => {
    const { surfaces } = await loadCatalog();
    for (const s of surfaces) {
      expect(s.registration_entrypoint).toBeTruthy();
      expect(s.doc_section.startsWith("docs/EXTENDING.md#")).toBe(true);
      expect(Array.isArray(s.related_adrs)).toBe(true);
      expect(s.related_adrs.length).toBeGreaterThan(0);
    }
  });

  it("introspection_tool references match real tool names", async () => {
    const { surfaces } = await loadCatalog();
    const expectedTools = [
      "list_providers",
      "list_tools",
      "list_mcp_servers",
      "list_integrations",
    ];
    for (const s of surfaces) {
      if (s.introspection_tool) {
        // `toContain` reports the offending tool name when the check fails.
        expect(expectedTools).toContain(s.introspection_tool);
      }
    }
  });
});
|