@circuitwall/jarela 0.14.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. package/.next/standalone/.next/BUILD_ID +1 -1
  2. package/.next/standalone/.next/app-path-routes-manifest.json +1 -1
  3. package/.next/standalone/.next/build-manifest.json +2 -2
  4. package/.next/standalone/.next/prerender-manifest.json +3 -3
  5. package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
  6. package/.next/standalone/.next/server/app/_global-error.html +1 -1
  7. package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
  8. package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  9. package/.next/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  10. package/.next/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  11. package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  12. package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  13. package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  14. package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  15. package/.next/standalone/.next/server/app/_not-found.html +2 -2
  16. package/.next/standalone/.next/server/app/_not-found.rsc +2 -2
  17. package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +2 -2
  18. package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  19. package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +2 -2
  20. package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  21. package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  22. package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
  23. package/.next/standalone/.next/server/app/api/v1/agents/[id]/route.js +6 -1
  24. package/.next/standalone/.next/server/app/api/v1/agents/[id]/route.js.map +1 -1
  25. package/.next/standalone/.next/server/app/api/v1/agents/route.js +6 -1
  26. package/.next/standalone/.next/server/app/api/v1/agents/route.js.map +1 -1
  27. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js +9 -1
  28. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js.map +1 -1
  29. package/.next/standalone/.next/server/app/api/v1/bridges/route.js +9 -1
  30. package/.next/standalone/.next/server/app/api/v1/bridges/route.js.map +1 -1
  31. package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js +36 -29
  32. package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js.map +1 -1
  33. package/.next/standalone/.next/server/app/api/v1/events/route.js +7 -1
  34. package/.next/standalone/.next/server/app/api/v1/events/route.js.map +1 -1
  35. package/.next/standalone/.next/server/app/api/v1/extensions/route.js +3 -3
  36. package/.next/standalone/.next/server/app/api/v1/extensions/route.js.map +1 -1
  37. package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js +4 -4
  38. package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js.map +1 -1
  39. package/.next/standalone/.next/server/app/api/v1/health/route.js +7 -1
  40. package/.next/standalone/.next/server/app/api/v1/health/route.js.map +1 -1
  41. package/.next/standalone/.next/server/app/api/v1/mcp-servers/[name]/route.js +9 -1
  42. package/.next/standalone/.next/server/app/api/v1/mcp-servers/[name]/route.js.map +1 -1
  43. package/.next/standalone/.next/server/app/api/v1/mcp-servers/route.js +9 -1
  44. package/.next/standalone/.next/server/app/api/v1/mcp-servers/route.js.map +1 -1
  45. package/.next/standalone/.next/server/app/api/v1/models/route.js +6 -1
  46. package/.next/standalone/.next/server/app/api/v1/models/route.js.map +1 -1
  47. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js +7 -1
  48. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js.map +1 -1
  49. package/.next/standalone/.next/server/app/api/v1/pending-actions/[id]/approve/route.js +14 -7
  50. package/.next/standalone/.next/server/app/api/v1/pending-actions/[id]/approve/route.js.map +1 -1
  51. package/.next/standalone/.next/server/app/api/v1/providers/[provider]/models/route.js +28 -0
  52. package/.next/standalone/.next/server/app/api/v1/providers/[provider]/models/route.js.map +1 -1
  53. package/.next/standalone/.next/server/app/api/v1/providers/route.js +7 -1
  54. package/.next/standalone/.next/server/app/api/v1/providers/route.js.map +1 -1
  55. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/route.js +16 -2
  56. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/route.js.map +1 -1
  57. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js +8 -1
  58. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js.map +1 -1
  59. package/.next/standalone/.next/server/app/api/v1/threads/route.js +6 -1
  60. package/.next/standalone/.next/server/app/api/v1/threads/route.js.map +1 -1
  61. package/.next/standalone/.next/server/app/api/v1/tools/route.js +10 -3
  62. package/.next/standalone/.next/server/app/api/v1/tools/route.js.map +1 -1
  63. package/.next/standalone/.next/server/app/index.html +2 -2
  64. package/.next/standalone/.next/server/app/index.rsc +3 -3
  65. package/.next/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +2 -2
  66. package/.next/standalone/.next/server/app/index.segments/_full.segment.rsc +3 -3
  67. package/.next/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  68. package/.next/standalone/.next/server/app/index.segments/_index.segment.rsc +2 -2
  69. package/.next/standalone/.next/server/app/index.segments/_tree.segment.rsc +2 -2
  70. package/.next/standalone/.next/server/app/page.js +56 -0
  71. package/.next/standalone/.next/server/app/page.js.map +1 -1
  72. package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  73. package/.next/standalone/.next/server/app/setup/page_client-reference-manifest.js +1 -1
  74. package/.next/standalone/.next/server/app/setup.html +1 -1
  75. package/.next/standalone/.next/server/app/setup.rsc +2 -2
  76. package/.next/standalone/.next/server/app/setup.segments/_full.segment.rsc +2 -2
  77. package/.next/standalone/.next/server/app/setup.segments/_head.segment.rsc +1 -1
  78. package/.next/standalone/.next/server/app/setup.segments/_index.segment.rsc +2 -2
  79. package/.next/standalone/.next/server/app/setup.segments/_tree.segment.rsc +2 -2
  80. package/.next/standalone/.next/server/app/setup.segments/setup/__PAGE__.segment.rsc +1 -1
  81. package/.next/standalone/.next/server/app/setup.segments/setup.segment.rsc +1 -1
  82. package/.next/standalone/.next/server/app-paths-manifest.json +1 -1
  83. package/.next/standalone/.next/server/chunks/1683.js +2 -2
  84. package/.next/standalone/.next/server/chunks/2082.js +122 -13
  85. package/.next/standalone/.next/server/chunks/2082.js.map +1 -1
  86. package/.next/standalone/.next/server/chunks/210.js +3 -3
  87. package/.next/standalone/.next/server/chunks/210.js.map +1 -1
  88. package/.next/standalone/.next/server/chunks/239.js +1902 -1487
  89. package/.next/standalone/.next/server/chunks/239.js.map +1 -1
  90. package/.next/standalone/.next/server/chunks/2447.js +9 -1
  91. package/.next/standalone/.next/server/chunks/2447.js.map +1 -1
  92. package/.next/standalone/.next/server/chunks/423.js +125 -16
  93. package/.next/standalone/.next/server/chunks/423.js.map +1 -1
  94. package/.next/standalone/.next/server/chunks/4631.js +36 -29
  95. package/.next/standalone/.next/server/chunks/4631.js.map +1 -1
  96. package/.next/standalone/.next/server/chunks/5937.js +3 -2
  97. package/.next/standalone/.next/server/chunks/5937.js.map +1 -1
  98. package/.next/standalone/.next/server/chunks/{947.js → 8866.js} +11321 -10883
  99. package/.next/standalone/.next/server/chunks/8866.js.map +1 -0
  100. package/.next/standalone/.next/server/chunks/9032.js +3 -3
  101. package/.next/standalone/.next/server/chunks/9032.js.map +1 -1
  102. package/.next/standalone/.next/server/middleware-build-manifest.js +2 -2
  103. package/.next/standalone/.next/server/middleware.js +122 -13
  104. package/.next/standalone/.next/server/pages/404.html +2 -2
  105. package/.next/standalone/.next/server/pages/500.html +1 -1
  106. package/.next/standalone/.next/server/proxy.js.map +1 -1
  107. package/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  108. package/.next/standalone/.next/static/chunks/app/{page-473b39ec30c7f569.js → page-a7cae65f235e2942.js} +57 -1
  109. package/.next/standalone/.next/static/chunks/app/page-a7cae65f235e2942.js.map +1 -0
  110. package/.next/standalone/.next/static/css/{6f8b1a84bcbcd467.css → e57bdbbbb5a05779.css} +2 -2
  111. package/.next/standalone/.next/static/css/e57bdbbbb5a05779.css.map +1 -0
  112. package/.next/standalone/package.json +9 -1
  113. package/CHANGELOG.md +90 -0
  114. package/README.md +30 -2
  115. package/api/types.ts +8 -0
  116. package/app/api/v1/agents/[id]/route.ts +7 -0
  117. package/app/api/v1/agents/route.ts +7 -0
  118. package/app/api/v1/events/route.ts +8 -0
  119. package/app/api/v1/extensions/route.ts +2 -2
  120. package/app/api/v1/extensions/tools/[name]/secrets/route.ts +3 -3
  121. package/app/api/v1/health/route.ts +8 -0
  122. package/app/api/v1/models/route.ts +7 -0
  123. package/app/api/v1/page-capture/route.ts +8 -0
  124. package/app/api/v1/providers/route.ts +8 -0
  125. package/app/api/v1/threads/[thread_id]/route.ts +8 -0
  126. package/app/api/v1/threads/[thread_id]/run/route.ts +9 -0
  127. package/app/api/v1/threads/route.ts +7 -0
  128. package/app/api/v1/tools/route.ts +9 -0
  129. package/components/chat/ContextUsageBar.tsx +44 -0
  130. package/lib/agents/llm.ts +25 -2
  131. package/lib/agents/run-thread.ts +13 -1
  132. package/lib/agents/stream-collector.ts +9 -1
  133. package/lib/api/serializers.test.ts +15 -0
  134. package/lib/api/serializers.ts +8 -0
  135. package/lib/db/migrations.ts +15 -0
  136. package/lib/health/runner.test.ts +24 -2
  137. package/lib/mcp/registry.ts +14 -6
  138. package/lib/providers/anthropic.test.ts +95 -0
  139. package/lib/providers/anthropic.ts +106 -10
  140. package/lib/providers/jarela-chat-model.ts +9 -1
  141. package/lib/providers/known-context-windows.ts +21 -0
  142. package/lib/providers/types.ts +21 -1
  143. package/lib/stores/message-usage.test.ts +34 -0
  144. package/lib/stores/message-usage.ts +15 -3
  145. package/lib/stores/pricing.test.ts +52 -0
  146. package/lib/stores/pricing.ts +26 -1
  147. package/lib/tools/builtins.ts +4 -0
  148. package/lib/tools/extension-surfaces.test.ts +79 -0
  149. package/lib/tools/extension-surfaces.ts +153 -0
  150. package/lib/tools/index.ts +27 -8
  151. package/lib/tools/list-tools.test.ts +76 -0
  152. package/lib/tools/list-tools.ts +84 -0
  153. package/lib/tools/mcp-servers-info.test.ts +73 -0
  154. package/lib/tools/mcp-servers-info.ts +71 -0
  155. package/lib/tools/providers-info.test.ts +73 -0
  156. package/lib/tools/providers-info.ts +106 -0
  157. package/lib/tools/registry.ts +36 -25
  158. package/lib/tools/types.ts +13 -0
  159. package/package.json +9 -1
  160. package/.next/standalone/.next/server/chunks/947.js.map +0 -1
  161. package/.next/standalone/.next/static/chunks/app/page-473b39ec30c7f569.js.map +0 -1
  162. package/.next/standalone/.next/static/css/6f8b1a84bcbcd467.css.map +0 -1
  163. /package/.next/standalone/.next/static/{T0p2VVPsJPj44rwbmjaFb → d_vhp-lJqfdjRFpnLVIqZ}/_buildManifest.js +0 -0
  164. /package/.next/standalone/.next/static/{T0p2VVPsJPj44rwbmjaFb → d_vhp-lJqfdjRFpnLVIqZ}/_ssgManifest.js +0 -0
@@ -53,6 +53,80 @@ function appendServerTools(
53
53
  return merged;
54
54
  }
55
55
 
56
/**
 * Cache-control marker shared by every Anthropic prompt-caching breakpoint
 * set in this module.
 *
 * During a multi-tool ReAct turn the system prompt and the tool definitions
 * are identical on every LLM call, and tool_results are only ever appended at
 * the tail. That stable prefix is what Anthropic's ephemeral cache is designed
 * to reuse. Three breakpoints are marked (system, last tool, last
 * tool_result) so that calls 2..N read the prefix at roughly 10% of the input
 * rate. A prefix shorter than the minimum cacheable size is silently ignored
 * by the API at no extra cost, which makes it safe to mark unconditionally.
 */
const EPHEMERAL: Anthropic.CacheControlEphemeral = { type: "ephemeral" };
64
+
65
/**
 * Wraps the system prompt in a single text block that carries the ephemeral
 * cache marker.
 *
 * @param text - System prompt text; may be empty.
 * @returns A one-element block array, or `undefined` when `text` is empty so
 *   the result can be assigned straight to an optional `system` field.
 */
export function withSystemCacheControl(text: string): Anthropic.TextBlockParam[] | undefined {
  if (text) {
    const block: Anthropic.TextBlockParam = { type: "text", text, cache_control: EPHEMERAL };
    return [block];
  }
  return undefined;
}
69
+
70
/**
 * Marks the final tool definition as a cache breakpoint.
 *
 * @param tools - Tool definitions in the order they will be sent.
 * @returns The same array when it is empty; otherwise a new array whose last
 *   entry is a shallow copy carrying `cache_control`. Earlier entries are
 *   reused by reference and the input array is not mutated.
 */
export function withToolsCacheControl(tools: Anthropic.Tool[]): Anthropic.Tool[] {
  const lastIndex = tools.length - 1;
  if (lastIndex < 0) return tools;
  return tools.map((tool, idx) =>
    idx === lastIndex ? { ...tool, cache_control: EPHEMERAL } : tool,
  );
}
75
+
76
/**
 * Marks the most recent `tool_result` block in the conversation as a cache
 * breakpoint.
 *
 * Messages are scanned from newest to oldest, and blocks within a message
 * from last to first; messages whose content is a plain string are skipped.
 * Only the first match is patched.
 *
 * @param messages - Conversation in Anthropic message format.
 * @returns The input array itself when no `tool_result` exists; otherwise a
 *   copy in which only the affected message and block are replaced by shallow
 *   copies. The input is never mutated.
 */
export function withLastToolResultCacheControl(
  messages: Anthropic.MessageParam[],
): Anthropic.MessageParam[] {
  let msgIdx = messages.length;
  while (msgIdx-- > 0) {
    const message = messages[msgIdx];
    const content = message.content;
    if (typeof content === "string") continue;

    let blockIdx = content.length;
    while (blockIdx-- > 0) {
      const block = content[blockIdx];
      if (block.type !== "tool_result") continue;

      const patchedContent = content.slice();
      patchedContent[blockIdx] = { ...block, cache_control: EPHEMERAL };
      const patchedMessages = messages.slice();
      patchedMessages[msgIdx] = { ...message, content: patchedContent };
      return patchedMessages;
    }
  }
  return messages;
}
96
+
97
/**
 * Minimal local view of the stream's `message_start` event: just the fields
 * this module reads after casting the SDK event.
 */
interface AnthropicMessageStartEvent {
  /** Event discriminator. */
  type: "message_start";
  /** Message envelope; only its optional `usage` is consumed here. */
  message: { usage?: Anthropic.Usage };
}
101
/**
 * Minimal local view of the stream's `message_delta` event: just the fields
 * this module reads after casting the SDK event.
 */
interface AnthropicMessageDeltaEvent {
  /** Event discriminator. */
  type: "message_delta";
  /** Usage attached to the delta, when the event carries one. */
  usage?: Anthropic.MessageDeltaUsage;
}
105
+
106
/**
 * Converts the usage snapshot on `message_start` into a provider `usage`
 * event carrying the input-side counts (fresh input, cache writes, cache
 * reads).
 *
 * `output_tokens` is deliberately reported as 0 here. `message_start` already
 * carries a small provisional output count, while the `output_tokens` on the
 * later `message_delta` is cumulative per Anthropic's streaming reference.
 * Consumers keep a running total across usage events, so forwarding both
 * would count the provisional tokens twice.
 *
 * @param usage - `message.usage` from the `message_start` event, if present.
 * @returns A `usage` event, or `null` when the event carried no usage.
 */
function usageEventFromStart(usage: Anthropic.Usage | undefined): ProviderStreamEvent | null {
  if (!usage) return null;
  return {
    type: "usage",
    input_tokens: usage.input_tokens ?? 0,
    // Output is accounted for once, from the cumulative message_delta count.
    output_tokens: 0,
    cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
    cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
  };
}
116
+
117
/**
 * Converts the usage attached to a `message_delta` event into a provider
 * `usage` event.
 *
 * Only the final `output_tokens` is forwarded. Input and cache counts were
 * already reported from `message_start`, so `input_tokens` is pinned to 0 and
 * the cache fields are omitted; this keeps the agent loop's running total
 * from counting them (cache reads in particular) a second time.
 *
 * @param usage - `usage` from the `message_delta` event, if present.
 * @returns A `usage` event, or `null` when the event carried no usage.
 */
function usageEventFromDelta(usage: Anthropic.MessageDeltaUsage | undefined): ProviderStreamEvent | null {
  return usage
    ? { type: "usage", input_tokens: 0, output_tokens: usage.output_tokens ?? 0 }
    : null;
}
129
+
56
130
  export const anthropicProvider: ModelProvider = {
57
131
  name: "anthropic",
58
132
 
@@ -80,7 +154,7 @@ export const anthropicProvider: ModelProvider = {
80
154
  const stream = await client.messages.stream({
81
155
  model: model_id,
82
156
  max_tokens: params.max_tokens ?? 4096,
83
- system: systemText || undefined,
157
+ system: withSystemCacheControl(systemText),
84
158
  messages: userMessages,
85
159
  });
86
160
 
@@ -103,13 +177,18 @@ export const anthropicProvider: ModelProvider = {
103
177
  });
104
178
 
105
179
  const systemMsg = messages.find((m) => m.role === "system");
106
- const msgList = toAnthropicMessages(messages.filter((m) => m.role !== "system"));
107
- const anthropicTools = appendServerTools(toAnthropicTools(tools), params);
180
+ const systemText = typeof systemMsg?.content === "string" ? systemMsg.content : "";
181
+ const msgList = withLastToolResultCacheControl(
182
+ toAnthropicMessages(messages.filter((m) => m.role !== "system")),
183
+ );
184
+ const anthropicTools = withToolsCacheControl(
185
+ appendServerTools(toAnthropicTools(tools), params),
186
+ );
108
187
 
109
188
  const resp = await client.messages.create({
110
189
  model: model_id,
111
190
  max_tokens: params.max_tokens ?? 4096,
112
- system: typeof systemMsg?.content === "string" ? systemMsg.content : undefined,
191
+ system: withSystemCacheControl(systemText),
113
192
  messages: msgList,
114
193
  tools: anthropicTools,
115
194
  ...(params.thinking ? { thinking: params.thinking } : {}),
@@ -141,8 +220,13 @@ export const anthropicProvider: ModelProvider = {
141
220
  });
142
221
 
143
222
  const systemMsg = messages.find((m) => m.role === "system");
144
- const msgList = toAnthropicMessages(messages.filter((m) => m.role !== "system"));
145
- const anthropicTools = appendServerTools(toAnthropicTools(tools), params);
223
+ const systemText = typeof systemMsg?.content === "string" ? systemMsg.content : "";
224
+ const msgList = withLastToolResultCacheControl(
225
+ toAnthropicMessages(messages.filter((m) => m.role !== "system")),
226
+ );
227
+ const anthropicTools = withToolsCacheControl(
228
+ appendServerTools(toAnthropicTools(tools), params),
229
+ );
146
230
 
147
231
  const body: Anthropic.Messages.MessageStreamParams = {
148
232
  model: model_id,
@@ -150,7 +234,8 @@ export const anthropicProvider: ModelProvider = {
150
234
  messages: msgList,
151
235
  ...(pickAnthropicOptions(params) as Record<string, unknown>),
152
236
  };
153
- if (typeof systemMsg?.content === "string") body.system = systemMsg.content;
237
+ const systemParam = withSystemCacheControl(systemText);
238
+ if (systemParam) body.system = systemParam;
154
239
  if (anthropicTools.length > 0) body.tools = anthropicTools;
155
240
  if (params.thinking) {
156
241
  (body as unknown as Record<string, unknown>).thinking = params.thinking;
@@ -161,6 +246,12 @@ export const anthropicProvider: ModelProvider = {
161
246
  const blockType = new Map<number, "text" | "thinking" | "tool_use">();
162
247
 
163
248
  for await (const event of stream) {
249
+ if (event.type === "message_start") {
250
+ const ev = event as unknown as AnthropicMessageStartEvent;
251
+ const u = usageEventFromStart(ev.message?.usage);
252
+ if (u) yield u;
253
+ continue;
254
+ }
164
255
  if (event.type === "content_block_start") {
165
256
  const cb = event.content_block;
166
257
  if (cb.type === "tool_use") {
@@ -180,9 +271,14 @@ export const anthropicProvider: ModelProvider = {
180
271
  } else if (d.type === "input_json_delta" && d.partial_json !== undefined) {
181
272
  yield { type: "tool_call_chunk", index: event.index, args_delta: d.partial_json };
182
273
  }
183
- } else if (event.type === "message_delta" && event.delta?.stop_reason) {
184
- const reason = event.delta.stop_reason;
185
- yield { type: "stop", reason: reason === "tool_use" ? "tool_use" : reason === "max_tokens" ? "length" : "stop" };
274
+ } else if (event.type === "message_delta") {
275
+ const delta = event as unknown as AnthropicMessageDeltaEvent;
276
+ const u = usageEventFromDelta(delta.usage);
277
+ if (u) yield u;
278
+ if (event.delta?.stop_reason) {
279
+ const reason = event.delta.stop_reason;
280
+ yield { type: "stop", reason: reason === "tool_use" ? "tool_use" : reason === "max_tokens" ? "length" : "stop" };
281
+ }
186
282
  }
187
283
  }
188
284
  })();
@@ -203,8 +203,13 @@ export class JarelaChatModel extends BaseChatModel {
203
203
  } else if (event.type === "usage") {
204
204
  // ADR-0041: surface real provider token counts on the final
205
205
  // AIMessageChunk via LangChain's standard `usage_metadata` field so
206
- // the agent loop can snapshot them into message_usage.
206
+ // the agent loop can snapshot them into message_usage. PR #181 added
207
+ // Anthropic prompt caching; carry the cache breakdown through
208
+ // `input_token_details` (LangChain's standard channel) so cost
209
+ // attribution downstream can apply the 1.25× / 0.1× rates.
207
210
  emittedAny = true;
211
+ const cacheCreation = event.cache_creation_input_tokens ?? 0;
212
+ const cacheRead = event.cache_read_input_tokens ?? 0;
208
213
  yield new ChatGenerationChunk({
209
214
  message: new AIMessageChunk({
210
215
  content: "",
@@ -213,6 +218,9 @@ export class JarelaChatModel extends BaseChatModel {
213
218
  output_tokens: event.output_tokens ?? 0,
214
219
  total_tokens: event.total_tokens
215
220
  ?? (event.input_tokens ?? 0) + (event.output_tokens ?? 0),
221
+ ...(cacheCreation > 0 || cacheRead > 0
222
+ ? { input_token_details: { cache_creation: cacheCreation, cache_read: cacheRead } }
223
+ : {}),
216
224
  },
217
225
  }),
218
226
  text: "",
@@ -119,3 +119,24 @@ export function getKnownContextLength(provider: string, model_id: string): numbe
119
119
  export function getKnownMaxOutputTokens(provider: string, model_id: string): number | null {
120
120
  return getKnownModelLimits(provider, model_id)?.max_output_tokens ?? null;
121
121
  }
122
+
123
/**
 * Flat snapshot of the static model catalog for one provider, used by
 * introspection tools so the agent can enumerate what is known statically.
 *
 * @param provider - Provider name; `anthropic`, `gemini`, `openai` and
 *   `deepseek` have static tables.
 * @returns One row per known model, with `max_output_tokens` normalised to
 *   `null` when the table has no value. Returns `[]` for providers without a
 *   static table (e.g. `langchain`, `mock`, externals).
 */
export function listKnownModels(
  provider: string,
): Array<{ model_id: string; context_length: number; max_output_tokens: number | null }> {
  let limitsById: Record<string, KnownModelLimits>;
  if (provider === "anthropic") limitsById = ANTHROPIC;
  else if (provider === "gemini") limitsById = GEMINI;
  else if (provider === "openai") limitsById = OPENAI;
  else if (provider === "deepseek") limitsById = DEEPSEEK;
  else return [];

  const rows: Array<{ model_id: string; context_length: number; max_output_tokens: number | null }> = [];
  for (const [model_id, limits] of Object.entries(limitsById)) {
    rows.push({
      model_id,
      context_length: limits.context_length,
      max_output_tokens: limits.max_output_tokens ?? null,
    });
  }
  return rows;
}
@@ -1,3 +1,16 @@
1
+ /**
2
+ * @public
3
+ *
4
+ * Public LLM-provider extension contract.
5
+ *
6
+ * Every type and interface in this file is part of the package's
7
+ * stable public surface (per `package.json#exports`). External provider
8
+ * adapters — both in-tree and `~/.jarela/providers/*.cjs` plugins —
9
+ * conform to {@link ModelProvider}. Removing or breaking any export
10
+ * here counts as a breaking change under the deprecation policy in
11
+ * CONTRIBUTING.md.
12
+ */
13
+
1
14
  import type { ContentPart, InvokeMessage, InvokeResult, OpenAITool } from "@/lib/tools/types";
2
15
  export type { InvokeMessage, InvokeResult, OpenAITool };
3
16
 
@@ -44,7 +57,14 @@ export type ProviderStreamEvent =
44
57
  | { type: "thinking"; delta: string }
45
58
  | { type: "tool_call_chunk"; index: number; id?: string; name?: string; args_delta?: string }
46
59
  | { type: "citation"; source?: string; snippet?: string; url?: string }
47
- | { type: "usage"; input_tokens?: number; output_tokens?: number; total_tokens?: number }
60
+ | {
61
+ type: "usage";
62
+ input_tokens?: number;
63
+ output_tokens?: number;
64
+ total_tokens?: number;
65
+ cache_creation_input_tokens?: number;
66
+ cache_read_input_tokens?: number;
67
+ }
48
68
  | { type: "audio_chunk"; mime_type: string; data_b64: string }
49
69
  | { type: "provider_event"; name: string; payload: unknown }
50
70
  | { type: "stop"; reason: "stop" | "tool_use" | "length" };
@@ -231,4 +231,38 @@ describe("message_usage snapshot store (ADR-0041)", () => {
231
231
  const map = getMessageUsageByIds([]);
232
232
  expect(map.size).toBe(0);
233
233
  });
234
+
235
+ it("persists Anthropic cache_creation/cache_read token counts (PR #181 follow-up)", () => {
236
+ recordMessageUsage({
237
+ message_id: "m-cache",
238
+ thread_id: "t-cache",
239
+ agent_id: "a", agent_name: "A",
240
+ provider: "anthropic", model_id: "claude-sonnet-4", model_config_name: null,
241
+ input_tokens: 1200,
242
+ output_tokens: 350,
243
+ cache_creation_input_tokens: 4000,
244
+ cache_read_input_tokens: 80_000,
245
+ input_rate_usd_per_mtok: 3,
246
+ output_rate_usd_per_mtok: 15,
247
+ cost_usd: 0.04,
248
+ });
249
+ const row = getMessageUsage("m-cache");
250
+ expect(row?.cache_creation_input_tokens).toBe(4000);
251
+ expect(row?.cache_read_input_tokens).toBe(80_000);
252
+ });
253
+
254
+ it("stores NULL cache columns for legacy rows that omit them", () => {
255
+ recordMessageUsage({
256
+ message_id: "m-no-cache",
257
+ thread_id: "t-no-cache",
258
+ agent_id: "a", agent_name: "A",
259
+ provider: "openai", model_id: "gpt-5", model_config_name: null,
260
+ input_tokens: 10, output_tokens: 20,
261
+ input_rate_usd_per_mtok: null, output_rate_usd_per_mtok: null,
262
+ cost_usd: 0,
263
+ });
264
+ const row = getMessageUsage("m-no-cache");
265
+ expect(row?.cache_creation_input_tokens).toBeNull();
266
+ expect(row?.cache_read_input_tokens).toBeNull();
267
+ });
234
268
  });
@@ -24,6 +24,13 @@ export interface MessageUsageInput {
24
24
  // assembly. NULL/undefined when unknown (very old assistant turns
25
25
  // persisted before the breakdown was wired up, or non-LLM persists).
26
26
  tier_usage?: TierUsage | null;
27
+ // Anthropic prompt caching (PR #181). Both fields are disjoint from
28
+ // `input_tokens`: total billable input tokens =
29
+ // input_tokens + cache_creation_input_tokens + cache_read_input_tokens
30
+ // priced at 1×, 1.25×, and 0.1× the input rate respectively. NULL/zero
31
+ // for providers that don't expose cache counts.
32
+ cache_creation_input_tokens?: number | null;
33
+ cache_read_input_tokens?: number | null;
27
34
  }
28
35
 
29
36
  export interface TierUsage {
@@ -37,7 +44,7 @@ export interface TierUsage {
37
44
  context_window_tokens: number;
38
45
  }
39
46
 
40
- export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage"> {
47
+ export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage" | "cache_creation_input_tokens" | "cache_read_input_tokens"> {
41
48
  created_at: string;
42
49
  hot_tokens: number | null;
43
50
  warm_tokens: number | null;
@@ -47,6 +54,8 @@ export interface MessageUsageRow extends Omit<MessageUsageInput, "tier_usage"> {
47
54
  warm_budget_tokens: number | null;
48
55
  facts_budget_tokens: number | null;
49
56
  context_window_tokens: number | null;
57
+ cache_creation_input_tokens: number | null;
58
+ cache_read_input_tokens: number | null;
50
59
  }
51
60
 
52
61
  export function recordMessageUsage(input: MessageUsageInput): void {
@@ -58,8 +67,9 @@ export function recordMessageUsage(input: MessageUsageInput): void {
58
67
  model_config_name, input_tokens, output_tokens,
59
68
  input_rate_usd_per_mtok, output_rate_usd_per_mtok, cost_usd, created_at,
60
69
  hot_tokens, warm_tokens, facts_tokens, overhead_tokens,
61
- hot_budget_tokens, warm_budget_tokens, facts_budget_tokens, context_window_tokens
62
- ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`,
70
+ hot_budget_tokens, warm_budget_tokens, facts_budget_tokens, context_window_tokens,
71
+ cache_creation_input_tokens, cache_read_input_tokens
72
+ ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`,
63
73
  ).run(
64
74
  input.message_id,
65
75
  input.thread_id,
@@ -82,6 +92,8 @@ export function recordMessageUsage(input: MessageUsageInput): void {
82
92
  t?.warm_budget_tokens ?? null,
83
93
  t?.facts_budget_tokens ?? null,
84
94
  t?.context_window_tokens ?? null,
95
+ input.cache_creation_input_tokens ?? null,
96
+ input.cache_read_input_tokens ?? null,
85
97
  );
86
98
  }
87
99
 
@@ -80,6 +80,58 @@ describe("estimateCostUsd", () => {
80
80
  it("scales sub-million token counts proportionally", () => {
81
81
  expect(estimateCostUsd(500_000, 100_000, { inputPer1M: 2, outputPer1M: 10 })).toBeCloseTo(2, 6);
82
82
  });
83
+
84
+ describe("anthropic prompt-cache pricing", () => {
85
+ const rates = { inputPer1M: 3, outputPer1M: 15 };
86
+
87
+ it("ignores cache breakdown when not provided", () => {
88
+ // Sanity: existing call signature unchanged.
89
+ expect(estimateCostUsd(1_000_000, 0, rates)).toBe(3);
90
+ });
91
+
92
+ it("prices cache writes at 1.25× the input rate", () => {
93
+ // 1M cache_creation tokens × $3/M × 1.25 = $3.75
94
+ expect(
95
+ estimateCostUsd(0, 0, rates, { cache_creation_input_tokens: 1_000_000 }),
96
+ ).toBeCloseTo(3.75, 6);
97
+ });
98
+
99
+ it("prices cache reads at 0.1× the input rate", () => {
100
+ // 1M cache_read tokens × $3/M × 0.1 = $0.30
101
+ expect(
102
+ estimateCostUsd(0, 0, rates, { cache_read_input_tokens: 1_000_000 }),
103
+ ).toBeCloseTo(0.3, 6);
104
+ });
105
+
106
+ it("sums fresh + cache_creation + cache_read + output (Anthropic-style turn)", () => {
107
+ // 100k fresh input ($0.30) + 50k cache_creation ($0.1875)
108
+ // + 800k cache_read ($0.24) + 20k output ($0.30) = $1.0275
109
+ const cost = estimateCostUsd(100_000, 20_000, rates, {
110
+ cache_creation_input_tokens: 50_000,
111
+ cache_read_input_tokens: 800_000,
112
+ });
113
+ expect(cost).toBeCloseTo(0.3 + 0.1875 + 0.24 + 0.3, 6);
114
+ });
115
+
116
+ it("does not double-bill cache when input rate is null", () => {
117
+ // No input rate → cache multipliers have nothing to multiply against.
118
+ expect(
119
+ estimateCostUsd(0, 100_000, { inputPer1M: null, outputPer1M: 5 }, {
120
+ cache_creation_input_tokens: 1_000_000,
121
+ cache_read_input_tokens: 1_000_000,
122
+ }),
123
+ ).toBeCloseTo(0.5, 6);
124
+ });
125
+
126
+ it("treats nullish cache fields as zero", () => {
127
+ expect(
128
+ estimateCostUsd(0, 0, rates, {
129
+ cache_creation_input_tokens: null,
130
+ cache_read_input_tokens: undefined,
131
+ }),
132
+ ).toBe(0);
133
+ });
134
+ });
83
135
  });
84
136
 
85
137
  describe("getPricingTables", () => {
@@ -317,15 +317,40 @@ function inferRatesFromSignals(signals: string[]): {
317
317
  };
318
318
  }
319
319
 
320
// Anthropic prompt-cache pricing, expressed as multipliers on the standard
// input rate. Source:
// https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
//
// The same multipliers are currently applied to any provider that reports
// both cache fields. OpenAI's prompt caching, for example, quotes a 0.5× read
// multiplier, but its API surfaces only `cached_tokens` (no separate write
// count); a future PR can split that out.

/** Cache writes ("cache create") are billed at 1.25× the input rate. */
export const CACHE_CREATION_INPUT_RATE_MULTIPLIER = 1.25;

/** Cache reads ("cache hit") are billed at 0.1× the input rate. */
export const CACHE_READ_INPUT_RATE_MULTIPLIER = 0.1;
331
+
332
/**
 * Optional prompt-cache token counts accepted by the cost estimator. A
 * missing or `null` field is treated as zero by `estimateCostUsd`.
 */
export interface CacheTokenBreakdown {
  /** Tokens written to the cache on this call. */
  cache_creation_input_tokens?: number | null;
  /** Tokens served from the cache on this call. */
  cache_read_input_tokens?: number | null;
}
336
+
320
337
  export function estimateCostUsd(
321
338
  inputTokens: number,
322
339
  outputTokens: number,
323
340
  rates: Pick<ProviderRates, "inputPer1M" | "outputPer1M">,
341
+ cache?: CacheTokenBreakdown | null,
324
342
  ): number {
325
343
  const inputRate = rates.inputPer1M;
326
344
  const outputRate = rates.outputPer1M;
327
345
  if (inputRate == null && outputRate == null) return 0;
328
346
  const inCost = inputRate == null ? 0 : (inputTokens / 1_000_000) * inputRate;
329
347
  const outCost = outputRate == null ? 0 : (outputTokens / 1_000_000) * outputRate;
330
- return inCost + outCost;
348
+ let cacheCost = 0;
349
+ if (cache && inputRate != null) {
350
+ const create = cache.cache_creation_input_tokens ?? 0;
351
+ const read = cache.cache_read_input_tokens ?? 0;
352
+ if (create > 0) cacheCost += (create / 1_000_000) * inputRate * CACHE_CREATION_INPUT_RATE_MULTIPLIER;
353
+ if (read > 0) cacheCost += (read / 1_000_000) * inputRate * CACHE_READ_INPUT_RATE_MULTIPLIER;
354
+ }
355
+ return inCost + outCost + cacheCost;
331
356
  }
@@ -29,3 +29,7 @@ import "./outlook";
29
29
  import "./outlook-calendar";
30
30
  import "./delegate";
31
31
  import "./system_config";
32
+ import "./list-tools";
33
+ import "./providers-info";
34
+ import "./mcp-servers-info";
35
+ import "./extension-surfaces";
@@ -0,0 +1,79 @@
1
+ import { describe, it, expect, afterAll } from "vitest";
2
+ import { mkdtempSync, rmSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+
6
// Redirect HOME / USERPROFILE / JARELA_DB_DIR to a throwaway directory BEFORE
// the module under test is loaded (hence the dynamic import below), presumably
// so anything it resolves from those variables lands in the temp dir.
const tmpRoot = mkdtempSync(join(tmpdir(), "jarela-test-extension-surfaces-"));
Object.assign(process.env, {
  HOME: tmpRoot,
  USERPROFILE: tmpRoot,
  JARELA_DB_DIR: join(tmpRoot, ".jarela-dbdir"),
});

afterAll(() => {
  try {
    rmSync(tmpRoot, { recursive: true, force: true });
  } catch {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
  }
});

const { describeExtensionSurfacesTool } = await import("./extension-surfaces");
15
+
16
/** One catalog entry as parsed from the tool's JSON output in this test. */
interface Surface {
  /** Stable identifier, e.g. `builtin_tool` or `mcp_server`. */
  id: string;
  name: string;
  summary: string;
  /** Asserted below to be non-empty for every surface. */
  registration_entrypoint: string;
  /** Asserted below to start with `docs/EXTENDING.md#`. */
  doc_section: string;
  example_path?: string;
  /** When present, asserted below to be one of the known `list_*` tool names. */
  introspection_tool?: string;
  /** Asserted below to contain at least one entry. */
  related_adrs: string[];
}
26
+
27
/** Top-level payload as parsed from the tool's JSON output in this test. */
interface Result {
  surfaces: Surface[];
  /** Asserted below to equal `surfaces.length`. */
  count: number;
  /** Asserted below to equal `docs/EXTENDING.md`. */
  guide_path: string;
  /** Asserted below to be non-empty. */
  contract_paths: string[];
  /** Asserted below to be non-empty. */
  notes: string[];
}
34
+
35
describe("describe_extension_surfaces", () => {
  // The tool is invoked once and the parsed payload is memoized. Every test
  // awaits this loader instead of reading a variable that an earlier test
  // assigned, so each test also passes when run alone or filtered by name.
  let cached: Promise<Result> | undefined;
  const loadCatalog = (): Promise<Result> => {
    if (!cached) {
      cached = (async () =>
        JSON.parse(await describeExtensionSurfacesTool.invoke({})) as Result)();
    }
    return cached;
  };

  it("returns the curated catalog with all required fields", async () => {
    const out = await loadCatalog();
    expect(out.count).toBe(out.surfaces.length);
    expect(out.guide_path).toBe("docs/EXTENDING.md");
    expect(out.contract_paths.length).toBeGreaterThan(0);
    expect(out.notes.length).toBeGreaterThan(0);
  });

  it("includes the core extension points", async () => {
    const out = await loadCatalog();
    const ids = out.surfaces.map((s) => s.id);
    expect(ids).toContain("llm_provider_builtin");
    expect(ids).toContain("llm_provider_external");
    expect(ids).toContain("builtin_tool");
    expect(ids).toContain("mcp_server");
    expect(ids).toContain("agent_harness");
    expect(ids).toContain("integration_manifest");
    expect(ids).toContain("brand_overlay");
  });

  it("every surface has a registration entrypoint, doc section, and at least one ADR reference", async () => {
    const out = await loadCatalog();
    for (const s of out.surfaces) {
      expect(s.registration_entrypoint).toBeTruthy();
      expect(s.doc_section.startsWith("docs/EXTENDING.md#")).toBe(true);
      expect(Array.isArray(s.related_adrs)).toBe(true);
      expect(s.related_adrs.length).toBeGreaterThan(0);
    }
  });

  it("introspection_tool references match real tool names", async () => {
    const out = await loadCatalog();
    const expectedTools = new Set([
      "list_providers",
      "list_tools",
      "list_mcp_servers",
      "list_integrations",
    ]);
    for (const s of out.surfaces) {
      if (s.introspection_tool) {
        expect(expectedTools.has(s.introspection_tool)).toBe(true);
      }
    }
  });
});