@circuitwall/jarela 0.14.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. package/.next/standalone/.next/BUILD_ID +1 -1
  2. package/.next/standalone/.next/app-path-routes-manifest.json +1 -1
  3. package/.next/standalone/.next/build-manifest.json +2 -2
  4. package/.next/standalone/.next/prerender-manifest.json +3 -3
  5. package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
  6. package/.next/standalone/.next/server/app/_global-error.html +1 -1
  7. package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
  8. package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  9. package/.next/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  10. package/.next/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  11. package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  12. package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  13. package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  14. package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  15. package/.next/standalone/.next/server/app/_not-found.html +2 -2
  16. package/.next/standalone/.next/server/app/_not-found.rsc +2 -2
  17. package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +2 -2
  18. package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  19. package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +2 -2
  20. package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  21. package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  22. package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
  23. package/.next/standalone/.next/server/app/api/v1/agents/[id]/route.js +6 -1
  24. package/.next/standalone/.next/server/app/api/v1/agents/[id]/route.js.map +1 -1
  25. package/.next/standalone/.next/server/app/api/v1/agents/route.js +6 -1
  26. package/.next/standalone/.next/server/app/api/v1/agents/route.js.map +1 -1
  27. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js +9 -1
  28. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js.map +1 -1
  29. package/.next/standalone/.next/server/app/api/v1/bridges/route.js +9 -1
  30. package/.next/standalone/.next/server/app/api/v1/bridges/route.js.map +1 -1
  31. package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js +36 -29
  32. package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js.map +1 -1
  33. package/.next/standalone/.next/server/app/api/v1/events/route.js +7 -1
  34. package/.next/standalone/.next/server/app/api/v1/events/route.js.map +1 -1
  35. package/.next/standalone/.next/server/app/api/v1/extensions/route.js +3 -3
  36. package/.next/standalone/.next/server/app/api/v1/extensions/route.js.map +1 -1
  37. package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js +4 -4
  38. package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js.map +1 -1
  39. package/.next/standalone/.next/server/app/api/v1/health/route.js +7 -1
  40. package/.next/standalone/.next/server/app/api/v1/health/route.js.map +1 -1
  41. package/.next/standalone/.next/server/app/api/v1/mcp-servers/[name]/route.js +9 -1
  42. package/.next/standalone/.next/server/app/api/v1/mcp-servers/[name]/route.js.map +1 -1
  43. package/.next/standalone/.next/server/app/api/v1/mcp-servers/route.js +9 -1
  44. package/.next/standalone/.next/server/app/api/v1/mcp-servers/route.js.map +1 -1
  45. package/.next/standalone/.next/server/app/api/v1/models/route.js +6 -1
  46. package/.next/standalone/.next/server/app/api/v1/models/route.js.map +1 -1
  47. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js +7 -1
  48. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js.map +1 -1
  49. package/.next/standalone/.next/server/app/api/v1/pending-actions/[id]/approve/route.js +14 -7
  50. package/.next/standalone/.next/server/app/api/v1/pending-actions/[id]/approve/route.js.map +1 -1
  51. package/.next/standalone/.next/server/app/api/v1/providers/[provider]/models/route.js +28 -0
  52. package/.next/standalone/.next/server/app/api/v1/providers/[provider]/models/route.js.map +1 -1
  53. package/.next/standalone/.next/server/app/api/v1/providers/route.js +7 -1
  54. package/.next/standalone/.next/server/app/api/v1/providers/route.js.map +1 -1
  55. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/route.js +16 -2
  56. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/route.js.map +1 -1
  57. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js +8 -1
  58. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js.map +1 -1
  59. package/.next/standalone/.next/server/app/api/v1/threads/route.js +6 -1
  60. package/.next/standalone/.next/server/app/api/v1/threads/route.js.map +1 -1
  61. package/.next/standalone/.next/server/app/api/v1/tools/route.js +10 -3
  62. package/.next/standalone/.next/server/app/api/v1/tools/route.js.map +1 -1
  63. package/.next/standalone/.next/server/app/index.html +2 -2
  64. package/.next/standalone/.next/server/app/index.rsc +3 -3
  65. package/.next/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +2 -2
  66. package/.next/standalone/.next/server/app/index.segments/_full.segment.rsc +3 -3
  67. package/.next/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  68. package/.next/standalone/.next/server/app/index.segments/_index.segment.rsc +2 -2
  69. package/.next/standalone/.next/server/app/index.segments/_tree.segment.rsc +2 -2
  70. package/.next/standalone/.next/server/app/page.js +56 -0
  71. package/.next/standalone/.next/server/app/page.js.map +1 -1
  72. package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  73. package/.next/standalone/.next/server/app/setup/page_client-reference-manifest.js +1 -1
  74. package/.next/standalone/.next/server/app/setup.html +1 -1
  75. package/.next/standalone/.next/server/app/setup.rsc +2 -2
  76. package/.next/standalone/.next/server/app/setup.segments/_full.segment.rsc +2 -2
  77. package/.next/standalone/.next/server/app/setup.segments/_head.segment.rsc +1 -1
  78. package/.next/standalone/.next/server/app/setup.segments/_index.segment.rsc +2 -2
  79. package/.next/standalone/.next/server/app/setup.segments/_tree.segment.rsc +2 -2
  80. package/.next/standalone/.next/server/app/setup.segments/setup/__PAGE__.segment.rsc +1 -1
  81. package/.next/standalone/.next/server/app/setup.segments/setup.segment.rsc +1 -1
  82. package/.next/standalone/.next/server/app-paths-manifest.json +1 -1
  83. package/.next/standalone/.next/server/chunks/1683.js +2 -2
  84. package/.next/standalone/.next/server/chunks/2082.js +122 -13
  85. package/.next/standalone/.next/server/chunks/2082.js.map +1 -1
  86. package/.next/standalone/.next/server/chunks/210.js +3 -3
  87. package/.next/standalone/.next/server/chunks/210.js.map +1 -1
  88. package/.next/standalone/.next/server/chunks/239.js +1902 -1487
  89. package/.next/standalone/.next/server/chunks/239.js.map +1 -1
  90. package/.next/standalone/.next/server/chunks/2447.js +9 -1
  91. package/.next/standalone/.next/server/chunks/2447.js.map +1 -1
  92. package/.next/standalone/.next/server/chunks/423.js +125 -16
  93. package/.next/standalone/.next/server/chunks/423.js.map +1 -1
  94. package/.next/standalone/.next/server/chunks/4631.js +36 -29
  95. package/.next/standalone/.next/server/chunks/4631.js.map +1 -1
  96. package/.next/standalone/.next/server/chunks/5937.js +3 -2
  97. package/.next/standalone/.next/server/chunks/5937.js.map +1 -1
  98. package/.next/standalone/.next/server/chunks/{947.js → 8866.js} +11321 -10883
  99. package/.next/standalone/.next/server/chunks/8866.js.map +1 -0
  100. package/.next/standalone/.next/server/chunks/9032.js +3 -3
  101. package/.next/standalone/.next/server/chunks/9032.js.map +1 -1
  102. package/.next/standalone/.next/server/middleware-build-manifest.js +2 -2
  103. package/.next/standalone/.next/server/middleware.js +122 -13
  104. package/.next/standalone/.next/server/pages/404.html +2 -2
  105. package/.next/standalone/.next/server/pages/500.html +1 -1
  106. package/.next/standalone/.next/server/proxy.js.map +1 -1
  107. package/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  108. package/.next/standalone/.next/static/chunks/app/{page-473b39ec30c7f569.js → page-a7cae65f235e2942.js} +57 -1
  109. package/.next/standalone/.next/static/chunks/app/page-a7cae65f235e2942.js.map +1 -0
  110. package/.next/standalone/.next/static/css/{6f8b1a84bcbcd467.css → e57bdbbbb5a05779.css} +2 -2
  111. package/.next/standalone/.next/static/css/e57bdbbbb5a05779.css.map +1 -0
  112. package/.next/standalone/package.json +9 -1
  113. package/CHANGELOG.md +90 -0
  114. package/README.md +30 -2
  115. package/api/types.ts +8 -0
  116. package/app/api/v1/agents/[id]/route.ts +7 -0
  117. package/app/api/v1/agents/route.ts +7 -0
  118. package/app/api/v1/events/route.ts +8 -0
  119. package/app/api/v1/extensions/route.ts +2 -2
  120. package/app/api/v1/extensions/tools/[name]/secrets/route.ts +3 -3
  121. package/app/api/v1/health/route.ts +8 -0
  122. package/app/api/v1/models/route.ts +7 -0
  123. package/app/api/v1/page-capture/route.ts +8 -0
  124. package/app/api/v1/providers/route.ts +8 -0
  125. package/app/api/v1/threads/[thread_id]/route.ts +8 -0
  126. package/app/api/v1/threads/[thread_id]/run/route.ts +9 -0
  127. package/app/api/v1/threads/route.ts +7 -0
  128. package/app/api/v1/tools/route.ts +9 -0
  129. package/components/chat/ContextUsageBar.tsx +44 -0
  130. package/lib/agents/llm.ts +25 -2
  131. package/lib/agents/run-thread.ts +13 -1
  132. package/lib/agents/stream-collector.ts +9 -1
  133. package/lib/api/serializers.test.ts +15 -0
  134. package/lib/api/serializers.ts +8 -0
  135. package/lib/db/migrations.ts +15 -0
  136. package/lib/health/runner.test.ts +24 -2
  137. package/lib/mcp/registry.ts +14 -6
  138. package/lib/providers/anthropic.test.ts +95 -0
  139. package/lib/providers/anthropic.ts +106 -10
  140. package/lib/providers/jarela-chat-model.ts +9 -1
  141. package/lib/providers/known-context-windows.ts +21 -0
  142. package/lib/providers/types.ts +21 -1
  143. package/lib/stores/message-usage.test.ts +34 -0
  144. package/lib/stores/message-usage.ts +15 -3
  145. package/lib/stores/pricing.test.ts +52 -0
  146. package/lib/stores/pricing.ts +26 -1
  147. package/lib/tools/builtins.ts +4 -0
  148. package/lib/tools/extension-surfaces.test.ts +79 -0
  149. package/lib/tools/extension-surfaces.ts +153 -0
  150. package/lib/tools/index.ts +27 -8
  151. package/lib/tools/list-tools.test.ts +76 -0
  152. package/lib/tools/list-tools.ts +84 -0
  153. package/lib/tools/mcp-servers-info.test.ts +73 -0
  154. package/lib/tools/mcp-servers-info.ts +71 -0
  155. package/lib/tools/providers-info.test.ts +73 -0
  156. package/lib/tools/providers-info.ts +106 -0
  157. package/lib/tools/registry.ts +36 -25
  158. package/lib/tools/types.ts +13 -0
  159. package/package.json +9 -1
  160. package/.next/standalone/.next/server/chunks/947.js.map +0 -1
  161. package/.next/standalone/.next/static/chunks/app/page-473b39ec30c7f569.js.map +0 -1
  162. package/.next/standalone/.next/static/css/6f8b1a84bcbcd467.css.map +0 -1
  163. /package/.next/standalone/.next/static/{T0p2VVPsJPj44rwbmjaFb → d_vhp-lJqfdjRFpnLVIqZ}/_buildManifest.js +0 -0
  164. /package/.next/standalone/.next/static/{T0p2VVPsJPj44rwbmjaFb → d_vhp-lJqfdjRFpnLVIqZ}/_ssgManifest.js +0 -0
@@ -1,3 +1,10 @@
1
+ /**
2
+ * @public — `GET /api/v1/agents/[id]`, `PATCH /api/v1/agents/[id]`,
3
+ * `DELETE /api/v1/agents/[id]`
4
+ *
5
+ * Per-agent CRUD on a single config. See `docs/api.md`.
6
+ */
7
+
1
8
  import { NextRequest, NextResponse } from "next/server";
2
9
  import {
3
10
  getAgentConfig,
@@ -1,3 +1,10 @@
1
+ /**
2
+ * @public — `GET /api/v1/agents` (list), `POST /api/v1/agents` (upsert)
3
+ *
4
+ * Agent-config CRUD: identity, instructions, tool policy, model config.
5
+ * See `docs/api.md`.
6
+ */
7
+
1
8
  import { NextRequest } from "next/server";
2
9
  import {
3
10
  listAgentConfigs,
@@ -1,3 +1,11 @@
1
+ /**
2
+ * @public — `GET /api/v1/events` (Server-Sent Events)
3
+ *
4
+ * Live notification stream — run completion, watcher fires, queue
5
+ * progress, etc. UI subscribes here for real-time updates; external
6
+ * scripts can subscribe over the same SSE wire format. See `docs/api.md`.
7
+ */
8
+
1
9
  import { NextRequest } from "next/server";
2
10
  import { recentSince, subscribe } from "@/lib/notifications/bus";
3
11
  import { startScheduler } from "@/lib/scheduler";
@@ -4,13 +4,13 @@ import {
4
4
  } from "@/lib/providers/external";
5
5
  import { BUILTIN_PROVIDER_NAMES } from "@/lib/providers";
6
6
  import { loadExternalTools, getToolsDir } from "@/lib/tools/external";
7
- import { BUILTIN_TOOL_NAMES } from "@/lib/tools";
7
+ import { getBuiltinToolNames } from "@/lib/tools";
8
8
  import { describeToolSecrets } from "@/lib/stores/tool-secrets";
9
9
  import { cachedJson } from "@/lib/api/responses";
10
10
 
11
11
  export function GET() {
12
12
  const provs = loadExternalProvidersDetailed(BUILTIN_PROVIDER_NAMES);
13
- const tools = loadExternalTools(BUILTIN_TOOL_NAMES);
13
+ const tools = loadExternalTools(getBuiltinToolNames());
14
14
 
15
15
  return cachedJson({
16
16
  directories: {
@@ -13,7 +13,7 @@
13
13
  import { NextRequest, NextResponse } from "next/server";
14
14
  import { z } from "zod";
15
15
  import { loadExternalTools } from "@/lib/tools/external";
16
- import { BUILTIN_TOOL_NAMES } from "@/lib/tools";
16
+ import { getBuiltinToolNames } from "@/lib/tools";
17
17
  import {
18
18
  describeToolSecrets,
19
19
  setToolSecret,
@@ -31,7 +31,7 @@ export async function GET(
31
31
  { params }: { params: Promise<{ name: string }> },
32
32
  ) {
33
33
  const { name } = await params;
34
- const tools = loadExternalTools(BUILTIN_TOOL_NAMES);
34
+ const tools = loadExternalTools(getBuiltinToolNames());
35
35
  const slots = tools.secrets.get(name);
36
36
  if (!slots) {
37
37
  return NextResponse.json({ error: "tool not found" }, { status: 404 });
@@ -58,7 +58,7 @@ export async function PUT(
58
58
  );
59
59
  }
60
60
 
61
- const tools = loadExternalTools(BUILTIN_TOOL_NAMES);
61
+ const tools = loadExternalTools(getBuiltinToolNames());
62
62
  const slots = tools.secrets.get(name);
63
63
  if (!slots) {
64
64
  return NextResponse.json({ error: "tool not found" }, { status: 404 });
@@ -1,3 +1,11 @@
1
+ /**
2
+ * @public — `GET /api/v1/health`
3
+ *
4
+ * Liveness/readiness probe. Returns DB-path, agent count, and basic
5
+ * runtime metadata. The browser extension and external uptime monitors
6
+ * poll this. See `docs/api.md`.
7
+ */
8
+
1
9
  import { NextResponse } from "next/server";
2
10
  import { listAgentConfigs } from "@/lib/stores/agent-configs";
3
11
  import { DB_PATH, getDb } from "@/lib/db";
@@ -1,3 +1,10 @@
1
+ /**
2
+ * @public — `GET /api/v1/models` (list), `POST /api/v1/models` (upsert)
3
+ *
4
+ * Model-config catalog: per-model parameter presets that agents bind to
5
+ * by name (`model_config_name`). See `docs/api.md`.
6
+ */
7
+
1
8
  import { NextRequest } from "next/server";
2
9
  import { listModelConfigs, upsertModelConfig } from "@/lib/stores/model-config";
3
10
  import { errorResponse, createdResponse, cachedJson } from "@/lib/api/responses";
@@ -1,3 +1,11 @@
1
+ /**
2
+ * @public — `POST /api/v1/page-capture` (with CORS `OPTIONS` preflight)
3
+ *
4
+ * Browser-extension upload endpoint: receives the active page's URL,
5
+ * title, and selected/full text and routes it into the active thread.
6
+ * See `docs/api.md`.
7
+ */
8
+
1
9
  import { handlePageCapture, handlePageCaptureOptions } from "@/lib/api/page-capture";
2
10
 
3
11
  export const POST = handlePageCapture;
@@ -1,3 +1,11 @@
1
+ /**
2
+ * @public — `GET /api/v1/providers`
3
+ *
4
+ * Lists every registered LLM provider name (built-in + external `.cjs`
5
+ * plugins). The agent-callable equivalent is the `list_providers` tool.
6
+ * See `docs/api.md`.
7
+ */
8
+
1
9
  import { cachedJson } from "@/lib/api/responses";
2
10
  import { listProviderNames } from "@/lib/providers";
3
11
 
@@ -1,3 +1,11 @@
1
+ /**
2
+ * @public — `GET /api/v1/threads/[thread_id]` (read messages),
3
+ * `PATCH /api/v1/threads/[thread_id]` (rename / set agent),
4
+ * `DELETE /api/v1/threads/[thread_id]` (drop thread)
5
+ *
6
+ * Thread-level CRUD. See `docs/api.md`.
7
+ */
8
+
1
9
  import { NextRequest, NextResponse } from "next/server";
2
10
  import {
3
11
  deleteThread,
@@ -1,3 +1,12 @@
1
+ /**
2
+ * @public — `POST /api/v1/threads/[thread_id]/run` (submit run),
3
+ * `GET /api/v1/threads/[thread_id]/run` (subscribe via SSE)
4
+ *
5
+ * Agent execution endpoint. Submit a run, then stream tokens, tool
6
+ * calls, and final state. The split-and-subscribe shape lets reconnects
7
+ * pick up an in-flight stream. See `docs/api.md`.
8
+ */
9
+
1
10
  import { NextRequest } from "next/server";
2
11
  import type { StreamOptions, StreamChunk } from "@/lib/agents/base";
3
12
  import type { ContentPart } from "@/lib/tools/types";
@@ -1,3 +1,10 @@
1
+ /**
2
+ * @public — `GET /api/v1/threads` (list), `POST /api/v1/threads` (create)
3
+ *
4
+ * Thread lifecycle. Threads are the unit of conversation history; every
5
+ * agent run lives inside one. See `docs/api.md`.
6
+ */
7
+
1
8
  import { NextRequest, NextResponse } from "next/server";
2
9
  import { createThread, listThreads } from "@/lib/stores/threads";
3
10
  import { getAgentConfig } from "@/lib/stores/agent-configs";
@@ -1,3 +1,12 @@
1
+ /**
2
+ * @public — `GET /api/v1/tools`
3
+ *
4
+ * Lists every tool in the agent's pool — built-in, external (loaded from
5
+ * `~/.jarela/tools/*.cjs`), and MCP — with category, capability, source,
6
+ * and per-tool stats. The agent-callable equivalent is the `list_tools`
7
+ * tool. See `docs/api.md`.
8
+ */
9
+
1
10
  import { NextResponse } from "next/server";
2
11
  import { getAllToolsAsync, getToolCategory, getToolCapability, getToolGroup, getToolSource } from "@/lib/tools";
3
12
  import { cachedJson } from "@/lib/api/responses";
@@ -69,6 +69,14 @@ export function ContextUsageBar({ usage, fallbackContextWindow }: Props) {
69
69
  const warmUsed = usage.warm_tokens!;
70
70
  const factsUsed = usage.facts_tokens!;
71
71
  const overheadUsed = usage.overhead_tokens!;
72
+ // Anthropic prompt-cache breakdown (ADR-0062). Disjoint from
73
+ // hot/warm/facts/overhead: those tiers count fresh input, while these
74
+ // count tokens served from / written to the prompt cache. Surface them
75
+ // in the tooltip and expanded panel so the user can see when caching
76
+ // is firing for this turn.
77
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
78
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
79
+ const cacheActive = cacheRead > 0 || cacheCreation > 0;
72
80
 
73
81
  // Overhead's "budget" is whatever it actually consumed — there's no slider
74
82
  // for it. Shown as a fixed-size segment so it doesn't visually compete
@@ -90,6 +98,13 @@ export function ContextUsageBar({ usage, fallbackContextWindow }: Props) {
90
98
  `Context window: ${cap.toLocaleString()} tokens (the model's full capacity)`,
91
99
  `This turn's prompt used ${(hotUsed + warmUsed + factsUsed + overheadUsed).toLocaleString()} tokens`,
92
100
  `Reply generated: ${usage.output_tokens.toLocaleString()} tokens`,
101
+ ...(cacheActive
102
+ ? [
103
+ "",
104
+ `Prompt cache: ${cacheRead.toLocaleString()} read · ${cacheCreation.toLocaleString()} written`,
105
+ "(cache reads bill at 0.1× input, writes at 1.25×)",
106
+ ]
107
+ : []),
93
108
  "",
94
109
  "Each coloured slot's width = budget for that tier; filled portion = actually used.",
95
110
  "Red = tier overflowed its budget. Grey tail = headroom reserved for the reply.",
@@ -104,12 +119,41 @@ export function ContextUsageBar({ usage, fallbackContextWindow }: Props) {
104
119
  {trailing > 0 && <div className="h-full bg-surface-3" style={{ width: `${toPct(trailing)}%` }} aria-hidden title={`Reserved for reply: ${trailing.toLocaleString()} tokens (${Math.round((trailing/cap)*100)}% of window)`} />}
105
120
  </div>
106
121
  </button>
122
+ {cacheActive && !showDetails && (
123
+ <div
124
+ className="mt-0.5 px-2 text-[10px] text-violet-500/80"
125
+ title={[
126
+ "Prompt cache (ADR-0062). Reads bill at 0.1× input, writes at 1.25×.",
127
+ cacheRead > 0 ? `${cacheRead.toLocaleString()} tokens served from cache.` : "",
128
+ cacheCreation > 0 ? `${cacheCreation.toLocaleString()} tokens written to cache.` : "",
129
+ ].filter(Boolean).join("\n")}
130
+ >
131
+ {cacheRead > 0 && <>cache hit · {fmtTokens(cacheRead)} read</>}
132
+ {cacheRead > 0 && cacheCreation > 0 && " · "}
133
+ {cacheCreation > 0 && <>cache write · {fmtTokens(cacheCreation)}</>}
134
+ </div>
135
+ )}
107
136
  {showDetails && (
108
137
  <div className="mt-1 px-2 pb-1 grid grid-cols-2 gap-x-3 gap-y-0.5 text-[10px] text-fg-faint">
109
138
  <Row label="Hot" color="text-accent" used={hotUsed} budget={hotBudget} hint="Recent messages kept verbatim" />
110
139
  <Row label="Warm" color="text-amber-500" used={warmUsed} budget={warmBudget} hint="Older history compressed into rolling summary" />
111
140
  <Row label="Facts" color="text-teal-500" used={factsUsed} budget={factsBudget} hint="Retrieved long-term memory + recall snippets" />
112
141
  <Row label="Overhead" color="text-fg-muted" used={overheadUsed} budget={overheadUsed} hint="System prompt + per-message scaffolding" />
142
+ {cacheActive && (
143
+ <span
144
+ className="col-span-2 text-violet-500"
145
+ title={[
146
+ "Prompt cache (ADR-0062). Disjoint from the tiers above.",
147
+ `Read ${cacheRead.toLocaleString()} tokens — billed at 0.1× input rate.`,
148
+ `Wrote ${cacheCreation.toLocaleString()} tokens — billed at 1.25× input rate.`,
149
+ "Reads pay off on subsequent turns; writes are an investment.",
150
+ ].join("\n")}
151
+ >
152
+ <span className="text-violet-500">Cache</span>{" "}
153
+ read {fmtTokens(cacheRead)}
154
+ {cacheCreation > 0 ? ` · created ${fmtTokens(cacheCreation)}` : ""}
155
+ </span>
156
+ )}
113
157
  <span
114
158
  className="col-span-2 mt-0.5 border-t border-border pt-0.5"
115
159
  title={`Output: tokens the model generated in its reply.\nWindow: total context capacity of this model.`}
package/lib/agents/llm.ts CHANGED
@@ -134,6 +134,12 @@ export async function* streamWithConfig(
134
134
  // JarelaChatModel; we sum them so the final figure covers the whole turn.
135
135
  let usageInputTokens = 0;
136
136
  let usageOutputTokens = 0;
137
+ // PR #181 + cache-fidelity follow-up: Anthropic prompt-cache reads/writes
138
+ // arrive as a separate breakdown via `input_token_details`. Sum them
139
+ // independently so the dashboard can report cost correctly (cache reads
140
+ // are 10× cheaper, cache writes 1.25× more expensive than fresh input).
141
+ let usageCacheCreationTokens = 0;
142
+ let usageCacheReadTokens = 0;
137
143
  let sawUsage = false;
138
144
  // Tracks whether the model hit max_tokens mid-stream. JarelaChatModel tags
139
145
  // the final chunk with additional_kwargs.stop_reason="length" when this
@@ -190,6 +196,11 @@ export async function* streamWithConfig(
190
196
  if (usage && (usage.input_tokens > 0 || usage.output_tokens > 0)) {
191
197
  usageInputTokens += usage.input_tokens ?? 0;
192
198
  usageOutputTokens += usage.output_tokens ?? 0;
199
+ const details = usage.input_token_details;
200
+ if (details) {
201
+ usageCacheCreationTokens += details.cache_creation ?? 0;
202
+ usageCacheReadTokens += details.cache_read ?? 0;
203
+ }
193
204
  sawUsage = true;
194
205
  }
195
206
  if (typeof chunk.content === "string" && chunk.content) {
@@ -269,7 +280,13 @@ export async function* streamWithConfig(
269
280
  data: {
270
281
  message_id: `llm-${threadId}-${Date.now()}`,
271
282
  usage: sawUsage
272
- ? { input_tokens: usageInputTokens, output_tokens: usageOutputTokens, source: "provider" }
283
+ ? {
284
+ input_tokens: usageInputTokens,
285
+ output_tokens: usageOutputTokens,
286
+ cache_creation_input_tokens: usageCacheCreationTokens,
287
+ cache_read_input_tokens: usageCacheReadTokens,
288
+ source: "provider",
289
+ }
273
290
  : { input_tokens: 0, output_tokens: totalOutputTokens, source: "estimate" },
274
291
  provider: cfg.provider,
275
292
  model_id: cfg.model_id,
@@ -375,7 +392,13 @@ export async function* streamWithConfig(
375
392
  data: {
376
393
  message_id: `llm-${threadId}-${Date.now()}`,
377
394
  usage: sawUsage
378
- ? { input_tokens: usageInputTokens, output_tokens: usageOutputTokens, source: "provider" }
395
+ ? {
396
+ input_tokens: usageInputTokens,
397
+ output_tokens: usageOutputTokens,
398
+ cache_creation_input_tokens: usageCacheCreationTokens,
399
+ cache_read_input_tokens: usageCacheReadTokens,
400
+ source: "provider",
401
+ }
379
402
  : { input_tokens: 0, output_tokens: totalOutputTokens, source: "estimate" },
380
403
  provider: cfg.provider,
381
404
  model_id: cfg.model_id,
@@ -464,6 +464,11 @@ async function* stallRetryStream(
464
464
  export interface AssistantUsageSnapshot {
465
465
  input_tokens: number;
466
466
  output_tokens: number;
467
+ // Anthropic prompt-cache breakdown (PR #181). Disjoint from input_tokens:
468
+ // total billable input = input_tokens + cache_creation + cache_read,
469
+ // priced at 1×, 1.25×, and 0.1× the input rate respectively.
470
+ cache_creation_input_tokens?: number;
471
+ cache_read_input_tokens?: number;
467
472
  provider: string;
468
473
  model_id: string;
469
474
  model_config_name: string | null;
@@ -566,8 +571,13 @@ export function persistAssistantMessage(
566
571
  const rates = hasProviderUsage
567
572
  ? modelRatesFor(tables, usage!.provider, usage!.model_id)
568
573
  : { inputPer1M: null, outputPer1M: null };
574
+ const cacheCreation = hasProviderUsage ? (usage!.cache_creation_input_tokens ?? 0) : 0;
575
+ const cacheRead = hasProviderUsage ? (usage!.cache_read_input_tokens ?? 0) : 0;
569
576
  const cost = hasProviderUsage
570
- ? estimateCostUsd(usage!.input_tokens, usage!.output_tokens, rates)
577
+ ? estimateCostUsd(usage!.input_tokens, usage!.output_tokens, rates, {
578
+ cache_creation_input_tokens: cacheCreation,
579
+ cache_read_input_tokens: cacheRead,
580
+ })
571
581
  : 0;
572
582
  recordMessageUsage({
573
583
  message_id: row.msg_id,
@@ -582,6 +592,8 @@ export function persistAssistantMessage(
582
592
  input_rate_usd_per_mtok: rates.inputPer1M,
583
593
  output_rate_usd_per_mtok: rates.outputPer1M,
584
594
  cost_usd: cost,
595
+ cache_creation_input_tokens: cacheCreation > 0 ? cacheCreation : null,
596
+ cache_read_input_tokens: cacheRead > 0 ? cacheRead : null,
585
597
  tier_usage: contextSnapshot
586
598
  ? {
587
599
  hot_tokens: contextSnapshot.hot_tokens,
@@ -81,7 +81,13 @@ export async function collectStream(
81
81
  }
82
82
  case "done": {
83
83
  const d = chunk.data as {
84
- usage?: { input_tokens?: number; output_tokens?: number; source?: string };
84
+ usage?: {
85
+ input_tokens?: number;
86
+ output_tokens?: number;
87
+ cache_creation_input_tokens?: number;
88
+ cache_read_input_tokens?: number;
89
+ source?: string;
90
+ };
85
91
  provider?: string;
86
92
  model_id?: string;
87
93
  model_config_name?: string | null;
@@ -90,6 +96,8 @@ export async function collectStream(
90
96
  result.usage = {
91
97
  input_tokens: d.usage.input_tokens ?? 0,
92
98
  output_tokens: d.usage.output_tokens ?? 0,
99
+ cache_creation_input_tokens: d.usage.cache_creation_input_tokens ?? 0,
100
+ cache_read_input_tokens: d.usage.cache_read_input_tokens ?? 0,
93
101
  provider: d.provider,
94
102
  model_id: d.model_id,
95
103
  model_config_name: d.model_config_name ?? null,
@@ -161,6 +161,8 @@ function makeUsageRow(overrides: Partial<MessageUsageRow> = {}): MessageUsageRow
161
161
  warm_budget_tokens: 20_000,
162
162
  facts_budget_tokens: 10_000,
163
163
  context_window_tokens: 100_000,
164
+ cache_creation_input_tokens: null,
165
+ cache_read_input_tokens: null,
164
166
  ...overrides,
165
167
  };
166
168
  }
@@ -198,6 +200,10 @@ describe("messageUsageToResponse", () => {
198
200
  warm_budget_tokens: 20_000,
199
201
  facts_budget_tokens: 10_000,
200
202
  context_window_tokens: 100_000,
203
+ // Anthropic prompt-cache breakdown carries through. NULL by default
204
+ // (legacy rows + non-Anthropic providers) — see PR #181 follow-up.
205
+ cache_creation_input_tokens: null,
206
+ cache_read_input_tokens: null,
201
207
  });
202
208
  // Cost + provenance fields stay server-side; the bar doesn't need them.
203
209
  expect(out).not.toHaveProperty("cost_usd");
@@ -205,6 +211,15 @@ describe("messageUsageToResponse", () => {
205
211
  expect(out).not.toHaveProperty("agent_id");
206
212
  });
207
213
 
214
+ it("surfaces Anthropic cache token breakdown when populated", () => {
215
+ const out = messageUsageToResponse(makeUsageRow({
216
+ cache_creation_input_tokens: 4_000,
217
+ cache_read_input_tokens: 80_000,
218
+ }))!;
219
+ expect(out.cache_creation_input_tokens).toBe(4_000);
220
+ expect(out.cache_read_input_tokens).toBe(80_000);
221
+ });
222
+
208
223
  it("preserves NULL tier columns for legacy snapshots", () => {
209
224
  const out = messageUsageToResponse(makeUsageRow({
210
225
  hot_tokens: null, warm_tokens: null, facts_tokens: null, overhead_tokens: null,
@@ -78,6 +78,12 @@ export function mcpServerToResponse(r: McpServerRow) {
78
78
  * chat panel's `ContextUsageBar` consumes. Returns `null` for messages
79
79
  * that have no snapshot (user turns and legacy assistant rows recorded
80
80
  * before the per-turn snapshot landed in ADR-0041).
81
+ *
82
+ * Anthropic prompt-cache tokens (PR #181 + the cache-fidelity follow-up)
83
+ * are surfaced as additive fields so future UI work can render a
84
+ * "served from cache" badge without another wire change. Both are
85
+ * `null` for rows that predate cache plumbing or for providers that
86
+ * don't expose a cache breakdown.
81
87
  */
82
88
  export function messageUsageToResponse(u: MessageUsageRow | undefined | null) {
83
89
  if (!u) return null;
@@ -92,6 +98,8 @@ export function messageUsageToResponse(u: MessageUsageRow | undefined | null) {
92
98
  warm_budget_tokens: u.warm_budget_tokens,
93
99
  facts_budget_tokens: u.facts_budget_tokens,
94
100
  context_window_tokens: u.context_window_tokens,
101
+ cache_creation_input_tokens: u.cache_creation_input_tokens,
102
+ cache_read_input_tokens: u.cache_read_input_tokens,
95
103
  };
96
104
  }
97
105
 
@@ -283,6 +283,7 @@ export function runMigrations(db: DatabaseSync): void {
283
283
  ensureScheduledTasksReactionKindColumns(db);
284
284
  ensureMessageUsageTable(db);
285
285
  ensureMessageUsageTierColumns(db);
286
+ ensureMessageUsageCacheColumns(db);
286
287
  ensureThreadContextPinColumns(db);
287
288
  ensureThreadChannelSummariesTable(db);
288
289
  seedModelConfigs(db);
@@ -728,6 +729,20 @@ function ensureMessageUsageTierColumns(db: DatabaseSync): void {
728
729
  if (!names.has("context_window_tokens")) db.exec("ALTER TABLE message_usage ADD COLUMN context_window_tokens INTEGER");
729
730
  }
730
731
 
732
/**
 * Adds the two Anthropic prompt-cache counters to `message_usage` when they
 * are not there yet. Each `ALTER TABLE` is skipped if its column is already
 * present, so running the migration again is harmless.
 *
 * Background: PR #181 turned on Anthropic prompt caching, yet the per-turn
 * usage snapshot kept recording only `input_tokens` / `output_tokens`.
 * Anthropic reports cache reads and cache writes as separate counts (priced
 * at 0.1× and 1.25× the input rate respectively), so without these columns
 * the dashboard underreports cost on cache-creating turns and *over*reports
 * it on cache-hitting turns.
 *
 * Both columns are nullable: legacy rows and non-Anthropic providers leave
 * them NULL.
 *
 * @param db - Database handle to migrate; `runMigrations` invokes this after
 *   `ensureMessageUsageTable` and `ensureMessageUsageTierColumns`.
 */
function ensureMessageUsageCacheColumns(db: DatabaseSync): void {
  const tableInfo = db.prepare("PRAGMA table_info(message_usage)").all() as Array<{ name: string }>;
  const existing = new Set(tableInfo.map((column) => column.name));

  // Column name paired with the statement that creates it; processed in
  // declaration order (creation counter first, then read counter).
  const additions: ReadonlyArray<readonly [string, string]> = [
    ["cache_creation_input_tokens", "ALTER TABLE message_usage ADD COLUMN cache_creation_input_tokens INTEGER"],
    ["cache_read_input_tokens", "ALTER TABLE message_usage ADD COLUMN cache_read_input_tokens INTEGER"],
  ];

  for (const [column, ddl] of additions) {
    if (existing.has(column)) continue;
    db.exec(ddl);
  }
}
745
+
731
746
  function seedAgentConfigs(db: DatabaseSync): void {
732
747
  // Only seed on first run — once the user has any agents we must not
733
748
  // resurrect ones they've deleted (e.g. the legacy "echo" / "llm" defaults).
@@ -46,8 +46,30 @@ describe("health runner", () => {
46
46
  for (const k of ["atlassian", "github", "google", "gmail", "outlook", "anthropic", "jira_align"]) {
47
47
  deleteMemory("integrations", k);
48
48
  }
49
- delete process.env.OPENAI_API_KEY;
50
- delete process.env.DEEPSEEK_API_KEY;
49
+ // Several integrations have env-var fallbacks that bypass the
50
+ // `integrations` namespace (`_resolveGithubAuth` reads GITHUB_TOKEN /
51
+ // GH_TOKEN before falling back to the store; Atlassian and Jira Align
52
+ // do the same with their respective vars). The test runs in whatever
53
+ // shell the developer happens to have, which often has GITHUB_TOKEN
54
+ // set for the gh CLI — leaving an "unconfigured" assertion silently
55
+ // false. Clear them all here so each case opts in by setting only what
56
+ // it needs.
57
+ for (const v of [
58
+ "OPENAI_API_KEY",
59
+ "DEEPSEEK_API_KEY",
60
+ "GITHUB_TOKEN",
61
+ "GH_TOKEN",
62
+ "ATLASSIAN_URL",
63
+ "ATLASSIAN_EMAIL",
64
+ "ATLASSIAN_API_TOKEN",
65
+ "JIRA_ALIGN_URL",
66
+ "JIRA_ALIGN_TOKEN",
67
+ "GMAIL_CLIENT_ID",
68
+ "GMAIL_CLIENT_SECRET",
69
+ "GMAIL_REFRESH_TOKEN",
70
+ ]) {
71
+ delete process.env[v];
72
+ }
51
73
  });
52
74
 
53
75
  it("publishes a single alert on first failure (not one per cycle)", async () => {
@@ -1,9 +1,17 @@
1
- // MCP server registry types + variable substitution.
2
- //
3
- // Discovery is online via the official MCP Registry (see
4
- // `lib/mcp/upstream-registry.ts` and ADR-0013). Entries flow through this
5
- // module's types so the picker UI, install action, and `applyVariables`
6
- // substitution path stay decoupled from the upstream wire format.
1
+ /**
2
+ * @public
3
+ *
4
+ * MCP server registry types + variable substitution.
5
+ *
6
+ * Discovery is online via the official MCP Registry (see
7
+ * `lib/mcp/upstream-registry.ts` and ADR-0013). Entries flow through this
8
+ * module's types so the picker UI, install action, and `applyVariables`
9
+ * substitution path stay decoupled from the upstream wire format.
10
+ *
11
+ * All exports in this file are part of the package's stable public
12
+ * surface (per `package.json#exports`). Removing or breaking them counts
13
+ * as a breaking change under the deprecation policy in CONTRIBUTING.md.
14
+ */
7
15
 
8
16
  export interface RegistryVariable {
9
17
  /** Unique key used as `${key}` in spec strings or env values. */
@@ -0,0 +1,95 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import type Anthropic from "@anthropic-ai/sdk";
3
+ import {
4
+ withSystemCacheControl,
5
+ withToolsCacheControl,
6
+ withLastToolResultCacheControl,
7
+ } from "./anthropic";
8
+
9
// Covers the system-prompt wrapper: a non-empty prompt becomes one cached
// text block, an empty prompt yields `undefined`.
describe("withSystemCacheControl", () => {
  it("wraps non-empty text in a TextBlockParam with ephemeral cache_control", () => {
    const wrapped = withSystemCacheControl("you are helpful");

    expect(wrapped).toEqual([
      { type: "text", text: "you are helpful", cache_control: { type: "ephemeral" } },
    ]);
  });

  it("returns undefined for empty string so the system field is omitted", () => {
    const wrapped = withSystemCacheControl("");

    expect(wrapped).toBeUndefined();
  });
});
20
+
21
// Covers the tool-list wrapper: empty input passes through, only the final
// tool receives the cache breakpoint, and the caller's array is left alone.
describe("withToolsCacheControl", () => {
  /** Minimal tool fixture; only the name differs between instances. */
  function makeTool(name: string): Anthropic.Tool {
    return {
      name,
      description: "",
      input_schema: { type: "object", properties: {} } as Anthropic.Tool.InputSchema,
    };
  }

  /** Reads `cache_control` from an entry whose static type may not declare it. */
  const cacheControlOf = (entry: unknown): unknown =>
    (entry as { cache_control?: unknown }).cache_control;

  it("returns the input unchanged when no tools are provided", () => {
    const result = withToolsCacheControl([]);

    expect(result).toEqual([]);
  });

  it("marks only the last tool with cache_control", () => {
    const result = withToolsCacheControl([makeTool("a"), makeTool("b"), makeTool("c")]);

    expect(result).toHaveLength(3);
    expect(cacheControlOf(result[0])).toBeUndefined();
    expect(cacheControlOf(result[1])).toBeUndefined();
    expect(cacheControlOf(result[2])).toEqual({ type: "ephemeral" });
  });

  it("does not mutate the input array", () => {
    const input = [makeTool("a"), makeTool("b")];
    const before = JSON.stringify(input);

    withToolsCacheControl(input);

    const after = JSON.stringify(input);
    expect(after).toBe(before);
  });
});
47
+
48
// Covers the message-history wrapper: histories without tool results pass
// through, only the final tool_result of the latest tool-result message is
// marked, and the caller's messages are left alone.
describe("withLastToolResultCacheControl", () => {
  /** Reads `cache_control` from a block whose static type may not declare it. */
  const cacheControlOf = (block: unknown): unknown =>
    (block as { cache_control?: unknown }).cache_control;

  /** Builds a user message made only of tool_result blocks, one per `[id, content]` pair. */
  function toolResultMessage(...results: Array<[string, string]>): Anthropic.MessageParam {
    return {
      role: "user",
      content: results.map(([toolUseId, text]) => ({
        type: "tool_result" as const,
        tool_use_id: toolUseId,
        content: text,
      })),
    };
  }

  it("returns messages unchanged when none contain a tool_result", () => {
    const conversation: Anthropic.MessageParam[] = [
      { role: "user", content: "hi" },
      { role: "assistant", content: [{ type: "text", text: "hello" }] },
    ];

    const result = withLastToolResultCacheControl(conversation);

    expect(result).toEqual(conversation);
  });

  it("marks the last tool_result block in the last message that has one", () => {
    const conversation: Anthropic.MessageParam[] = [
      toolResultMessage(["t1", "old"]),
      { role: "assistant", content: [{ type: "text", text: "thinking" }] },
      toolResultMessage(["t2", "fresh-A"], ["t3", "fresh-B"]),
    ];

    const result = withLastToolResultCacheControl(conversation);

    const newestBlocks = result[result.length - 1].content as Anthropic.ContentBlockParam[];
    expect(cacheControlOf(newestBlocks[0])).toBeUndefined();
    expect(cacheControlOf(newestBlocks[1])).toEqual({ type: "ephemeral" });

    // Older tool_result on prior message must remain unmarked — only the
    // most recent breakpoint is needed for incremental within-turn caching.
    const oldestBlocks = result[0].content as Anthropic.ContentBlockParam[];
    expect(cacheControlOf(oldestBlocks[0])).toBeUndefined();
  });

  it("does not mutate the input messages array", () => {
    const conversation: Anthropic.MessageParam[] = [toolResultMessage(["t1", "x"])];
    const before = JSON.stringify(conversation);

    withLastToolResultCacheControl(conversation);

    const after = JSON.stringify(conversation);
    expect(after).toBe(before);
  });
});