@circuitwall/jarela 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/.next/standalone/.next/BUILD_ID +1 -1
  2. package/.next/standalone/.next/build-manifest.json +2 -2
  3. package/.next/standalone/.next/prerender-manifest.json +3 -3
  4. package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
  5. package/.next/standalone/.next/server/app/_global-error.html +1 -1
  6. package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
  7. package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  8. package/.next/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  9. package/.next/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  10. package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  11. package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  12. package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  13. package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  14. package/.next/standalone/.next/server/app/_not-found.html +1 -1
  15. package/.next/standalone/.next/server/app/_not-found.rsc +1 -1
  16. package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  17. package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  18. package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  19. package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  20. package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  21. package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  22. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/chats/route.js +3 -3
  23. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/chats/route.js.nft.json +1 -1
  24. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/lookup/route.js +3 -3
  25. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/lookup/route.js.nft.json +1 -1
  26. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/pair/route.js +3 -3
  27. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/pair/route.js.nft.json +1 -1
  28. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js +3 -3
  29. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js.nft.json +1 -1
  30. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/status/route.js +3 -3
  31. package/.next/standalone/.next/server/app/api/v1/bridges/[id]/status/route.js.nft.json +1 -1
  32. package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js +218 -7
  33. package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js.map +1 -1
  34. package/.next/standalone/.next/server/app/api/v1/events/route.js +3 -3
  35. package/.next/standalone/.next/server/app/api/v1/events/route.js.nft.json +1 -1
  36. package/.next/standalone/.next/server/app/api/v1/extension/agents/route.js +8 -1
  37. package/.next/standalone/.next/server/app/api/v1/extension/agents/route.js.map +1 -1
  38. package/.next/standalone/.next/server/app/api/v1/extension/fill/route.js +8 -1
  39. package/.next/standalone/.next/server/app/api/v1/extension/fill/route.js.map +1 -1
  40. package/.next/standalone/.next/server/app/api/v1/extension/refine/route.js +8 -1
  41. package/.next/standalone/.next/server/app/api/v1/extension/refine/route.js.map +1 -1
  42. package/.next/standalone/.next/server/app/api/v1/extension/turn/route.js +8 -1
  43. package/.next/standalone/.next/server/app/api/v1/extension/turn/route.js.map +1 -1
  44. package/.next/standalone/.next/server/app/api/v1/extensions/route.js +2 -2
  45. package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js +2 -2
  46. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js +37 -3
  47. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js.map +1 -1
  48. package/.next/standalone/.next/server/app/api/v1/tools/route.js +2 -2
  49. package/.next/standalone/.next/server/app/page.js +10 -18
  50. package/.next/standalone/.next/server/app/page.js.map +1 -1
  51. package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  52. package/.next/standalone/.next/server/app/setup/page_client-reference-manifest.js +1 -1
  53. package/.next/standalone/.next/server/chunks/210.js +1 -1
  54. package/.next/standalone/.next/server/chunks/239.js +5335 -5230
  55. package/.next/standalone/.next/server/chunks/239.js.map +1 -1
  56. package/.next/standalone/.next/server/chunks/{1683.js → 241.js} +210 -36
  57. package/.next/standalone/.next/server/chunks/241.js.map +1 -0
  58. package/.next/standalone/.next/server/chunks/{8135.js → 2539.js} +218 -36
  59. package/.next/standalone/.next/server/chunks/2539.js.map +1 -0
  60. package/.next/standalone/.next/server/chunks/4631.js +218 -7
  61. package/.next/standalone/.next/server/chunks/4631.js.map +1 -1
  62. package/.next/standalone/.next/server/chunks/8866.js +13389 -13073
  63. package/.next/standalone/.next/server/chunks/8866.js.map +1 -1
  64. package/.next/standalone/.next/server/chunks/9032.js +1 -1
  65. package/.next/standalone/.next/server/chunks/9032.js.map +1 -1
  66. package/.next/standalone/.next/server/middleware-build-manifest.js +2 -2
  67. package/.next/standalone/.next/server/pages/404.html +1 -1
  68. package/.next/standalone/.next/server/pages/500.html +1 -1
  69. package/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  70. package/.next/standalone/.next/static/chunks/app/{page-62e0d5f2404b403b.js → page-74846c864241b96d.js} +11 -19
  71. package/.next/standalone/.next/static/chunks/app/page-74846c864241b96d.js.map +1 -0
  72. package/.next/standalone/package.json +2 -1
  73. package/CHANGELOG.md +98 -0
  74. package/README.md +51 -26
  75. package/components/chat/InputBar.tsx +10 -1
  76. package/components/ui/BootScreen.tsx +0 -10
  77. package/lib/agents/agent-turn.ts +9 -0
  78. package/lib/agents/prepare/request.ts +9 -0
  79. package/lib/agents/run-thread.ts +9 -1
  80. package/lib/api/extension-turn.ts +7 -0
  81. package/lib/api/page-capture.test.ts +58 -0
  82. package/lib/api/page-capture.ts +31 -1
  83. package/lib/bridges/attachment-store.test.ts +440 -0
  84. package/lib/bridges/attachment-store.ts +184 -0
  85. package/lib/bridges/whatsapp.ts +50 -32
  86. package/lib/tools/async-results-tool.ts +114 -0
  87. package/lib/tools/async-results.test.ts +481 -0
  88. package/lib/tools/async-results.ts +165 -0
  89. package/lib/tools/builtins.ts +1 -0
  90. package/lib/tools/wallclock.ts +114 -8
  91. package/package.json +2 -1
  92. package/.next/standalone/.next/server/chunks/1683.js.map +0 -1
  93. package/.next/standalone/.next/server/chunks/8135.js.map +0 -1
  94. package/.next/standalone/.next/static/chunks/app/page-62e0d5f2404b403b.js.map +0 -1
  95. /package/.next/standalone/.next/static/{2xWP8843jbntFGKLnHK6R → AV5AO0yTRABo-NgwxhDe7}/_buildManifest.js +0 -0
  96. /package/.next/standalone/.next/static/{2xWP8843jbntFGKLnHK6R → AV5AO0yTRABo-NgwxhDe7}/_ssgManifest.js +0 -0
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@circuitwall/jarela",
3
- "version": "1.2.0",
3
+ "version": "1.4.0",
4
4
  "description": "Jarela — local chat interface for LangGraph agents (multi-provider, single-process, SQLite-backed).",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Andrew Ge Wu",
@@ -100,6 +100,7 @@
100
100
  "test:live:isolated:full": "node scripts/live-test-isolated.mjs --llm",
101
101
  "test:e2e": "playwright test",
102
102
  "test:e2e:ui": "playwright test --ui",
103
+ "promo:record": "node scripts/promo-record.mjs",
103
104
  "release:docker": "node scripts/release-docker.mjs",
104
105
  "release:docker:dry": "node scripts/release-docker.mjs --dry-run"
105
106
  },
package/CHANGELOG.md CHANGED
@@ -7,6 +7,104 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.4.0] - 2026-06-08
11
+
12
+ ### Added
13
+
14
+ - **Browser-extension element screenshot.** The page-capture flow now
15
+ ships a cropped PNG of the picked element alongside the text. The
16
+ content script grabs the visible viewport via
17
+ `chrome.tabs.captureVisibleTab` (loopback only, via the service worker)
18
+ and crops it to the element's bounding rect through `OffscreenCanvas`
19
+ at `devicePixelRatio`. The server validates the base64 payload (≤ 4 MB
20
+ encoded), persists the user message as a multipart `ContentPart[]` of
21
+ `[text, image]` so the bubble renders the picture inline, and
22
+ forwards the image part to the silent observer turn so vision-capable
23
+ agents see it on the immediate follow-up run. Falls back cleanly to
24
+ text-only capture if the snapshot is denied. See
25
+ [`docs/api.md`](./docs/api.md#post-apiv1page-capture) for the updated
26
+ request schema.
27
+ - **Promo video recorder.** `npm run promo:record` (via
28
+ [`scripts/promo-record.mjs`](./scripts/promo-record.mjs)) drives your
29
+ real local install in a 9:16 vertical PWA viewport and records a
30
+ dark-theme `.webm` of the tap-to-unlock intro, agent picker, a
31
+ human-paced chat turn, and a tour of every side panel. First run
32
+ saves auth state to `promo/.storage.json` and reuses it thereafter.
33
+
34
+ ## [1.3.0] - 2026-06-08
35
+
36
+ Two new agent capabilities and a hardening pass on tool wall-clocks.
37
+ Bridge adapters (WhatsApp today) now spill large remote attachments
38
+ to a local store instead of inlining them into the LLM context, and
39
+ the agent picks them up by path through ``file_read``. Long-running
40
+ tool calls can now be fired asynchronously: the LLM gets a tracking
41
+ key back immediately and pulls the result later via a new built-in.
42
+
43
+ ### Added
44
+
45
+ - **Bridge attachment spill store**
46
+ ([#215](https://github.com/CircuitWall/jarela/pull/215)). Inbound
47
+ bridge messages no longer base64-inline every document, voice note,
48
+ audio, or video into the next prompt. Buffers are persisted under
49
+ ``<dataDir>/bridge-attachments/<bridge>/<YYYY-MM-DD>/<id>-<name>``
50
+ with sanitised paths, a SHA-256 digest, and a future-facing
51
+ ``pruneBridgeAttachments({ maxAgeMs })`` helper; the prompt body
52
+ carries a text pointer telling the agent to use ``file_read`` to
53
+ inspect the contents. Images and stickers ≤ 1 MB still inline so
54
+ vision works out of the box.
55
+ - **Async tool execution (``async_run`` wrapper + ``tool_result_get``)**
56
+ ([#216](https://github.com/CircuitWall/jarela/pull/216)). Every
57
+ tool's schema now exposes an optional ``async_run: boolean``. When
58
+ set, the wrapper returns ``{ok, async, key, tool, started_at,
59
+ deadline_ms, hint}`` immediately and runs the work detached; the
60
+ LLM picks the result up via the new built-in
61
+ ``tool_result_get(key, wait_ms?, consume?)``. ``tool_result_list``
62
+ returns summaries without dumping result bodies. In-process store
63
+ with a 10-minute TTL and a 256-entry cap (oldest finished evicted
64
+ first, then oldest pending with a warn).
65
+
66
+ ### Changed
67
+
68
+ - **Hard ceiling on tool ``deadline_ms``**
69
+ ([#216](https://github.com/CircuitWall/jarela/pull/216)). The
70
+ wall-clock budget the LLM can pick is now clamped to 30 minutes by
71
+ default. Values above the ceiling are clamped and a one-line
72
+ ``console.warn`` is emitted naming the tool, the requested value,
73
+ and the ceiling. Operators can raise or lower the cap with the new
74
+ ``JARELA_TOOL_MAX_DEADLINE_MS`` environment variable (integer
75
+ milliseconds). Applies to both sync and ``async_run`` paths.
76
+
77
+ ### Fixed
78
+
79
+ - **E2E menu specs no longer race the boot agent picker**
80
+ ([#217](https://github.com/CircuitWall/jarela/pull/217)). Three
81
+ Playwright specs (``layout``, ``credentials``, ``setup-reorg``)
82
+ were intermittently failing because the BootScreen overlay
83
+ intercepted clicks on the header menu button. A new
84
+ ``waitForAppReady(page)`` helper picks the default agent tile and
85
+ waits for the overlay to detach before the test drives the UI.
86
+
87
+ ### Configuration
88
+
89
+ - ``JARELA_TOOL_MAX_DEADLINE_MS`` — overrides the per-tool
90
+ wall-clock ceiling (default 1800000 ms / 30 min). Set to a smaller
91
+ value to tighten the cap, or larger if a regulated workload genuinely
92
+ needs long synchronous calls.
93
+
94
+ Two follow-up fixes on top of 1.2.0.
95
+
96
+ ### Fixed
97
+
98
+ - **Boot agent picker always shows after login**
99
+ ([#213](https://github.com/CircuitWall/jarela/pull/213)). The picker
100
+ was being skipped in some session states; it now reliably appears so
101
+ the user actively chooses an agent at boot instead of silently
102
+ inheriting one.
103
+ - **Extension UX polish on one-shot turns**
104
+ ([#212](https://github.com/CircuitWall/jarela/pull/212)). Custom
105
+ intent collapses by default, Enter submits, writes are queued, and
106
+ one-shot turns drop the quality gates that didn't apply to them.
107
+
10
108
  ## [1.2.0] - 2026-06-08
11
109
 
12
110
  Security, runtime resilience, and a broad UI consolidation pass.
package/README.md CHANGED
@@ -1,29 +1,31 @@
1
-
2
-
3
-
4
- <p align="center">
5
- <img src="./public/logo-mark-transparent.png" alt="Jarela" width="160" />
1
+ <p align="center">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="./public/icon-512.png" />
4
+ <img src="./public/icon-512-light.png" alt="Jarela" width="140" />
5
+ </picture>
6
6
  </p>
7
7
 
8
8
  <h1 align="center">Jarela</h1>
9
9
 
10
10
  <p align="center">
11
- <b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b><br/>
12
- <sub>Next.js 16 + LangGraph + SQLite. PWA-installable. No cloud backend, no telemetry.</sub>
11
+ <b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b>
12
+ </p>
13
+ <p align="center">
14
+ <sub>Next.js 16 &middot; LangGraph &middot; SQLite &middot; PWA-installable &middot; no cloud backend, no telemetry</sub>
13
15
  </p>
14
16
 
15
17
  <p align="center">
16
- <a href="#quick-start">Quick start</a> ·
17
- <a href="#configuration-guide-home--work">Config guide</a> ·
18
- <a href="#supported-platforms">Platforms</a> ·
19
- <a href="#features">Features</a> ·
20
- <a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a> ·
21
- <a href="#built-in-toolbelt">Tools</a> ·
22
- <a href="#providers">Providers</a> ·
23
- <a href="#connections">Connections</a> ·
24
- <a href="./docs/EXTENDING.md">Extending</a> ·
25
- <a href="./docs/ARCHITECTURE.md">Architecture</a> ·
26
- <a href="./CONTRIBUTING.md">Contributing</a> ·
18
+ <a href="#quick-start">Quick start</a> &middot;
19
+ <a href="#configuration-guide-home--work">Config guide</a> &middot;
20
+ <a href="#supported-platforms">Platforms</a> &middot;
21
+ <a href="#features">Features</a> &middot;
22
+ <a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a> &middot;
23
+ <a href="#built-in-toolbelt">Tools</a> &middot;
24
+ <a href="#providers">Providers</a> &middot;
25
+ <a href="#connections">Connections</a> &middot;
26
+ <a href="./docs/EXTENDING.md">Extending</a> &middot;
27
+ <a href="./docs/ARCHITECTURE.md">Architecture</a> &middot;
28
+ <a href="./CONTRIBUTING.md">Contributing</a> &middot;
27
29
  <a href="#documentation">Docs</a>
28
30
  </p>
29
31
 
@@ -51,15 +53,16 @@
51
53
  </a>
52
54
  </p>
53
55
 
54
- ---
55
-
56
56
  <p align="center">
57
- <video src="https://github.com/user-attachments/assets/0f33f8d3-07bb-4850-9fcc-cfc97036f180" controls width="640" muted>
58
- Your browser doesn't support embedded video.
59
- <a href="https://github.com/user-attachments/assets/0f33f8d3-07bb-4850-9fcc-cfc97036f180">Download the clip</a>.
57
+ <video src="./docs/assets/jarela-promo.webm" poster="./docs/assets/jarela-promo-poster.jpg" controls muted playsinline width="320">
58
+ <img src="./docs/assets/jarela-promo-poster.jpg" alt="Jarela promo &mdash; PIN unlock, agent picker, chat, panel tour" width="320" />
60
59
  </video>
60
+ <br/>
61
+ <sub><i>Tap-to-unlock &rarr; agent picker &rarr; human-paced chat &rarr; full panel tour</i> &middot; <a href="./docs/assets/jarela-promo.webm">Download .webm</a></sub>
61
62
  </p>
62
63
 
64
+ ---
65
+
63
66
  ## Quick start
64
67
 
65
68
  Get to a working local agent in under 10 minutes:
@@ -239,9 +242,12 @@ create an Outlook Calendar invite in the same turn.
239
242
  **Memory**, **Documents**, **Profile**, **Bridges**, **Scheduled tasks**,
240
243
  and **Pending approvals**.
241
244
  - **Browser extension** ([`browser-extension/`](./browser-extension)) —
242
- Chrome MV3, click an element on any page and POST it to your local
243
- Jarela as a new user message (ADR-0018). Loopback only; toolbar icon
244
- greys out when Jarela isn't running.
245
+ Chrome MV3, click an element on any page and POST it (with a cropped
246
+ PNG of the picked element) to your local Jarela as a new user message
247
+ (ADR-0018). The screenshot is rendered inline in the chat bubble and
248
+ forwarded to vision-capable agents on the silent observer turn that
249
+ fires immediately after the capture. Loopback only; toolbar icon greys
250
+ out when Jarela isn't running.
245
251
 
246
252
  ### Operational
247
253
 
@@ -914,6 +920,25 @@ on every push and PR: `lint + tsc --noEmit + next build`, then the same
914
920
  live integration suite against the production server output. The build
915
921
  badge at the top of this README links straight to the latest run.
916
922
 
923
+ ## Recording a promo video
924
+
925
+ [scripts/promo-record.mjs](./scripts/promo-record.mjs) drives your real
926
+ local install (default `http://localhost:4312`) inside a 540&times;960
927
+ vertical (9:16) PWA viewport and records a `.webm` of a five-scene tour
928
+ in dark theme: a simulated PIN unlock, agent picker, a human-paced chat
929
+ turn, every side panel, and a closing pose.
930
+
931
+ ```bash
932
+ npm run dev # in one terminal
933
+ npm run promo:record # in another — output lands in ./promo/
934
+ ```
935
+
936
+ The first run opens a headed Chromium so you can manually unlock the
937
+ install if needed; the resulting auth state is saved to
938
+ `promo/.storage.json` and reused on every subsequent run. Override the
939
+ target with `JARELA_PROMO_URL`, the chat line with `JARELA_PROMO_MSG`,
940
+ or skip the actual send with `JARELA_PROMO_SKIP_CHAT=1`.
941
+
917
942
  ## Security
918
943
 
919
944
  - **CSRF / origin guard** ([lib/auth/access.ts](./lib/auth/access.ts))
@@ -73,6 +73,12 @@ function fileToContentPart(file: File): Promise<ContentPart> {
73
73
  });
74
74
  }
75
75
 
76
+ function attachmentKey(a: ContentPart, i: number): string {
77
+ if (a.type === "text") return `text:${i}:${a.text.length}`;
78
+ const name = a.type === "file" ? a.name : "";
79
+ return `${a.type}:${a.media_type}:${name}:${a.data.length}:${a.data.slice(0, 16)}`;
80
+ }
81
+
76
82
  export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue, onStop, streaming, disabled, placeholder, voiceEnabled, agentId, onVoiceTranscript }: Props) {
77
83
  // Text state is intentionally LOCAL. Lifting it to ChatView would re-render
78
84
  // the entire message list (every MessageBubble + ReactMarkdown pass) on
@@ -268,7 +274,10 @@ export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue,
268
274
  {attachments.length > 0 && (
269
275
  <div className="flex flex-wrap gap-2 mb-2">
270
276
  {attachments.map((a, i) => (
271
- <div key={i} className="relative group shrink-0">
277
+ // Content-derived key: using the index reused DOM nodes when
278
+ // earlier attachments were removed, flashing the wrong preview
279
+ // (and the wrong filename) into the slot of the survivor.
280
+ <div key={attachmentKey(a, i)} className="relative group shrink-0">
272
281
  {a.type === "image" ? (
273
282
  // eslint-disable-next-line @next/next/no-img-element
274
283
  <img
@@ -150,16 +150,6 @@ export function BootScreen({ agents, agentsLoaded, activeAgentId, onPickAgent, s
150
150
  };
151
151
  }, [activeAgentId, pickedId, agentsLoaded, markStep]);
152
152
 
153
- // Returning users with a saved default skip the manual tile-click.
154
- useEffect(() => {
155
- if (suppressed) return;
156
- if (!agentsLoaded) return;
157
- if (activeAgentId || pickedId) return;
158
- if (!defaultAgent) return;
159
- setPickedId(defaultAgent.id);
160
- onPickAgent(defaultAgent.id);
161
- }, [suppressed, agentsLoaded, activeAgentId, pickedId, defaultAgent, onPickAgent]);
162
-
163
153
  if (done) return null;
164
154
  if (suppressed) return null;
165
155
 
@@ -41,6 +41,14 @@ export interface RunAgentTurnRequest {
41
41
  * the category default.
42
42
  */
43
43
  context_profile_override?: Partial<TurnContextProfile> | null;
44
+
45
+ /**
46
+ * Skip the stall-retry + strict-citation audit wrapper. One-shot
47
+ * callers (browser-extension fill / rewrite) want the raw assistant
48
+ * text without the `↻` separator or pre-retry stall prose that the
49
+ * wrapper would otherwise inject into the streamed content.
50
+ */
51
+ disable_quality_gates?: boolean;
44
52
  }
45
53
 
46
54
  export interface RunAgentTurnResult {
@@ -76,6 +84,7 @@ export async function runAgentTurn(req: RunAgentTurnRequest): Promise<RunAgentTu
76
84
  attachments: req.attachments,
77
85
  user_category: req.user_category ?? null,
78
86
  context_profile: contextProfile,
87
+ disable_quality_gates: req.disable_quality_gates,
79
88
  signal: active.abort.signal,
80
89
  _pinned_model_config_name: pinnedModelConfigName,
81
90
  _skip_persist_message: req.skip_persist_user_message,
@@ -41,6 +41,15 @@ export interface ThreadRunRequest {
41
41
  */
42
42
  context_profile?: TurnContextProfile;
43
43
 
44
+ /**
45
+ * Skip the post-stream stall-retry + strict-citation audit wrapper for
46
+ * this turn. Use for one-shot callers (browser-extension fill / rewrite)
47
+ * that consume `assistantContent` as raw text and would otherwise type
48
+ * the visible `↻` separator and the original stalled prose into the
49
+ * user's input field. Chat callers leave undefined.
50
+ */
51
+ disable_quality_gates?: boolean;
52
+
44
53
  /**
45
54
  * Internal - public callers leave undefined. When set by the submission
46
55
  * path, this freezes the effective model config for the turn so queued
@@ -292,8 +292,16 @@ export async function prepareThreadRun(req: ThreadRunRequest): Promise<PreparedT
292
292
  // Overhead = the assembled system prompt + per-message scaffolding, which
293
293
  // is more accurate than the budget's static overhead allowance.
294
294
  const overheadTokens = estimateTokens(systemPrompt);
295
+ // One-shot callers (extension fill/rewrite) consume `assistantContent` as
296
+ // raw text. The stall-retry wrapper would otherwise leak the `↻` separator
297
+ // and the pre-retry stalled prose into the user's input field, and the
298
+ // strict-citation audit (which lives inside the same wrapper) would do
299
+ // the same with retry continuations. Bypass it entirely for those callers.
300
+ const stream = req.disable_quality_gates
301
+ ? rawStream
302
+ : stallRetryStream(rawStream, req, allowedTools, retriesLeft);
295
303
  return {
296
- stream: stallRetryStream(rawStream, req, allowedTools, retriesLeft),
304
+ stream,
297
305
  thread_id: req.thread_id,
298
306
  context_snapshot: {
299
307
  context_window_tokens: historyWindow.budget.contextWindowTokens,
@@ -91,6 +91,13 @@ async function runExtensionAction(action: z.infer<typeof ExtensionAction>, input
91
91
  message: prompt,
92
92
  user_category: "extension",
93
93
  assistant_category: "extension",
94
+ // The extension types `assistantContent` directly into the user's
95
+ // input field. The stall-retry wrapper and the strict-citation audit
96
+ // would otherwise inject the `↻` separator, the original stalled
97
+ // prose, and audit-retry continuations into that text — pollution
98
+ // the user then has to manually scrub. Both gates are chat
99
+ // affordances; skip them for one-shot writes.
100
+ disable_quality_gates: true,
94
101
  });
95
102
 
96
103
  // Ping the events bus so any open chat view on this thread re-fetches.
@@ -307,3 +307,61 @@ describe("handlePageCapture — response shape", () => {
307
307
  });
308
308
  });
309
309
  });
310
+
311
+ describe("handlePageCapture — screenshot attachment", () => {
312
+ // 1x1 transparent PNG, base64-encoded (no data: prefix).
313
+ const tinyPng =
314
+ "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
315
+
316
+ it("rejects screenshot with invalid base64", async () => {
317
+ const res = await handlePageCapture(makeReq({ ...validBody, screenshot: "not base64!!" }));
318
+ expect(res.status).toBe(400);
319
+ expect(addMessageMock).not.toHaveBeenCalled();
320
+ });
321
+
322
+ it("rejects screenshot exceeding the size cap", async () => {
323
+ const huge = "A".repeat(4_000_001);
324
+ const res = await handlePageCapture(makeReq({ ...validBody, screenshot: huge }));
325
+ expect(res.status).toBe(400);
326
+ });
327
+
328
+ it("persists user message as a JSON ContentPart[] with text + image when screenshot is present", async () => {
329
+ const res = await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
330
+ expect(res.status).toBe(200);
331
+ const stored = addMessageMock.mock.calls[0][2] as string;
332
+ const parsed = JSON.parse(stored) as Array<{ type: string; text?: string; media_type?: string; data?: string }>;
333
+ expect(Array.isArray(parsed)).toBe(true);
334
+ expect(parsed).toHaveLength(2);
335
+ expect(parsed[0]).toMatchObject({ type: "text" });
336
+ expect(parsed[0].text).toContain("Captured from");
337
+ expect(parsed[0].text).toContain("Screenshot attached.");
338
+ expect(parsed[1]).toEqual({ type: "image", media_type: "image/png", data: tinyPng });
339
+ });
340
+
341
+ it("forwards the screenshot as a vision attachment to the silent observer run", async () => {
342
+ await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
343
+ expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
344
+ attachments: [{ type: "image", media_type: "image/png", data: tinyPng }],
345
+ }));
346
+ });
347
+
348
+ it("honors a custom screenshotMediaType", async () => {
349
+ await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng, screenshotMediaType: "image/jpeg" }));
350
+ const stored = addMessageMock.mock.calls[0][2] as string;
351
+ const parsed = JSON.parse(stored) as Array<{ type: string; media_type?: string }>;
352
+ expect(parsed[1].media_type).toBe("image/jpeg");
353
+ });
354
+
355
+ it("keeps the legacy string-content path when no screenshot is sent", async () => {
356
+ await handlePageCapture(makeReq(validBody));
357
+ const stored = addMessageMock.mock.calls[0][2] as string;
358
+ // Not JSON-parseable as an array — it's the legacy plaintext body.
359
+ expect(() => JSON.parse(stored)).toThrow();
360
+ expect(stored).toContain("Captured from");
361
+ expect(stored).not.toContain("Screenshot attached.");
362
+ expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
363
+ attachments: undefined,
364
+ }));
365
+ });
366
+ });
367
+
@@ -13,12 +13,18 @@ import {
13
13
  } from "@/lib/stores/agent-configs";
14
14
  import { publish } from "@/lib/notifications/bus";
15
15
  import { runAgentTurn } from "@/lib/agents/agent-turn";
16
+ import type { ContentPart } from "@/lib/tools/types";
16
17
 
17
18
  // 100KB UTF-8 cap on captured text. The LLM context window is the real
18
19
  // constraint; this cap exists to keep a runaway "<body>" pick from
19
20
  // trashing the conversation. See ADR-0018.
20
21
  export const MAX_TEXT_BYTES = 100_000;
21
22
 
23
+ // Hard cap on the inline element screenshot (base64 chars). 4 MB of
24
+ // base64 ≈ 3 MB decoded — generous for a single cropped element while
25
+ // still bounding the SQLite row and the LLM vision payload.
26
+ export const MAX_SCREENSHOT_B64 = 4_000_000;
27
+
22
28
  // Preamble prepended to the LLM call for the silent observer run.
23
29
  // The captured content is already persisted in the DB — this wrapper
24
30
  // instructs the agent to observe without replying, matching bridge
@@ -37,6 +43,13 @@ const Body = z.object({
37
43
  tagName: z.string().max(64).optional(),
38
44
  text: z.string(),
39
45
  capturedAt: z.string().datetime(),
46
+ // Optional base64-encoded PNG of just the picked element (no data: URL
47
+ // prefix). The content script crops `chrome.tabs.captureVisibleTab`
48
+ // to the element bounding box before sending. When present, it is
49
+ // attached to the persisted user message as an image ContentPart so
50
+ // the chat UI renders it inline and vision-capable agents can see it.
51
+ screenshot: z.string().regex(/^[A-Za-z0-9+/=]+$/).max(MAX_SCREENSHOT_B64).optional(),
52
+ screenshotMediaType: z.string().regex(/^image\/[a-z0-9.+-]+$/).max(64).optional(),
40
53
  });
41
54
 
42
55
  function truncateUtf8(s: string, maxBytes: number): { text: string; truncated: boolean; originalBytes: number } {
@@ -102,12 +115,14 @@ function composeBody(args: {
102
115
  text: string;
103
116
  truncated: boolean;
104
117
  originalBytes: number;
118
+ hasScreenshot?: boolean;
105
119
  }): string {
106
120
  const heading = args.title
107
121
  ? `📎 Captured from [${args.title}](${args.url})`
108
122
  : `📎 Captured from <${args.url}>`;
109
123
  const lines = [heading];
110
124
  if (args.selector) lines.push(`Element: \`${args.selector}\``);
125
+ if (args.hasScreenshot) lines.push("Screenshot attached.");
111
126
  if (args.truncated) {
112
127
  lines.push(`> ⚠ Truncated to ${MAX_TEXT_BYTES.toLocaleString()} bytes (original was ${args.originalBytes.toLocaleString()} bytes)`);
113
128
  }
@@ -158,9 +173,23 @@ export async function handlePageCapture(req: Request): Promise<Response> {
158
173
  text,
159
174
  truncated,
160
175
  originalBytes,
176
+ hasScreenshot: Boolean(input.screenshot),
161
177
  });
162
178
 
163
- const msg = addMessage(thread_id, "user", messageBody, undefined, "page_capture");
179
+ // When a screenshot is included, persist the user turn as a multipart
180
+ // ContentPart[] (text + image) — that's the same shape the chat UI and
181
+ // agent runner expect for inline images, so the picture renders in the
182
+ // bubble on reload and vision-capable models can see it on the silent
183
+ // observer turn. Without a screenshot we keep the legacy string body
184
+ // to avoid touching messages that never had an image.
185
+ const screenshotPart: ContentPart | null = input.screenshot
186
+ ? { type: "image", media_type: input.screenshotMediaType ?? "image/png", data: input.screenshot }
187
+ : null;
188
+ const storedContent: string = screenshotPart
189
+ ? JSON.stringify([{ type: "text", text: messageBody }, screenshotPart] satisfies ContentPart[])
190
+ : messageBody;
191
+
192
+ const msg = addMessage(thread_id, "user", storedContent, undefined, "page_capture");
164
193
 
165
194
  // Fire a silent observer run so the agent ingests the captured context
166
195
  // without being forced to reply — matching bridge silent/observer mode.
@@ -170,6 +199,7 @@ export async function handlePageCapture(req: Request): Promise<Response> {
170
199
  thread_id,
171
200
  queue_source: "extension",
172
201
  message: `${SILENT_CAPTURE_PREAMBLE}\n\n${messageBody}`,
202
+ attachments: screenshotPart ? [screenshotPart] : undefined,
173
203
  user_category: "page_capture",
174
204
  assistant_category: "page_capture",
175
205
  silent: true,