@circuitwall/jarela 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.next/standalone/.next/BUILD_ID +1 -1
- package/.next/standalone/.next/build-manifest.json +2 -2
- package/.next/standalone/.next/prerender-manifest.json +3 -3
- package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/_global-error.html +1 -1
- package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/_not-found.html +1 -1
- package/.next/standalone/.next/server/app/_not-found.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/chats/route.js +3 -3
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/chats/route.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/lookup/route.js +3 -3
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/lookup/route.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/pair/route.js +3 -3
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/pair/route.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js +3 -3
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/route.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/status/route.js +3 -3
- package/.next/standalone/.next/server/app/api/v1/bridges/[id]/status/route.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js +218 -7
- package/.next/standalone/.next/server/app/api/v1/builtin-tools/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/events/route.js +3 -3
- package/.next/standalone/.next/server/app/api/v1/events/route.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/api/v1/extension/agents/route.js +8 -1
- package/.next/standalone/.next/server/app/api/v1/extension/agents/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/extension/fill/route.js +8 -1
- package/.next/standalone/.next/server/app/api/v1/extension/fill/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/extension/refine/route.js +8 -1
- package/.next/standalone/.next/server/app/api/v1/extension/refine/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/extension/turn/route.js +8 -1
- package/.next/standalone/.next/server/app/api/v1/extension/turn/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/extensions/route.js +2 -2
- package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js +2 -2
- package/.next/standalone/.next/server/app/api/v1/page-capture/route.js +37 -3
- package/.next/standalone/.next/server/app/api/v1/page-capture/route.js.map +1 -1
- package/.next/standalone/.next/server/app/api/v1/tools/route.js +2 -2
- package/.next/standalone/.next/server/app/page.js +10 -18
- package/.next/standalone/.next/server/app/page.js.map +1 -1
- package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/setup/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/chunks/210.js +1 -1
- package/.next/standalone/.next/server/chunks/239.js +5335 -5230
- package/.next/standalone/.next/server/chunks/239.js.map +1 -1
- package/.next/standalone/.next/server/chunks/{1683.js → 241.js} +210 -36
- package/.next/standalone/.next/server/chunks/241.js.map +1 -0
- package/.next/standalone/.next/server/chunks/{8135.js → 2539.js} +218 -36
- package/.next/standalone/.next/server/chunks/2539.js.map +1 -0
- package/.next/standalone/.next/server/chunks/4631.js +218 -7
- package/.next/standalone/.next/server/chunks/4631.js.map +1 -1
- package/.next/standalone/.next/server/chunks/8866.js +13389 -13073
- package/.next/standalone/.next/server/chunks/8866.js.map +1 -1
- package/.next/standalone/.next/server/chunks/9032.js +1 -1
- package/.next/standalone/.next/server/chunks/9032.js.map +1 -1
- package/.next/standalone/.next/server/middleware-build-manifest.js +2 -2
- package/.next/standalone/.next/server/pages/404.html +1 -1
- package/.next/standalone/.next/server/pages/500.html +1 -1
- package/.next/standalone/.next/server/server-reference-manifest.json +1 -1
- package/.next/standalone/.next/static/chunks/app/{page-62e0d5f2404b403b.js → page-74846c864241b96d.js} +11 -19
- package/.next/standalone/.next/static/chunks/app/page-74846c864241b96d.js.map +1 -0
- package/.next/standalone/package.json +2 -1
- package/CHANGELOG.md +98 -0
- package/README.md +51 -26
- package/components/chat/InputBar.tsx +10 -1
- package/components/ui/BootScreen.tsx +0 -10
- package/lib/agents/agent-turn.ts +9 -0
- package/lib/agents/prepare/request.ts +9 -0
- package/lib/agents/run-thread.ts +9 -1
- package/lib/api/extension-turn.ts +7 -0
- package/lib/api/page-capture.test.ts +58 -0
- package/lib/api/page-capture.ts +31 -1
- package/lib/bridges/attachment-store.test.ts +440 -0
- package/lib/bridges/attachment-store.ts +184 -0
- package/lib/bridges/whatsapp.ts +50 -32
- package/lib/tools/async-results-tool.ts +114 -0
- package/lib/tools/async-results.test.ts +481 -0
- package/lib/tools/async-results.ts +165 -0
- package/lib/tools/builtins.ts +1 -0
- package/lib/tools/wallclock.ts +114 -8
- package/package.json +2 -1
- package/.next/standalone/.next/server/chunks/1683.js.map +0 -1
- package/.next/standalone/.next/server/chunks/8135.js.map +0 -1
- package/.next/standalone/.next/static/chunks/app/page-62e0d5f2404b403b.js.map +0 -1
- /package/.next/standalone/.next/static/{2xWP8843jbntFGKLnHK6R → AV5AO0yTRABo-NgwxhDe7}/_buildManifest.js +0 -0
- /package/.next/standalone/.next/static/{2xWP8843jbntFGKLnHK6R → AV5AO0yTRABo-NgwxhDe7}/_ssgManifest.js +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@circuitwall/jarela",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "Jarela — local chat interface for LangGraph agents (multi-provider, single-process, SQLite-backed).",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "Andrew Ge Wu",
|
|
@@ -100,6 +100,7 @@
|
|
|
100
100
|
"test:live:isolated:full": "node scripts/live-test-isolated.mjs --llm",
|
|
101
101
|
"test:e2e": "playwright test",
|
|
102
102
|
"test:e2e:ui": "playwright test --ui",
|
|
103
|
+
"promo:record": "node scripts/promo-record.mjs",
|
|
103
104
|
"release:docker": "node scripts/release-docker.mjs",
|
|
104
105
|
"release:docker:dry": "node scripts/release-docker.mjs --dry-run"
|
|
105
106
|
},
|
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,104 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.4.0] - 2026-06-08
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Browser-extension element screenshot.** The page-capture flow now
|
|
15
|
+
ships a cropped PNG of the picked element alongside the text. The
|
|
16
|
+
content script grabs the visible viewport via
|
|
17
|
+
`chrome.tabs.captureVisibleTab` (loopback only, via the service worker)
|
|
18
|
+
and crops it to the element's bounding rect through `OffscreenCanvas`
|
|
19
|
+
at `devicePixelRatio`. The server validates the base64 payload (≤ 4 MB
|
|
20
|
+
encoded), persists the user message as a multipart `ContentPart[]` of
|
|
21
|
+
`[text, image]` so the bubble renders the picture inline, and
|
|
22
|
+
forwards the image part to the silent observer turn so vision-capable
|
|
23
|
+
agents see it on the immediate follow-up run. Falls back cleanly to
|
|
24
|
+
text-only capture if the snapshot is denied. See
|
|
25
|
+
[`docs/api.md`](./docs/api.md#post-apiv1page-capture) for the updated
|
|
26
|
+
request schema.
|
|
27
|
+
- **Promo video recorder.** `npm run promo:record` (via
|
|
28
|
+
[`scripts/promo-record.mjs`](./scripts/promo-record.mjs)) drives your
|
|
29
|
+
real local install in a 9:16 vertical PWA viewport and records a
|
|
30
|
+
dark-theme `.webm` of the tap-to-unlock intro, agent picker, a
|
|
31
|
+
human-paced chat turn, and a tour of every side panel. First run
|
|
32
|
+
saves auth state to `promo/.storage.json` and reuses it thereafter.
|
|
33
|
+
|
|
34
|
+
## [1.3.0] - 2026-06-08
|
|
35
|
+
|
|
36
|
+
Two new agent capabilities and a hardening pass on tool wall-clocks.
|
|
37
|
+
Bridge adapters (WhatsApp today) now spill large remote attachments
|
|
38
|
+
to a local store instead of inlining them into the LLM context, and
|
|
39
|
+
the agent picks them up by path through ``file_read``. Long-running
|
|
40
|
+
tool calls can now be fired asynchronously: the LLM gets a tracking
|
|
41
|
+
key back immediately and pulls the result later via a new built-in.
|
|
42
|
+
|
|
43
|
+
### Added
|
|
44
|
+
|
|
45
|
+
- **Bridge attachment spill store**
|
|
46
|
+
([#215](https://github.com/CircuitWall/jarela/pull/215)). Inbound
|
|
47
|
+
bridge messages no longer base64-inline every document, voice note,
|
|
48
|
+
audio, or video into the next prompt. Buffers are persisted under
|
|
49
|
+
``<dataDir>/bridge-attachments/<bridge>/<YYYY-MM-DD>/<id>-<name>``
|
|
50
|
+
with sanitised paths, an SHA-256, and a future-facing
|
|
51
|
+
``pruneBridgeAttachments({ maxAgeMs })`` helper; the prompt body
|
|
52
|
+
carries a text pointer telling the agent to use ``file_read`` to
|
|
53
|
+
inspect the contents. Images and stickers ≤ 1 MB still inline so
|
|
54
|
+
vision works out of the box.
|
|
55
|
+
- **Async tool execution (``async_run`` wrapper + ``tool_result_get``)**
|
|
56
|
+
([#216](https://github.com/CircuitWall/jarela/pull/216)). Every
|
|
57
|
+
tool's schema now exposes an optional ``async_run: boolean``. When
|
|
58
|
+
set, the wrapper returns ``{ok, async, key, tool, started_at,
|
|
59
|
+
deadline_ms, hint}`` immediately and runs the work detached; the
|
|
60
|
+
LLM picks the result up via the new built-in
|
|
61
|
+
``tool_result_get(key, wait_ms?, consume?)``. ``tool_result_list``
|
|
62
|
+
returns summaries without dumping result bodies. In-process store
|
|
63
|
+
with a 10-minute TTL and a 256-entry cap (oldest finished evicted
|
|
64
|
+
first, then oldest pending with a warn).
|
|
65
|
+
|
|
66
|
+
### Changed
|
|
67
|
+
|
|
68
|
+
- **Hard ceiling on tool ``deadline_ms``**
|
|
69
|
+
([#216](https://github.com/CircuitWall/jarela/pull/216)). The
|
|
70
|
+
wall-clock budget the LLM can pick is now clamped to 30 minutes by
|
|
71
|
+
default. Values above the ceiling are clamped and a one-line
|
|
72
|
+
``console.warn`` is emitted naming the tool, the requested value,
|
|
73
|
+
and the ceiling. Operators can raise or lower the cap with the new
|
|
74
|
+
``JARELA_TOOL_MAX_DEADLINE_MS`` environment variable (integer
|
|
75
|
+
milliseconds). Applies to both sync and ``async_run`` paths.
|
|
76
|
+
|
|
77
|
+
### Fixed
|
|
78
|
+
|
|
79
|
+
- **E2E menu specs no longer race the boot agent picker**
|
|
80
|
+
([#217](https://github.com/CircuitWall/jarela/pull/217)). Three
|
|
81
|
+
Playwright specs (``layout``, ``credentials``, ``setup-reorg``)
|
|
82
|
+
were intermittently failing because the BootScreen overlay
|
|
83
|
+
intercepted clicks on the header menu button. A new
|
|
84
|
+
``waitForAppReady(page)`` helper picks the default agent tile and
|
|
85
|
+
waits for the overlay to detach before the test drives the UI.
|
|
86
|
+
|
|
87
|
+
### Configuration
|
|
88
|
+
|
|
89
|
+
- ``JARELA_TOOL_MAX_DEADLINE_MS`` — overrides the per-tool
|
|
90
|
+
wall-clock ceiling (default 1800000 ms / 30 min). Set to a smaller
|
|
91
|
+
value to tighten the cap, or larger if a regulated workload genuinely
|
|
92
|
+
needs long synchronous calls.
|
|
93
|
+
|
|
94
|
+
Two follow-up fixes on top of 1.2.0.
|
|
95
|
+
|
|
96
|
+
### Fixed
|
|
97
|
+
|
|
98
|
+
- **Boot agent picker always shows after login**
|
|
99
|
+
([#213](https://github.com/CircuitWall/jarela/pull/213)). The picker
|
|
100
|
+
was being skipped in some session states; it now reliably appears so
|
|
101
|
+
the user actively chooses an agent at boot instead of silently
|
|
102
|
+
inheriting one.
|
|
103
|
+
- **Extension UX polish on one-shot turns**
|
|
104
|
+
([#212](https://github.com/CircuitWall/jarela/pull/212)). Custom
|
|
105
|
+
intent collapses by default, Enter submits, writes are queued, and
|
|
106
|
+
one-shot turns drop the quality gates that didn't apply to them.
|
|
107
|
+
|
|
10
108
|
## [1.2.0] - 2026-06-08
|
|
11
109
|
|
|
12
110
|
Security, runtime resilience, and a broad UI consolidation pass.
|
package/README.md
CHANGED
|
@@ -1,29 +1,31 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
<p align="center">
|
|
2
|
+
<picture>
|
|
3
|
+
<source media="(prefers-color-scheme: dark)" srcset="./public/icon-512.png" />
|
|
4
|
+
<img src="./public/icon-512-light.png" alt="Jarela" width="140" />
|
|
5
|
+
</picture>
|
|
6
6
|
</p>
|
|
7
7
|
|
|
8
8
|
<h1 align="center">Jarela</h1>
|
|
9
9
|
|
|
10
10
|
<p align="center">
|
|
11
|
-
<b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b
|
|
12
|
-
|
|
11
|
+
<b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b>
|
|
12
|
+
</p>
|
|
13
|
+
<p align="center">
|
|
14
|
+
<sub>Next.js 16 · LangGraph · SQLite · PWA-installable · no cloud backend, no telemetry</sub>
|
|
13
15
|
</p>
|
|
14
16
|
|
|
15
17
|
<p align="center">
|
|
16
|
-
<a href="#quick-start">Quick start</a>
|
|
17
|
-
<a href="#configuration-guide-home--work">Config guide</a>
|
|
18
|
-
<a href="#supported-platforms">Platforms</a>
|
|
19
|
-
<a href="#features">Features</a>
|
|
20
|
-
<a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a>
|
|
21
|
-
<a href="#built-in-toolbelt">Tools</a>
|
|
22
|
-
<a href="#providers">Providers</a>
|
|
23
|
-
<a href="#connections">Connections</a>
|
|
24
|
-
<a href="./docs/EXTENDING.md">Extending</a>
|
|
25
|
-
<a href="./docs/ARCHITECTURE.md">Architecture</a>
|
|
26
|
-
<a href="./CONTRIBUTING.md">Contributing</a>
|
|
18
|
+
<a href="#quick-start">Quick start</a> ·
|
|
19
|
+
<a href="#configuration-guide-home--work">Config guide</a> ·
|
|
20
|
+
<a href="#supported-platforms">Platforms</a> ·
|
|
21
|
+
<a href="#features">Features</a> ·
|
|
22
|
+
<a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a> ·
|
|
23
|
+
<a href="#built-in-toolbelt">Tools</a> ·
|
|
24
|
+
<a href="#providers">Providers</a> ·
|
|
25
|
+
<a href="#connections">Connections</a> ·
|
|
26
|
+
<a href="./docs/EXTENDING.md">Extending</a> ·
|
|
27
|
+
<a href="./docs/ARCHITECTURE.md">Architecture</a> ·
|
|
28
|
+
<a href="./CONTRIBUTING.md">Contributing</a> ·
|
|
27
29
|
<a href="#documentation">Docs</a>
|
|
28
30
|
</p>
|
|
29
31
|
|
|
@@ -51,15 +53,16 @@
|
|
|
51
53
|
</a>
|
|
52
54
|
</p>
|
|
53
55
|
|
|
54
|
-
---
|
|
55
|
-
|
|
56
56
|
<p align="center">
|
|
57
|
-
<video src="
|
|
58
|
-
|
|
59
|
-
<a href="https://github.com/user-attachments/assets/0f33f8d3-07bb-4850-9fcc-cfc97036f180">Download the clip</a>.
|
|
57
|
+
<video src="./docs/assets/jarela-promo.webm" poster="./docs/assets/jarela-promo-poster.jpg" controls muted playsinline width="320">
|
|
58
|
+
<img src="./docs/assets/jarela-promo-poster.jpg" alt="Jarela promo — PIN unlock, agent picker, chat, panel tour" width="320" />
|
|
60
59
|
</video>
|
|
60
|
+
<br/>
|
|
61
|
+
<sub><i>Tap-to-unlock → agent picker → human-paced chat → full panel tour</i> · <a href="./docs/assets/jarela-promo.webm">Download .webm</a></sub>
|
|
61
62
|
</p>
|
|
62
63
|
|
|
64
|
+
---
|
|
65
|
+
|
|
63
66
|
## Quick start
|
|
64
67
|
|
|
65
68
|
Get to a working local agent in under 10 minutes:
|
|
@@ -239,9 +242,12 @@ create an Outlook Calendar invite in the same turn.
|
|
|
239
242
|
**Memory**, **Documents**, **Profile**, **Bridges**, **Scheduled tasks**,
|
|
240
243
|
and **Pending approvals**.
|
|
241
244
|
- **Browser extension** ([`browser-extension/`](./browser-extension)) —
|
|
242
|
-
Chrome MV3, click an element on any page and POST it
|
|
243
|
-
Jarela as a new user message
|
|
244
|
-
|
|
245
|
+
Chrome MV3, click an element on any page and POST it (with a cropped
|
|
246
|
+
PNG of the picked element) to your local Jarela as a new user message
|
|
247
|
+
(ADR-0018). The screenshot is rendered inline in the chat bubble and
|
|
248
|
+
forwarded to vision-capable agents on the silent observer turn that
|
|
249
|
+
fires immediately after the capture. Loopback only; toolbar icon greys
|
|
250
|
+
out when Jarela isn't running.
|
|
245
251
|
|
|
246
252
|
### Operational
|
|
247
253
|
|
|
@@ -914,6 +920,25 @@ on every push and PR: `lint + tsc --noEmit + next build`, then the same
|
|
|
914
920
|
live integration suite against the production server output. The build
|
|
915
921
|
badge at the top of this README links straight to the latest run.
|
|
916
922
|
|
|
923
|
+
## Recording a promo video
|
|
924
|
+
|
|
925
|
+
[scripts/promo-record.mjs](./scripts/promo-record.mjs) drives your real
|
|
926
|
+
local install (default `http://localhost:4312`) inside a 540×960
|
|
927
|
+
vertical (9:16) PWA viewport and records a `.webm` of a five-scene tour
|
|
928
|
+
in dark theme: a simulated PIN unlock, agent picker, a human-paced chat
|
|
929
|
+
turn, every side panel, and a closing pose.
|
|
930
|
+
|
|
931
|
+
```bash
|
|
932
|
+
npm run dev # in one terminal
|
|
933
|
+
npm run promo:record # in another — output lands in ./promo/
|
|
934
|
+
```
|
|
935
|
+
|
|
936
|
+
The first run opens a headed Chromium so you can manually unlock the
|
|
937
|
+
install if needed; the resulting auth state is saved to
|
|
938
|
+
`promo/.storage.json` and reused on every subsequent run. Override the
|
|
939
|
+
target with `JARELA_PROMO_URL`, the chat line with `JARELA_PROMO_MSG`,
|
|
940
|
+
or skip the actual send with `JARELA_PROMO_SKIP_CHAT=1`.
|
|
941
|
+
|
|
917
942
|
## Security
|
|
918
943
|
|
|
919
944
|
- **CSRF / origin guard** ([lib/auth/access.ts](./lib/auth/access.ts))
|
|
@@ -73,6 +73,12 @@ function fileToContentPart(file: File): Promise<ContentPart> {
|
|
|
73
73
|
});
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
/**
 * Builds a React list key for one pending attachment from its content
 * instead of its position, so removing an earlier attachment does not make
 * React reuse that DOM node (and its preview / filename) for a survivor.
 *
 * @param a - The attachment part to key.
 * @param i - Position in the attachments list. Only used for text parts,
 *   which have no binary payload to fingerprint.
 * @returns An opaque string key; callers must not parse it.
 *
 * Limitation: two byte-identical attachments with the same type, media type
 * and name still produce the same key — nothing in the part distinguishes them.
 */
function attachmentKey(a: ContentPart, i: number): string {
  if (a.type === "text") return `text:${i}:${a.text.length}`;
  const name = a.type === "file" ? a.name : "";
  // Sample across the whole payload rather than only its head: when `data`
  // is base64, the first 16 characters encode just the first 12 bytes, which
  // are the format header (every PNG begins "iVBORw0KGgoAAAAN"). A head-only
  // sample therefore cannot tell two same-length images apart.
  return `${a.type}:${a.media_type}:${name}:${a.data.length}:${sampleFingerprint(a.data)}`;
}

/**
 * Returns a short, deterministic fingerprint of `data`: the string itself
 * when it is at most 32 characters long, otherwise 32 characters taken at
 * evenly spaced offsets starting at index 0. The cost is constant regardless
 * of payload size, so it is safe to call on every render.
 */
function sampleFingerprint(data: string): string {
  const sampleCount = 32;
  if (data.length <= sampleCount) return data;
  const stride = data.length / sampleCount;
  let fingerprint = "";
  for (let k = 0; k < sampleCount; k++) {
    // Math.floor(k * stride) < data.length for every k < sampleCount.
    fingerprint += data.charAt(Math.floor(k * stride));
  }
  return fingerprint;
}
|
|
81
|
+
|
|
76
82
|
export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue, onStop, streaming, disabled, placeholder, voiceEnabled, agentId, onVoiceTranscript }: Props) {
|
|
77
83
|
// Text state is intentionally LOCAL. Lifting it to ChatView would re-render
|
|
78
84
|
// the entire message list (every MessageBubble + ReactMarkdown pass) on
|
|
@@ -268,7 +274,10 @@ export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue,
|
|
|
268
274
|
{attachments.length > 0 && (
|
|
269
275
|
<div className="flex flex-wrap gap-2 mb-2">
|
|
270
276
|
{attachments.map((a, i) => (
|
|
271
|
-
|
|
277
|
+
// Content-derived key — using the index reused DOM nodes when
|
|
278
|
+
// earlier attachments were removed, flashing the wrong preview
|
|
279
|
+
// (and the wrong filename) into the slot of the survivor.
|
|
280
|
+
<div key={attachmentKey(a, i)} className="relative group shrink-0">
|
|
272
281
|
{a.type === "image" ? (
|
|
273
282
|
// eslint-disable-next-line @next/next/no-img-element
|
|
274
283
|
<img
|
|
@@ -150,16 +150,6 @@ export function BootScreen({ agents, agentsLoaded, activeAgentId, onPickAgent, s
|
|
|
150
150
|
};
|
|
151
151
|
}, [activeAgentId, pickedId, agentsLoaded, markStep]);
|
|
152
152
|
|
|
153
|
-
// Returning users with a saved default skip the manual tile-click.
|
|
154
|
-
useEffect(() => {
|
|
155
|
-
if (suppressed) return;
|
|
156
|
-
if (!agentsLoaded) return;
|
|
157
|
-
if (activeAgentId || pickedId) return;
|
|
158
|
-
if (!defaultAgent) return;
|
|
159
|
-
setPickedId(defaultAgent.id);
|
|
160
|
-
onPickAgent(defaultAgent.id);
|
|
161
|
-
}, [suppressed, agentsLoaded, activeAgentId, pickedId, defaultAgent, onPickAgent]);
|
|
162
|
-
|
|
163
153
|
if (done) return null;
|
|
164
154
|
if (suppressed) return null;
|
|
165
155
|
|
package/lib/agents/agent-turn.ts
CHANGED
|
@@ -41,6 +41,14 @@ export interface RunAgentTurnRequest {
|
|
|
41
41
|
* the category default.
|
|
42
42
|
*/
|
|
43
43
|
context_profile_override?: Partial<TurnContextProfile> | null;
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Skip the stall-retry + strict-citation audit wrapper. One-shot
|
|
47
|
+
* callers (browser-extension fill / rewrite) want the raw assistant
|
|
48
|
+
* text without the `↻` separator or pre-retry stall prose that the
|
|
49
|
+
* wrapper would otherwise inject into the streamed content.
|
|
50
|
+
*/
|
|
51
|
+
disable_quality_gates?: boolean;
|
|
44
52
|
}
|
|
45
53
|
|
|
46
54
|
export interface RunAgentTurnResult {
|
|
@@ -76,6 +84,7 @@ export async function runAgentTurn(req: RunAgentTurnRequest): Promise<RunAgentTu
|
|
|
76
84
|
attachments: req.attachments,
|
|
77
85
|
user_category: req.user_category ?? null,
|
|
78
86
|
context_profile: contextProfile,
|
|
87
|
+
disable_quality_gates: req.disable_quality_gates,
|
|
79
88
|
signal: active.abort.signal,
|
|
80
89
|
_pinned_model_config_name: pinnedModelConfigName,
|
|
81
90
|
_skip_persist_message: req.skip_persist_user_message,
|
|
@@ -41,6 +41,15 @@ export interface ThreadRunRequest {
|
|
|
41
41
|
*/
|
|
42
42
|
context_profile?: TurnContextProfile;
|
|
43
43
|
|
|
44
|
+
/**
|
|
45
|
+
* Skip the post-stream stall-retry + strict-citation audit wrapper for
|
|
46
|
+
* this turn. Use for one-shot callers (browser-extension fill / rewrite)
|
|
47
|
+
* that consume `assistantContent` as raw text and would otherwise type
|
|
48
|
+
* the visible `↻` separator and the original stalled prose into the
|
|
49
|
+
* user's input field. Chat callers leave undefined.
|
|
50
|
+
*/
|
|
51
|
+
disable_quality_gates?: boolean;
|
|
52
|
+
|
|
44
53
|
/**
|
|
45
54
|
* Internal - public callers leave undefined. When set by the submission
|
|
46
55
|
* path, this freezes the effective model config for the turn so queued
|
package/lib/agents/run-thread.ts
CHANGED
|
@@ -292,8 +292,16 @@ export async function prepareThreadRun(req: ThreadRunRequest): Promise<PreparedT
|
|
|
292
292
|
// Overhead = the assembled system prompt + per-message scaffolding, which
|
|
293
293
|
// is more accurate than the budget's static overhead allowance.
|
|
294
294
|
const overheadTokens = estimateTokens(systemPrompt);
|
|
295
|
+
// One-shot callers (extension fill/rewrite) consume `assistantContent` as
|
|
296
|
+
// raw text. The stall-retry wrapper would otherwise leak the `↻` separator
|
|
297
|
+
// and the pre-retry stalled prose into the user's input field, and the
|
|
298
|
+
// strict-citation audit (which lives inside the same wrapper) would do
|
|
299
|
+
// the same with retry continuations. Bypass it entirely for those callers.
|
|
300
|
+
const stream = req.disable_quality_gates
|
|
301
|
+
? rawStream
|
|
302
|
+
: stallRetryStream(rawStream, req, allowedTools, retriesLeft);
|
|
295
303
|
return {
|
|
296
|
-
stream
|
|
304
|
+
stream,
|
|
297
305
|
thread_id: req.thread_id,
|
|
298
306
|
context_snapshot: {
|
|
299
307
|
context_window_tokens: historyWindow.budget.contextWindowTokens,
|
|
@@ -91,6 +91,13 @@ async function runExtensionAction(action: z.infer<typeof ExtensionAction>, input
|
|
|
91
91
|
message: prompt,
|
|
92
92
|
user_category: "extension",
|
|
93
93
|
assistant_category: "extension",
|
|
94
|
+
// The extension types `assistantContent` directly into the user's
|
|
95
|
+
// input field. The stall-retry wrapper and the strict-citation audit
|
|
96
|
+
// would otherwise inject the `↻` separator, the original stalled
|
|
97
|
+
// prose, and audit-retry continuations into that text — pollution
|
|
98
|
+
// the user then has to manually scrub. Both gates are chat
|
|
99
|
+
// affordances; skip them for one-shot writes.
|
|
100
|
+
disable_quality_gates: true,
|
|
94
101
|
});
|
|
95
102
|
|
|
96
103
|
// Ping the events bus so any open chat view on this thread re-fetches.
|
|
@@ -307,3 +307,61 @@ describe("handlePageCapture — response shape", () => {
|
|
|
307
307
|
});
|
|
308
308
|
});
|
|
309
309
|
});
|
|
310
|
+
|
|
311
|
+
describe("handlePageCapture — screenshot attachment", () => {
|
|
312
|
+
// 1x1 transparent PNG, base64-encoded (no data: prefix).
|
|
313
|
+
const tinyPng =
|
|
314
|
+
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
|
|
315
|
+
|
|
316
|
+
it("rejects screenshot with invalid base64", async () => {
|
|
317
|
+
const res = await handlePageCapture(makeReq({ ...validBody, screenshot: "not base64!!" }));
|
|
318
|
+
expect(res.status).toBe(400);
|
|
319
|
+
expect(addMessageMock).not.toHaveBeenCalled();
|
|
320
|
+
});
|
|
321
|
+
|
|
322
|
+
it("rejects screenshot exceeding the size cap", async () => {
|
|
323
|
+
const huge = "A".repeat(4_000_001);
|
|
324
|
+
const res = await handlePageCapture(makeReq({ ...validBody, screenshot: huge }));
|
|
325
|
+
expect(res.status).toBe(400);
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
it("persists user message as a JSON ContentPart[] with text + image when screenshot is present", async () => {
|
|
329
|
+
const res = await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
|
|
330
|
+
expect(res.status).toBe(200);
|
|
331
|
+
const stored = addMessageMock.mock.calls[0][2] as string;
|
|
332
|
+
const parsed = JSON.parse(stored) as Array<{ type: string; text?: string; media_type?: string; data?: string }>;
|
|
333
|
+
expect(Array.isArray(parsed)).toBe(true);
|
|
334
|
+
expect(parsed).toHaveLength(2);
|
|
335
|
+
expect(parsed[0]).toMatchObject({ type: "text" });
|
|
336
|
+
expect(parsed[0].text).toContain("Captured from");
|
|
337
|
+
expect(parsed[0].text).toContain("Screenshot attached.");
|
|
338
|
+
expect(parsed[1]).toEqual({ type: "image", media_type: "image/png", data: tinyPng });
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
it("forwards the screenshot as a vision attachment to the silent observer run", async () => {
|
|
342
|
+
await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
|
|
343
|
+
expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
|
|
344
|
+
attachments: [{ type: "image", media_type: "image/png", data: tinyPng }],
|
|
345
|
+
}));
|
|
346
|
+
});
|
|
347
|
+
|
|
348
|
+
it("honors a custom screenshotMediaType", async () => {
|
|
349
|
+
await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng, screenshotMediaType: "image/jpeg" }));
|
|
350
|
+
const stored = addMessageMock.mock.calls[0][2] as string;
|
|
351
|
+
const parsed = JSON.parse(stored) as Array<{ type: string; media_type?: string }>;
|
|
352
|
+
expect(parsed[1].media_type).toBe("image/jpeg");
|
|
353
|
+
});
|
|
354
|
+
|
|
355
|
+
it("keeps the legacy string-content path when no screenshot is sent", async () => {
|
|
356
|
+
await handlePageCapture(makeReq(validBody));
|
|
357
|
+
const stored = addMessageMock.mock.calls[0][2] as string;
|
|
358
|
+
// Not JSON-parseable as an array — it's the legacy plaintext body.
|
|
359
|
+
expect(() => JSON.parse(stored)).toThrow();
|
|
360
|
+
expect(stored).toContain("Captured from");
|
|
361
|
+
expect(stored).not.toContain("Screenshot attached.");
|
|
362
|
+
expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
|
|
363
|
+
attachments: undefined,
|
|
364
|
+
}));
|
|
365
|
+
});
|
|
366
|
+
});
|
|
367
|
+
|
package/lib/api/page-capture.ts
CHANGED
|
@@ -13,12 +13,18 @@ import {
|
|
|
13
13
|
} from "@/lib/stores/agent-configs";
|
|
14
14
|
import { publish } from "@/lib/notifications/bus";
|
|
15
15
|
import { runAgentTurn } from "@/lib/agents/agent-turn";
|
|
16
|
+
import type { ContentPart } from "@/lib/tools/types";
|
|
16
17
|
|
|
17
18
|
// 100KB UTF-8 cap on captured text. The LLM context window is the real
|
|
18
19
|
// constraint; this cap exists to keep a runaway "<body>" pick from
|
|
19
20
|
// trashing the conversation. See ADR-0018.
|
|
20
21
|
export const MAX_TEXT_BYTES = 100_000;
|
|
21
22
|
|
|
23
|
+
// Hard cap on the inline element screenshot (base64 chars). 4 MB of
|
|
24
|
+
// base64 ≈ 3 MB decoded — generous for a single cropped element while
|
|
25
|
+
// still bounding the SQLite row and the LLM vision payload.
|
|
26
|
+
export const MAX_SCREENSHOT_B64 = 4_000_000;
|
|
27
|
+
|
|
22
28
|
// Preamble prepended to the LLM call for the silent observer run.
|
|
23
29
|
// The captured content is already persisted in the DB — this wrapper
|
|
24
30
|
// instructs the agent to observe without replying, matching bridge
|
|
@@ -37,6 +43,13 @@ const Body = z.object({
|
|
|
37
43
|
tagName: z.string().max(64).optional(),
|
|
38
44
|
text: z.string(),
|
|
39
45
|
capturedAt: z.string().datetime(),
|
|
46
|
+
// Optional base64-encoded PNG of just the picked element (no data: URL
|
|
47
|
+
// prefix). The content script crops `chrome.tabs.captureVisibleTab`
|
|
48
|
+
// to the element bounding box before sending. When present, it is
|
|
49
|
+
// attached to the persisted user message as an image ContentPart so
|
|
50
|
+
// the chat UI renders it inline and vision-capable agents can see it.
|
|
51
|
+
screenshot: z.string().regex(/^[A-Za-z0-9+/=]+$/).max(MAX_SCREENSHOT_B64).optional(),
|
|
52
|
+
screenshotMediaType: z.string().regex(/^image\/[a-z0-9.+-]+$/).max(64).optional(),
|
|
40
53
|
});
|
|
41
54
|
|
|
42
55
|
function truncateUtf8(s: string, maxBytes: number): { text: string; truncated: boolean; originalBytes: number } {
|
|
@@ -102,12 +115,14 @@ function composeBody(args: {
|
|
|
102
115
|
text: string;
|
|
103
116
|
truncated: boolean;
|
|
104
117
|
originalBytes: number;
|
|
118
|
+
hasScreenshot?: boolean;
|
|
105
119
|
}): string {
|
|
106
120
|
const heading = args.title
|
|
107
121
|
? `📎 Captured from [${args.title}](${args.url})`
|
|
108
122
|
: `📎 Captured from <${args.url}>`;
|
|
109
123
|
const lines = [heading];
|
|
110
124
|
if (args.selector) lines.push(`Element: \`${args.selector}\``);
|
|
125
|
+
if (args.hasScreenshot) lines.push("Screenshot attached.");
|
|
111
126
|
if (args.truncated) {
|
|
112
127
|
lines.push(`> ⚠ Truncated to ${MAX_TEXT_BYTES.toLocaleString()} bytes (original was ${args.originalBytes.toLocaleString()} bytes)`);
|
|
113
128
|
}
|
|
@@ -158,9 +173,23 @@ export async function handlePageCapture(req: Request): Promise<Response> {
|
|
|
158
173
|
text,
|
|
159
174
|
truncated,
|
|
160
175
|
originalBytes,
|
|
176
|
+
hasScreenshot: Boolean(input.screenshot),
|
|
161
177
|
});
|
|
162
178
|
|
|
163
|
-
|
|
179
|
+
// When a screenshot is included, persist the user turn as a multipart
|
|
180
|
+
// ContentPart[] (text + image) — that's the same shape the chat UI and
|
|
181
|
+
// agent runner expect for inline images, so the picture renders in the
|
|
182
|
+
// bubble on reload and vision-capable models can see it on the silent
|
|
183
|
+
// observer turn. Without a screenshot we keep the legacy string body
|
|
184
|
+
// to avoid touching messages that never had an image.
|
|
185
|
+
const screenshotPart: ContentPart | null = input.screenshot
|
|
186
|
+
? { type: "image", media_type: input.screenshotMediaType ?? "image/png", data: input.screenshot }
|
|
187
|
+
: null;
|
|
188
|
+
const storedContent: string = screenshotPart
|
|
189
|
+
? JSON.stringify([{ type: "text", text: messageBody }, screenshotPart] satisfies ContentPart[])
|
|
190
|
+
: messageBody;
|
|
191
|
+
|
|
192
|
+
const msg = addMessage(thread_id, "user", storedContent, undefined, "page_capture");
|
|
164
193
|
|
|
165
194
|
// Fire a silent observer run so the agent ingests the captured context
|
|
166
195
|
// without being forced to reply — matching bridge silent/observer mode.
|
|
@@ -170,6 +199,7 @@ export async function handlePageCapture(req: Request): Promise<Response> {
|
|
|
170
199
|
thread_id,
|
|
171
200
|
queue_source: "extension",
|
|
172
201
|
message: `${SILENT_CAPTURE_PREAMBLE}\n\n${messageBody}`,
|
|
202
|
+
attachments: screenshotPart ? [screenshotPart] : undefined,
|
|
173
203
|
user_category: "page_capture",
|
|
174
204
|
assistant_category: "page_capture",
|
|
175
205
|
silent: true,
|