@circuitwall/jarela 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/.next/standalone/.next/BUILD_ID +1 -1
  2. package/.next/standalone/.next/build-manifest.json +2 -2
  3. package/.next/standalone/.next/prerender-manifest.json +3 -3
  4. package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
  5. package/.next/standalone/.next/server/app/_global-error.html +1 -1
  6. package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
  7. package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  8. package/.next/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  9. package/.next/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  10. package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  11. package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  12. package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  13. package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  14. package/.next/standalone/.next/server/app/_not-found.html +1 -1
  15. package/.next/standalone/.next/server/app/_not-found.rsc +1 -1
  16. package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  17. package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  18. package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  19. package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  20. package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  21. package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  22. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js +37 -3
  23. package/.next/standalone/.next/server/app/api/v1/page-capture/route.js.map +1 -1
  24. package/.next/standalone/.next/server/app/page.js +10 -2
  25. package/.next/standalone/.next/server/app/page.js.map +1 -1
  26. package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  27. package/.next/standalone/.next/server/app/setup/page_client-reference-manifest.js +1 -1
  28. package/.next/standalone/.next/server/middleware-build-manifest.js +2 -2
  29. package/.next/standalone/.next/server/pages/404.html +1 -1
  30. package/.next/standalone/.next/server/pages/500.html +1 -1
  31. package/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  32. package/.next/standalone/.next/static/chunks/app/{page-2ab710949b62a638.js → page-74846c864241b96d.js} +11 -3
  33. package/.next/standalone/.next/static/chunks/app/page-74846c864241b96d.js.map +1 -0
  34. package/.next/standalone/package.json +2 -1
  35. package/CHANGELOG.md +24 -0
  36. package/README.md +51 -26
  37. package/components/chat/InputBar.tsx +10 -1
  38. package/lib/api/page-capture.test.ts +58 -0
  39. package/lib/api/page-capture.ts +31 -1
  40. package/package.json +2 -1
  41. package/.next/standalone/.next/static/chunks/app/page-2ab710949b62a638.js.map +0 -1
  42. /package/.next/standalone/.next/static/{ZKy7LJ3KXj2TIyKOg_fBH → AV5AO0yTRABo-NgwxhDe7}/_buildManifest.js +0 -0
  43. /package/.next/standalone/.next/static/{ZKy7LJ3KXj2TIyKOg_fBH → AV5AO0yTRABo-NgwxhDe7}/_ssgManifest.js +0 -0
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@circuitwall/jarela",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "description": "Jarela — local chat interface for LangGraph agents (multi-provider, single-process, SQLite-backed).",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Andrew Ge Wu",
@@ -100,6 +100,7 @@
100
100
  "test:live:isolated:full": "node scripts/live-test-isolated.mjs --llm",
101
101
  "test:e2e": "playwright test",
102
102
  "test:e2e:ui": "playwright test --ui",
103
+ "promo:record": "node scripts/promo-record.mjs",
103
104
  "release:docker": "node scripts/release-docker.mjs",
104
105
  "release:docker:dry": "node scripts/release-docker.mjs --dry-run"
105
106
  },
package/CHANGELOG.md CHANGED
@@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.4.0] - 2026-06-08
11
+
12
+ ### Added
13
+
14
+ - **Browser-extension element screenshot.** The page-capture flow now
15
+ ships a cropped PNG of the picked element alongside the text. The
16
+ content script grabs the visible viewport via
17
+ `chrome.tabs.captureVisibleTab` (loopback only, via the service worker)
18
+ and crops it to the element's bounding rect through `OffscreenCanvas`
19
+ at `devicePixelRatio`. The server validates the base64 payload (≤ 4 MB
20
+ encoded), persists the user message as a multipart `ContentPart[]` of
21
+ `[text, image]` so the bubble renders the picture inline, and
22
+ forwards the image part to the silent observer turn so vision-capable
23
+ agents see it on the immediate follow-up run. Falls back cleanly to
24
+ text-only capture if the snapshot is denied. See
25
+ [`docs/api.md`](./docs/api.md#post-apiv1page-capture) for the updated
26
+ request schema.
27
+ - **Promo video recorder.** `npm run promo:record` (via
28
+ [`scripts/promo-record.mjs`](./scripts/promo-record.mjs)) drives your
29
+ real local install in a 9:16 vertical PWA viewport and records a
30
+ dark-theme `.webm` of the tap-to-unlock intro, agent picker, a
31
+ human-paced chat turn, and a tour of every side panel. First run
32
+ saves auth state to `promo/.storage.json` and reuses it thereafter.
33
+
10
34
  ## [1.3.0] - 2026-06-08
11
35
 
12
36
  Two new agent capabilities and a hardening pass on tool wall-clocks.
package/README.md CHANGED
@@ -1,29 +1,31 @@
1
-
2
-
3
-
4
- <p align="center">
5
- <img src="./public/logo-mark-transparent.png" alt="Jarela" width="160" />
1
+ <p align="center">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="./public/icon-512.png" />
4
+ <img src="./public/icon-512-light.png" alt="Jarela" width="140" />
5
+ </picture>
6
6
  </p>
7
7
 
8
8
  <h1 align="center">Jarela</h1>
9
9
 
10
10
  <p align="center">
11
- <b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b><br/>
12
- <sub>Next.js 16 + LangGraph + SQLite. PWA-installable. No cloud backend, no telemetry.</sub>
11
+ <b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b>
12
+ </p>
13
+ <p align="center">
14
+ <sub>Next.js 16 &middot; LangGraph &middot; SQLite &middot; PWA-installable &middot; no cloud backend, no telemetry</sub>
13
15
  </p>
14
16
 
15
17
  <p align="center">
16
- <a href="#quick-start">Quick start</a> ·
17
- <a href="#configuration-guide-home--work">Config guide</a> ·
18
- <a href="#supported-platforms">Platforms</a> ·
19
- <a href="#features">Features</a> ·
20
- <a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a> ·
21
- <a href="#built-in-toolbelt">Tools</a> ·
22
- <a href="#providers">Providers</a> ·
23
- <a href="#connections">Connections</a> ·
24
- <a href="./docs/EXTENDING.md">Extending</a> ·
25
- <a href="./docs/ARCHITECTURE.md">Architecture</a> ·
26
- <a href="./CONTRIBUTING.md">Contributing</a> ·
18
+ <a href="#quick-start">Quick start</a> &middot;
19
+ <a href="#configuration-guide-home--work">Config guide</a> &middot;
20
+ <a href="#supported-platforms">Platforms</a> &middot;
21
+ <a href="#features">Features</a> &middot;
22
+ <a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a> &middot;
23
+ <a href="#built-in-toolbelt">Tools</a> &middot;
24
+ <a href="#providers">Providers</a> &middot;
25
+ <a href="#connections">Connections</a> &middot;
26
+ <a href="./docs/EXTENDING.md">Extending</a> &middot;
27
+ <a href="./docs/ARCHITECTURE.md">Architecture</a> &middot;
28
+ <a href="./CONTRIBUTING.md">Contributing</a> &middot;
27
29
  <a href="#documentation">Docs</a>
28
30
  </p>
29
31
 
@@ -51,15 +53,16 @@
51
53
  </a>
52
54
  </p>
53
55
 
54
- ---
55
-
56
56
  <p align="center">
57
- <video src="https://github.com/user-attachments/assets/0f33f8d3-07bb-4850-9fcc-cfc97036f180" controls width="640" muted>
58
- Your browser doesn't support embedded video.
59
- <a href="https://github.com/user-attachments/assets/0f33f8d3-07bb-4850-9fcc-cfc97036f180">Download the clip</a>.
57
+ <video src="./docs/assets/jarela-promo.webm" poster="./docs/assets/jarela-promo-poster.jpg" controls muted playsinline width="320">
58
+ <img src="./docs/assets/jarela-promo-poster.jpg" alt="Jarela promo &mdash; PIN unlock, agent picker, chat, panel tour" width="320" />
60
59
  </video>
60
+ <br/>
61
+ <sub><i>Tap-to-unlock &rarr; agent picker &rarr; human-paced chat &rarr; full panel tour</i> &middot; <a href="./docs/assets/jarela-promo.webm">Download .webm</a></sub>
61
62
  </p>
62
63
 
64
+ ---
65
+
63
66
  ## Quick start
64
67
 
65
68
  Get to a working local agent in under 10 minutes:
@@ -239,9 +242,12 @@ create an Outlook Calendar invite in the same turn.
239
242
  **Memory**, **Documents**, **Profile**, **Bridges**, **Scheduled tasks**,
240
243
  and **Pending approvals**.
241
244
  - **Browser extension** ([`browser-extension/`](./browser-extension)) —
242
- Chrome MV3, click an element on any page and POST it to your local
243
- Jarela as a new user message (ADR-0018). Loopback only; toolbar icon
244
- greys out when Jarela isn't running.
245
+ Chrome MV3, click an element on any page and POST it (with a cropped
246
+ PNG of the picked element) to your local Jarela as a new user message
247
+ (ADR-0018). The screenshot is rendered inline in the chat bubble and
248
+ forwarded to vision-capable agents on the silent observer turn that
249
+ fires immediately after the capture. Loopback only; toolbar icon greys
250
+ out when Jarela isn't running.
245
251
 
246
252
  ### Operational
247
253
 
@@ -914,6 +920,25 @@ on every push and PR: `lint + tsc --noEmit + next build`, then the same
914
920
  live integration suite against the production server output. The build
915
921
  badge at the top of this README links straight to the latest run.
916
922
 
923
+ ## Recording a promo video
924
+
925
+ [scripts/promo-record.mjs](./scripts/promo-record.mjs) drives your real
926
+ local install (default `http://localhost:4312`) inside a 540&times;960
927
+ vertical (9:16) PWA viewport and records a `.webm` of a five-scene tour
928
+ in dark theme: a simulated PIN unlock, agent picker, a human-paced chat
929
+ turn, every side panel, and a closing pose.
930
+
931
+ ```bash
932
+ npm run dev # in one terminal
933
+ npm run promo:record # in another — output lands in ./promo/
934
+ ```
935
+
936
+ The first run opens a headed Chromium so you can manually unlock the
937
+ install if needed; the resulting auth state is saved to
938
+ `promo/.storage.json` and reused on every subsequent run. Override the
939
+ target with `JARELA_PROMO_URL`, the chat line with `JARELA_PROMO_MSG`,
940
+ or skip the actual send with `JARELA_PROMO_SKIP_CHAT=1`.
941
+
917
942
  ## Security
918
943
 
919
944
  - **CSRF / origin guard** ([lib/auth/access.ts](./lib/auth/access.ts))
@@ -73,6 +73,12 @@ function fileToContentPart(file: File): Promise<ContentPart> {
73
73
  });
74
74
  }
75
75
 
76
+ function attachmentKey(a: ContentPart, i: number): string {
77
+ if (a.type === "text") return `text:${i}:${a.text.length}`;
78
+ const name = a.type === "file" ? a.name : "";
79
+ return `${a.type}:${a.media_type}:${name}:${a.data.length}:${a.data.slice(0, 16)}`;
80
+ }
81
+
76
82
  export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue, onStop, streaming, disabled, placeholder, voiceEnabled, agentId, onVoiceTranscript }: Props) {
77
83
  // Text state is intentionally LOCAL. Lifting it to ChatView would re-render
78
84
  // the entire message list (every MessageBubble + ReactMarkdown pass) on
@@ -268,7 +274,10 @@ export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue,
268
274
  {attachments.length > 0 && (
269
275
  <div className="flex flex-wrap gap-2 mb-2">
270
276
  {attachments.map((a, i) => (
271
- <div key={i} className="relative group shrink-0">
277
+ // Content-derived key: keying by the index reused DOM nodes when
278
+ // earlier attachments were removed, flashing the wrong preview
279
+ // (and the wrong filename) into the slot of the survivor.
280
+ <div key={attachmentKey(a, i)} className="relative group shrink-0">
272
281
  {a.type === "image" ? (
273
282
  // eslint-disable-next-line @next/next/no-img-element
274
283
  <img
@@ -307,3 +307,61 @@ describe("handlePageCapture — response shape", () => {
307
307
  });
308
308
  });
309
309
  });
310
+
311
+ describe("handlePageCapture — screenshot attachment", () => {
312
+ // 1x1 transparent PNG, base64-encoded (no data: prefix).
313
+ const tinyPng =
314
+ "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
315
+
316
+ it("rejects screenshot with invalid base64", async () => {
317
+ const res = await handlePageCapture(makeReq({ ...validBody, screenshot: "not base64!!" }));
318
+ expect(res.status).toBe(400);
319
+ expect(addMessageMock).not.toHaveBeenCalled();
320
+ });
321
+
322
+ it("rejects screenshot exceeding the size cap", async () => {
323
+ const huge = "A".repeat(4_000_001);
324
+ const res = await handlePageCapture(makeReq({ ...validBody, screenshot: huge }));
325
+ expect(res.status).toBe(400);
326
+ });
327
+
328
+ it("persists user message as a JSON ContentPart[] with text + image when screenshot is present", async () => {
329
+ const res = await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
330
+ expect(res.status).toBe(200);
331
+ const stored = addMessageMock.mock.calls[0][2] as string;
332
+ const parsed = JSON.parse(stored) as Array<{ type: string; text?: string; media_type?: string; data?: string }>;
333
+ expect(Array.isArray(parsed)).toBe(true);
334
+ expect(parsed).toHaveLength(2);
335
+ expect(parsed[0]).toMatchObject({ type: "text" });
336
+ expect(parsed[0].text).toContain("Captured from");
337
+ expect(parsed[0].text).toContain("Screenshot attached.");
338
+ expect(parsed[1]).toEqual({ type: "image", media_type: "image/png", data: tinyPng });
339
+ });
340
+
341
+ it("forwards the screenshot as a vision attachment to the silent observer run", async () => {
342
+ await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
343
+ expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
344
+ attachments: [{ type: "image", media_type: "image/png", data: tinyPng }],
345
+ }));
346
+ });
347
+
348
+ it("honors a custom screenshotMediaType", async () => {
349
+ await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng, screenshotMediaType: "image/jpeg" }));
350
+ const stored = addMessageMock.mock.calls[0][2] as string;
351
+ const parsed = JSON.parse(stored) as Array<{ type: string; media_type?: string }>;
352
+ expect(parsed[1].media_type).toBe("image/jpeg");
353
+ });
354
+
355
+ it("keeps the legacy string-content path when no screenshot is sent", async () => {
356
+ await handlePageCapture(makeReq(validBody));
357
+ const stored = addMessageMock.mock.calls[0][2] as string;
358
+ // Not JSON-parseable as an array — it's the legacy plaintext body.
359
+ expect(() => JSON.parse(stored)).toThrow();
360
+ expect(stored).toContain("Captured from");
361
+ expect(stored).not.toContain("Screenshot attached.");
362
+ expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
363
+ attachments: undefined,
364
+ }));
365
+ });
366
+ });
367
+
@@ -13,12 +13,18 @@ import {
13
13
  } from "@/lib/stores/agent-configs";
14
14
  import { publish } from "@/lib/notifications/bus";
15
15
  import { runAgentTurn } from "@/lib/agents/agent-turn";
16
+ import type { ContentPart } from "@/lib/tools/types";
16
17
 
17
18
  // 100KB UTF-8 cap on captured text. The LLM context window is the real
18
19
  // constraint; this cap exists to keep a runaway "<body>" pick from
19
20
  // trashing the conversation. See ADR-0018.
20
21
  export const MAX_TEXT_BYTES = 100_000;
21
22
 
23
+ // Hard cap on the inline element screenshot (base64 chars). 4 MB of
24
+ // base64 ≈ 3 MB decoded — generous for a single cropped element while
25
+ // still bounding the SQLite row and the LLM vision payload.
26
+ export const MAX_SCREENSHOT_B64 = 4_000_000;
27
+
22
28
  // Preamble prepended to the LLM call for the silent observer run.
23
29
  // The captured content is already persisted in the DB — this wrapper
24
30
  // instructs the agent to observe without replying, matching bridge
@@ -37,6 +43,13 @@ const Body = z.object({
37
43
  tagName: z.string().max(64).optional(),
38
44
  text: z.string(),
39
45
  capturedAt: z.string().datetime(),
46
+ // Optional base64-encoded image of just the picked element — PNG unless screenshotMediaType overrides it (no data: URL
47
+ // prefix). The content script crops `chrome.tabs.captureVisibleTab`
48
+ // to the element bounding box before sending. When present, it is
49
+ // attached to the persisted user message as an image ContentPart so
50
+ // the chat UI renders it inline and vision-capable agents can see it.
51
+ screenshot: z.string().regex(/^[A-Za-z0-9+/=]+$/).max(MAX_SCREENSHOT_B64).optional(),
52
+ screenshotMediaType: z.string().regex(/^image\/[a-z0-9.+-]+$/).max(64).optional(),
40
53
  });
41
54
 
42
55
  function truncateUtf8(s: string, maxBytes: number): { text: string; truncated: boolean; originalBytes: number } {
@@ -102,12 +115,14 @@ function composeBody(args: {
102
115
  text: string;
103
116
  truncated: boolean;
104
117
  originalBytes: number;
118
+ hasScreenshot?: boolean;
105
119
  }): string {
106
120
  const heading = args.title
107
121
  ? `📎 Captured from [${args.title}](${args.url})`
108
122
  : `📎 Captured from <${args.url}>`;
109
123
  const lines = [heading];
110
124
  if (args.selector) lines.push(`Element: \`${args.selector}\``);
125
+ if (args.hasScreenshot) lines.push("Screenshot attached.");
111
126
  if (args.truncated) {
112
127
  lines.push(`> ⚠ Truncated to ${MAX_TEXT_BYTES.toLocaleString()} bytes (original was ${args.originalBytes.toLocaleString()} bytes)`);
113
128
  }
@@ -158,9 +173,23 @@ export async function handlePageCapture(req: Request): Promise<Response> {
158
173
  text,
159
174
  truncated,
160
175
  originalBytes,
176
+ hasScreenshot: Boolean(input.screenshot),
161
177
  });
162
178
 
163
- const msg = addMessage(thread_id, "user", messageBody, undefined, "page_capture");
179
+ // When a screenshot is included, persist the user turn as a multipart
180
+ // ContentPart[] (text + image) — that's the same shape the chat UI and
181
+ // agent runner expect for inline images, so the picture renders in the
182
+ // bubble on reload and vision-capable models can see it on the silent
183
+ // observer turn. Without a screenshot we keep the legacy string body
184
+ // to avoid touching messages that never had an image.
185
+ const screenshotPart: ContentPart | null = input.screenshot
186
+ ? { type: "image", media_type: input.screenshotMediaType ?? "image/png", data: input.screenshot }
187
+ : null;
188
+ const storedContent: string = screenshotPart
189
+ ? JSON.stringify([{ type: "text", text: messageBody }, screenshotPart] satisfies ContentPart[])
190
+ : messageBody;
191
+
192
+ const msg = addMessage(thread_id, "user", storedContent, undefined, "page_capture");
164
193
 
165
194
  // Fire a silent observer run so the agent ingests the captured context
166
195
  // without being forced to reply — matching bridge silent/observer mode.
@@ -170,6 +199,7 @@ export async function handlePageCapture(req: Request): Promise<Response> {
170
199
  thread_id,
171
200
  queue_source: "extension",
172
201
  message: `${SILENT_CAPTURE_PREAMBLE}\n\n${messageBody}`,
202
+ attachments: screenshotPart ? [screenshotPart] : undefined,
173
203
  user_category: "page_capture",
174
204
  assistant_category: "page_capture",
175
205
  silent: true,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@circuitwall/jarela",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "description": "Jarela — local chat interface for LangGraph agents (multi-provider, single-process, SQLite-backed).",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Andrew Ge Wu",
@@ -100,6 +100,7 @@
100
100
  "test:live:isolated:full": "node scripts/live-test-isolated.mjs --llm",
101
101
  "test:e2e": "playwright test",
102
102
  "test:e2e:ui": "playwright test --ui",
103
+ "promo:record": "node scripts/promo-record.mjs",
103
104
  "release:docker": "node scripts/release-docker.mjs",
104
105
  "release:docker:dry": "node scripts/release-docker.mjs --dry-run"
105
106
  },