npm - @circuitwall/jarela - Versions diffs - 1.2.0 → 1.4.0 - Mend

@circuitwall/jarela 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

package/.next/standalone/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@circuitwall/jarela",
-  "version": "1.2.0",
+  "version": "1.4.0",
   "description": "Jarela — local chat interface for LangGraph agents (multi-provider, single-process, SQLite-backed).",
   "license": "Apache-2.0",
   "author": "Andrew Ge Wu",
@@ -100,6 +100,7 @@
     "test:live:isolated:full": "node scripts/live-test-isolated.mjs --llm",
     "test:e2e": "playwright test",
     "test:e2e:ui": "playwright test --ui",
+    "promo:record": "node scripts/promo-record.mjs",
     "release:docker": "node scripts/release-docker.mjs",
     "release:docker:dry": "node scripts/release-docker.mjs --dry-run"
   },

package/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,104 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [1.4.0] - 2026-06-08
+### Added
+- **Browser-extension element screenshot.** The page-capture flow now
+  ships a cropped PNG of the picked element alongside the text. The
+  content script grabs the visible viewport via
+  `chrome.tabs.captureVisibleTab` (loopback only, via the service worker)
+  and crops it to the element's bounding rect through `OffscreenCanvas`
+  at `devicePixelRatio`. The server validates the base64 payload (≤ 4 MB
+  encoded), persists the user message as a multipart `ContentPart[]` of
+  `[text, image]` so the bubble renders the picture inline, and
+  forwards the image part to the silent observer turn so vision-capable
+  agents see it on the immediate follow-up run. Falls back cleanly to
+  text-only capture if the snapshot is denied. See
+  [`docs/api.md`](./docs/api.md#post-apiv1page-capture) for the updated
+  request schema.
+- **Promo video recorder.** `npm run promo:record` (via
+  [`scripts/promo-record.mjs`](./scripts/promo-record.mjs)) drives your
+  real local install in a 9:16 vertical PWA viewport and records a
+  dark-theme `.webm` of the tap-to-unlock intro, agent picker, a
+  human-paced chat turn, and a tour of every side panel. First run
+  saves auth state to `promo/.storage.json` and reuses it thereafter.
+## [1.3.0] - 2026-06-08
+Two new agent capabilities and a hardening pass on tool wall-clocks.
+Bridge adapters (WhatsApp today) now spill large remote attachments
+to a local store instead of inlining them into the LLM context, and
+the agent picks them up by path through ``file_read``. Long-running
+tool calls can now be fired asynchronously: the LLM gets a tracking
+key back immediately and pulls the result later via a new built-in.
+### Added
+- **Bridge attachment spill store**
+  ([#215](https://github.com/CircuitWall/jarela/pull/215)). Inbound
+  bridge messages no longer base64-inline every document, voice note,
+  audio, or video into the next prompt. Buffers are persisted under
+  ``<dataDir>/bridge-attachments/<bridge>/<YYYY-MM-DD>/<id>-<name>``
+  with sanitised paths, an SHA-256 digest, and a future-facing
+  ``pruneBridgeAttachments({ maxAgeMs })`` helper; the prompt body
+  carries a text pointer telling the agent to use ``file_read`` to
+  inspect the contents. Images and stickers ≤ 1 MB still inline so
+  vision works out of the box.
+- **Async tool execution (``async_run`` wrapper + ``tool_result_get``)**
+  ([#216](https://github.com/CircuitWall/jarela/pull/216)). Every
+  tool's schema now exposes an optional ``async_run: boolean``. When
+  set, the wrapper returns ``{ok, async, key, tool, started_at,
+  deadline_ms, hint}`` immediately and runs the work detached; the
+  LLM picks the result up via the new built-in
+  ``tool_result_get(key, wait_ms?, consume?)``. ``tool_result_list``
+  returns summaries without dumping result bodies. In-process store
+  with a 10-minute TTL and a 256-entry cap (oldest finished evicted
+  first, then oldest pending with a warn).
+### Changed
+- **Hard ceiling on tool ``deadline_ms``**
+  ([#216](https://github.com/CircuitWall/jarela/pull/216)). The
+  wall-clock budget the LLM can pick is now capped at 30 minutes by
+  default. Values above the ceiling are clamped and a one-line
+  ``console.warn`` is emitted naming the tool, the requested value,
+  and the ceiling. Operators can raise or lower the cap with the new
+  ``JARELA_TOOL_MAX_DEADLINE_MS`` environment variable (integer
+  milliseconds). Applies to both sync and ``async_run`` paths.
+### Fixed
+- **E2E menu specs no longer race the boot agent picker**
+  ([#217](https://github.com/CircuitWall/jarela/pull/217)). Three
+  Playwright specs (``layout``, ``credentials``, ``setup-reorg``)
+  were intermittently failing because the BootScreen overlay
+  intercepted clicks on the header menu button. A new
+  ``waitForAppReady(page)`` helper picks the default agent tile and
+  waits for the overlay to detach before the test drives the UI.
+### Configuration
+- ``JARELA_TOOL_MAX_DEADLINE_MS`` — overrides the per-tool
+  wall-clock ceiling (default 1800000 ms / 30 min). Set to a smaller
+  value to tighten the cap, or larger if a regulated workload genuinely
+  needs long synchronous calls.
+Two follow-up fixes on top of 1.2.0.
+### Fixed
+- **Boot agent picker always shows after login**
+  ([#213](https://github.com/CircuitWall/jarela/pull/213)). The picker
+  was being skipped in some session states; it now reliably appears so
+  the user actively chooses an agent at boot instead of silently
+  inheriting one.
+- **Extension UX polish on one-shot turns**
+  ([#212](https://github.com/CircuitWall/jarela/pull/212)). Custom
+  intent collapses by default, Enter submits, writes are queued, and
+  one-shot turns drop the quality gates that didn't apply to them.
 ## [1.2.0] - 2026-06-08
 Security, runtime resilience, and a broad UI consolidation pass.

package/README.md CHANGED Viewed

@@ -1,29 +1,31 @@
-<p align="center">
-  <img src="./public/logo-mark-transparent.png" alt="Jarela" width="160" />
+<p align="center">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="./public/icon-512.png" />
+    <img src="./public/icon-512-light.png" alt="Jarela" width="140" />
+  </picture>
 </p>
 <h1 align="center">Jarela</h1>
 <p align="center">
-  <b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b><br/>
-  <sub>Next.js 16 + LangGraph + SQLite. PWA-installable. No cloud backend, no telemetry.</sub>
+  <b>A local-first, browser-based GUI for orchestrating multi-provider LLM agents.</b>
+</p>
+<p align="center">
+  <sub>Next.js 16 &middot; LangGraph &middot; SQLite &middot; PWA-installable &middot; no cloud backend, no telemetry</sub>
 </p>
 <p align="center">
-  <a href="#quick-start">Quick start</a> ·
-  <a href="#configuration-guide-home--work">Config guide</a> ·
-  <a href="#supported-platforms">Platforms</a> ·
-  <a href="#features">Features</a> ·
-  <a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a> ·
-  <a href="#built-in-toolbelt">Tools</a> ·
-  <a href="#providers">Providers</a> ·
-  <a href="#connections">Connections</a> ·
-  <a href="./docs/EXTENDING.md">Extending</a> ·
-  <a href="./docs/ARCHITECTURE.md">Architecture</a> ·
-  <a href="./CONTRIBUTING.md">Contributing</a> ·
+  <a href="#quick-start">Quick start</a> &middot;
+  <a href="#configuration-guide-home--work">Config guide</a> &middot;
+  <a href="#supported-platforms">Platforms</a> &middot;
+  <a href="#features">Features</a> &middot;
+  <a href="#productivity-stacks-google--microsoft-at-parity">Google + Microsoft</a> &middot;
+  <a href="#built-in-toolbelt">Tools</a> &middot;
+  <a href="#providers">Providers</a> &middot;
+  <a href="#connections">Connections</a> &middot;
+  <a href="./docs/EXTENDING.md">Extending</a> &middot;
+  <a href="./docs/ARCHITECTURE.md">Architecture</a> &middot;
+  <a href="./CONTRIBUTING.md">Contributing</a> &middot;
   <a href="#documentation">Docs</a>
 </p>
@@ -51,15 +53,16 @@
   </a>
 </p>
----
 <p align="center">
-  <video src="https://github.com/user-attachments/assets/0f33f8d3-07bb-4850-9fcc-cfc97036f180" controls width="640" muted>
-    Your browser doesn't support embedded video.
-    <a href="https://github.com/user-attachments/assets/0f33f8d3-07bb-4850-9fcc-cfc97036f180">Download the clip</a>.
+  <video src="./docs/assets/jarela-promo.webm" poster="./docs/assets/jarela-promo-poster.jpg" controls muted playsinline width="320">
+    <img src="./docs/assets/jarela-promo-poster.jpg" alt="Jarela promo &mdash; PIN unlock, agent picker, chat, panel tour" width="320" />
   </video>
+  <br/>
+  <sub><i>Tap-to-unlock &rarr; agent picker &rarr; human-paced chat &rarr; full panel tour</i> &middot; <a href="./docs/assets/jarela-promo.webm">Download .webm</a></sub>
 </p>
+---
 ## Quick start
 Get to a working local agent in under 10 minutes:
@@ -239,9 +242,12 @@ create an Outlook Calendar invite in the same turn.
   **Memory**, **Documents**, **Profile**, **Bridges**, **Scheduled tasks**,
   and **Pending approvals**.
 - **Browser extension** ([`browser-extension/`](./browser-extension)) —
-  Chrome MV3, click an element on any page and POST it to your local
-  Jarela as a new user message (ADR-0018). Loopback only; toolbar icon
-  greys out when Jarela isn't running.
+  Chrome MV3, click an element on any page and POST it (with a cropped
+  PNG of the picked element) to your local Jarela as a new user message
+  (ADR-0018). The screenshot is rendered inline in the chat bubble and
+  forwarded to vision-capable agents on the silent observer turn that
+  fires immediately after the capture. Loopback only; toolbar icon greys
+  out when Jarela isn't running.
 ### Operational
@@ -914,6 +920,25 @@ on every push and PR: `lint + tsc --noEmit + next build`, then the same
 live integration suite against the production server output. The build
 badge at the top of this README links straight to the latest run.
+## Recording a promo video
+[scripts/promo-record.mjs](./scripts/promo-record.mjs) drives your real
+local install (default `http://localhost:4312`) inside a 540&times;960
+vertical (9:16) PWA viewport and records a `.webm` of a five-scene tour
+in dark theme: a simulated PIN unlock, agent picker, a human-paced chat
+turn, every side panel, and a closing pose.
+```bash
+npm run dev            # in one terminal
+npm run promo:record   # in another — output lands in ./promo/
+```
+The first run opens a headed Chromium so you can manually unlock the
+install if needed; the resulting auth state is saved to
+`promo/.storage.json` and reused on every subsequent run. Override the
+target with `JARELA_PROMO_URL`, the chat line with `JARELA_PROMO_MSG`,
+or skip the actual send with `JARELA_PROMO_SKIP_CHAT=1`.
 ## Security
 - **CSRF / origin guard** ([lib/auth/access.ts](./lib/auth/access.ts))

package/components/chat/InputBar.tsx CHANGED Viewed

@@ -73,6 +73,12 @@ function fileToContentPart(file: File): Promise<ContentPart> {
   });
 }
+function attachmentKey(a: ContentPart, i: number): string {
+  if (a.type === "text") return `text:${i}:${a.text.length}`;
+  const name = a.type === "file" ? a.name : "";
+  return `${a.type}:${a.media_type}:${name}:${a.data.length}:${a.data.slice(0, 16)}`;
+}
 export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue, onStop, streaming, disabled, placeholder, voiceEnabled, agentId, onVoiceTranscript }: Props) {
   // Text state is intentionally LOCAL. Lifting it to ChatView would re-render
   // the entire message list (every MessageBubble + ReactMarkdown pass) on
@@ -268,7 +274,10 @@ export function InputBar({ attachments, onAttachmentsChange, onSubmit, onQueue,
       {attachments.length > 0 && (
         <div className="flex flex-wrap gap-2 mb-2">
           {attachments.map((a, i) => (
-            <div key={i} className="relative group shrink-0">
+            // Content-derived key — using the index reused DOM nodes when
+            // earlier attachments were removed, flashing the wrong preview
+            // (and the wrong filename) into the slot of the survivor.
+            <div key={attachmentKey(a, i)} className="relative group shrink-0">
               {a.type === "image" ? (
                 // eslint-disable-next-line @next/next/no-img-element
                 <img

package/components/ui/BootScreen.tsx CHANGED Viewed

@@ -150,16 +150,6 @@ export function BootScreen({ agents, agentsLoaded, activeAgentId, onPickAgent, s
     };
   }, [activeAgentId, pickedId, agentsLoaded, markStep]);
-  // Returning users with a saved default skip the manual tile-click.
-  useEffect(() => {
-    if (suppressed) return;
-    if (!agentsLoaded) return;
-    if (activeAgentId || pickedId) return;
-    if (!defaultAgent) return;
-    setPickedId(defaultAgent.id);
-    onPickAgent(defaultAgent.id);
-  }, [suppressed, agentsLoaded, activeAgentId, pickedId, defaultAgent, onPickAgent]);
   if (done) return null;
   if (suppressed) return null;

package/lib/agents/agent-turn.ts CHANGED Viewed

@@ -41,6 +41,14 @@ export interface RunAgentTurnRequest {
    * the category default.
    */
   context_profile_override?: Partial<TurnContextProfile> | null;
+  /**
+   * Skip the stall-retry + strict-citation audit wrapper. One-shot
+   * callers (browser-extension fill / rewrite) want the raw assistant
+   * text without the `↻` separator or pre-retry stall prose that the
+   * wrapper would otherwise inject into the streamed content.
+   */
+  disable_quality_gates?: boolean;
 }
 export interface RunAgentTurnResult {
@@ -76,6 +84,7 @@ export async function runAgentTurn(req: RunAgentTurnRequest): Promise<RunAgentTu
         attachments: req.attachments,
         user_category: req.user_category ?? null,
         context_profile: contextProfile,
+        disable_quality_gates: req.disable_quality_gates,
         signal: active.abort.signal,
         _pinned_model_config_name: pinnedModelConfigName,
         _skip_persist_message: req.skip_persist_user_message,

package/lib/agents/prepare/request.ts CHANGED Viewed

@@ -41,6 +41,15 @@ export interface ThreadRunRequest {
    */
   context_profile?: TurnContextProfile;
+  /**
+   * Skip the post-stream stall-retry + strict-citation audit wrapper for
+   * this turn. Use for one-shot callers (browser-extension fill / rewrite)
+   * that consume `assistantContent` as raw text and would otherwise type
+   * the visible `↻` separator and the original stalled prose into the
+   * user's input field. Chat callers leave undefined.
+   */
+  disable_quality_gates?: boolean;
   /**
    * Internal - public callers leave undefined. When set by the submission
    * path, this freezes the effective model config for the turn so queued

package/lib/agents/run-thread.ts CHANGED Viewed

@@ -292,8 +292,16 @@ export async function prepareThreadRun(req: ThreadRunRequest): Promise<PreparedT
   // Overhead = the assembled system prompt + per-message scaffolding, which
   // is more accurate than the budget's static overhead allowance.
   const overheadTokens = estimateTokens(systemPrompt);
+  // One-shot callers (extension fill/rewrite) consume `assistantContent` as
+  // raw text. The stall-retry wrapper would otherwise leak the `↻` separator
+  // and the pre-retry stalled prose into the user's input field, and the
+  // strict-citation audit (which lives inside the same wrapper) would do
+  // the same with retry continuations. Bypass it entirely for those callers.
+  const stream = req.disable_quality_gates
+    ? rawStream
+    : stallRetryStream(rawStream, req, allowedTools, retriesLeft);
   return {
-    stream: stallRetryStream(rawStream, req, allowedTools, retriesLeft),
+    stream,
     thread_id: req.thread_id,
     context_snapshot: {
       context_window_tokens: historyWindow.budget.contextWindowTokens,

package/lib/api/extension-turn.ts CHANGED Viewed

@@ -91,6 +91,13 @@ async function runExtensionAction(action: z.infer<typeof ExtensionAction>, input
     message: prompt,
     user_category: "extension",
     assistant_category: "extension",
+    // The extension types `assistantContent` directly into the user's
+    // input field. The stall-retry wrapper and the strict-citation audit
+    // would otherwise inject the `↻` separator, the original stalled
+    // prose, and audit-retry continuations into that text — pollution
+    // the user then has to manually scrub. Both gates are chat
+    // affordances; skip them for one-shot writes.
+    disable_quality_gates: true,
   });
   // Ping the events bus so any open chat view on this thread re-fetches.

package/lib/api/page-capture.test.ts CHANGED Viewed

@@ -307,3 +307,61 @@ describe("handlePageCapture — response shape", () => {
     });
   });
 });
+describe("handlePageCapture — screenshot attachment", () => {
+  // 1x1 transparent PNG, base64-encoded (no data: prefix).
+  const tinyPng =
+    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
+  it("rejects screenshot with invalid base64", async () => {
+    const res = await handlePageCapture(makeReq({ ...validBody, screenshot: "not base64!!" }));
+    expect(res.status).toBe(400);
+    expect(addMessageMock).not.toHaveBeenCalled();
+  });
+  it("rejects screenshot exceeding the size cap", async () => {
+    const huge = "A".repeat(4_000_001);
+    const res = await handlePageCapture(makeReq({ ...validBody, screenshot: huge }));
+    expect(res.status).toBe(400);
+  });
+  it("persists user message as a JSON ContentPart[] with text + image when screenshot is present", async () => {
+    const res = await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
+    expect(res.status).toBe(200);
+    const stored = addMessageMock.mock.calls[0][2] as string;
+    const parsed = JSON.parse(stored) as Array<{ type: string; text?: string; media_type?: string; data?: string }>;
+    expect(Array.isArray(parsed)).toBe(true);
+    expect(parsed).toHaveLength(2);
+    expect(parsed[0]).toMatchObject({ type: "text" });
+    expect(parsed[0].text).toContain("Captured from");
+    expect(parsed[0].text).toContain("Screenshot attached.");
+    expect(parsed[1]).toEqual({ type: "image", media_type: "image/png", data: tinyPng });
+  });
+  it("forwards the screenshot as a vision attachment to the silent observer run", async () => {
+    await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng }));
+    expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
+      attachments: [{ type: "image", media_type: "image/png", data: tinyPng }],
+    }));
+  });
+  it("honors a custom screenshotMediaType", async () => {
+    await handlePageCapture(makeReq({ ...validBody, screenshot: tinyPng, screenshotMediaType: "image/jpeg" }));
+    const stored = addMessageMock.mock.calls[0][2] as string;
+    const parsed = JSON.parse(stored) as Array<{ type: string; media_type?: string }>;
+    expect(parsed[1].media_type).toBe("image/jpeg");
+  });
+  it("keeps the legacy string-content path when no screenshot is sent", async () => {
+    await handlePageCapture(makeReq(validBody));
+    const stored = addMessageMock.mock.calls[0][2] as string;
+    // Not JSON-parseable as an array — it's the legacy plaintext body.
+    expect(() => JSON.parse(stored)).toThrow();
+    expect(stored).toContain("Captured from");
+    expect(stored).not.toContain("Screenshot attached.");
+    expect(runAgentTurnMock).toHaveBeenCalledWith(expect.objectContaining({
+      attachments: undefined,
+    }));
+  });
+});

package/lib/api/page-capture.ts CHANGED Viewed

@@ -13,12 +13,18 @@ import {
 } from "@/lib/stores/agent-configs";
 import { publish } from "@/lib/notifications/bus";
 import { runAgentTurn } from "@/lib/agents/agent-turn";
+import type { ContentPart } from "@/lib/tools/types";
 // 100KB UTF-8 cap on captured text. The LLM context window is the real
 // constraint; this cap exists to keep a runaway "<body>" pick from
 // trashing the conversation. See ADR-0018.
 export const MAX_TEXT_BYTES = 100_000;
+// Hard cap on the inline element screenshot (base64 chars). 4 MB of
+// base64 ≈ 3 MB decoded — generous for a single cropped element while
+// still bounding the SQLite row and the LLM vision payload.
+export const MAX_SCREENSHOT_B64 = 4_000_000;
 // Preamble prepended to the LLM call for the silent observer run.
 // The captured content is already persisted in the DB — this wrapper
 // instructs the agent to observe without replying, matching bridge
@@ -37,6 +43,13 @@ const Body = z.object({
   tagName: z.string().max(64).optional(),
   text: z.string(),
   capturedAt: z.string().datetime(),
+  // Optional base64-encoded PNG of just the picked element (no data: URL
+  // prefix). The content script crops `chrome.tabs.captureVisibleTab`
+  // to the element bounding box before sending. When present, it is
+  // attached to the persisted user message as an image ContentPart so
+  // the chat UI renders it inline and vision-capable agents can see it.
+  screenshot: z.string().regex(/^[A-Za-z0-9+/=]+$/).max(MAX_SCREENSHOT_B64).optional(),
+  screenshotMediaType: z.string().regex(/^image\/[a-z0-9.+-]+$/).max(64).optional(),
 });
 function truncateUtf8(s: string, maxBytes: number): { text: string; truncated: boolean; originalBytes: number } {
@@ -102,12 +115,14 @@ function composeBody(args: {
   text: string;
   truncated: boolean;
   originalBytes: number;
+  hasScreenshot?: boolean;
 }): string {
   const heading = args.title
     ? `📎 Captured from [${args.title}](${args.url})`
     : `📎 Captured from <${args.url}>`;
   const lines = [heading];
   if (args.selector) lines.push(`Element: \`${args.selector}\``);
+  if (args.hasScreenshot) lines.push("Screenshot attached.");
   if (args.truncated) {
     lines.push(`> ⚠ Truncated to ${MAX_TEXT_BYTES.toLocaleString()} bytes (original was ${args.originalBytes.toLocaleString()} bytes)`);
   }
@@ -158,9 +173,23 @@ export async function handlePageCapture(req: Request): Promise<Response> {
     text,
     truncated,
     originalBytes,
+    hasScreenshot: Boolean(input.screenshot),
   });
-  const msg = addMessage(thread_id, "user", messageBody, undefined, "page_capture");
+  // When a screenshot is included, persist the user turn as a multipart
+  // ContentPart[] (text + image) — that's the same shape the chat UI and
+  // agent runner expect for inline images, so the picture renders in the
+  // bubble on reload and vision-capable models can see it on the silent
+  // observer turn. Without a screenshot we keep the legacy string body
+  // to avoid touching messages that never had an image.
+  const screenshotPart: ContentPart | null = input.screenshot
+    ? { type: "image", media_type: input.screenshotMediaType ?? "image/png", data: input.screenshot }
+    : null;
+  const storedContent: string = screenshotPart
+    ? JSON.stringify([{ type: "text", text: messageBody }, screenshotPart] satisfies ContentPart[])
+    : messageBody;
+  const msg = addMessage(thread_id, "user", storedContent, undefined, "page_capture");
   // Fire a silent observer run so the agent ingests the captured context
   // without being forced to reply — matching bridge silent/observer mode.
@@ -170,6 +199,7 @@ export async function handlePageCapture(req: Request): Promise<Response> {
     thread_id,
     queue_source: "extension",
     message: `${SILENT_CAPTURE_PREAMBLE}\n\n${messageBody}`,
+    attachments: screenshotPart ? [screenshotPart] : undefined,
     user_category: "page_capture",
     assistant_category: "page_capture",
     silent: true,