npm - @mindstudio-ai/remy - Versions diffs - 0.1.196 → 0.1.198 - Mend

@mindstudio-ai/remy 0.1.196 → 0.1.198

This diff shows the changes between two publicly available versions of the package, exactly as they were released to one of the supported public registries. It is provided for informational purposes only.

Files changed (7)

package/README.md +9 -0
package/dist/headless.d.ts +5 -6
package/dist/headless.js +27 -23
package/dist/index.js +27 -23
package/dist/subagents/browserAutomation/prompt.md +1 -1
package/dist/subagents/designExpert/tools/images/enhance-image-prompt.md +10 -3
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -323,6 +323,14 @@ Clear conversation history and delete the session file.
 {"action": "clear", "requestId": "r4"}
 ```
+#### `changeModels`
+Change per-agent model picks **without** clearing history. `models` is a sparse map keyed by agent identifier (`parent`, `visualDesignExpert`, …); omit it (or send `{}`) to reset every agent to server defaults. Takes effect on the next turn. Rejected with `completed(success:false, error:"cannot change models while a turn is running")` if a turn is in flight — cancel first, then retry. Responds with `models_changed`. To start a fresh conversation on a different model, send `clear` then `changeModels`.
+```json
+{"action": "changeModels", "requestId": "r5", "models": {"parent": "gemini-3.1-pro"}}
+```
 ### Output Events (stdout)
 Events are emitted as newline-delimited JSON. Command responses include `requestId`; system events do not.
@@ -351,6 +359,7 @@ All command responses include the `requestId` from the originating command.
 | `error` | `error` | Error message (may precede `completed`) |
 | `history` | `messages` | Response to `get_history` |
 | `session_cleared` | | Response to `clear` |
+| `models_changed` | `models?`, `modelSurfaces`, `allowedModelsByType` | Response to `changeModels` |
 | `completed` | `success`, `error?` | Terminal event — exactly one per command |
 #### Example Session

package/dist/headless.d.ts CHANGED Viewed

@@ -131,12 +131,11 @@ declare class HeadlessSession {
      */
     private kickDrain;
     private handleClear;
-    /** Archive the current session and seed a fresh one with the given
-     * per-agent model overrides. Models are immutable for the life of a
-     * session — this is the only way to change them. Omitting `models`
-     * (or sending an empty object) resets to "use server defaults for
-     * every agent". */
-    private handleNewSession;
+    /** Change per-agent model picks without clearing history. Takes effect on
+     * the next turn — the model is resolved live, per LLM call, from
+     * `state.models`. Omitting `models` (or sending an empty object) resets
+     * every agent to "use server defaults". */
+    private handleChangeModels;
     /** Cancel the running turn and drain the queue. Returns the drained items. */
     private handleCancel;
     private handleStdinLine;

package/dist/headless.js CHANGED Viewed

@@ -2940,15 +2940,6 @@ ${content}` : attachmentHeader;
     const blocks = msg.content;
     const text = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
     const toolCalls = blocks.filter((b) => b.type === "tool").map((b) => ({ id: b.id, name: b.name, input: b.input }));
-    const thinking = blocks.filter(
-      (b) => b.type === "thinking" || b.type === "redacted_thinking"
-    ).map(
-      (b) => b.type === "thinking" ? {
-        type: "thinking",
-        thinking: b.thinking,
-        signature: b.signature
-      } : { type: "redacted_thinking", data: b.data }
-    );
     const cleaned2 = {
       role: msg.role,
       content: text
@@ -2956,9 +2947,6 @@ ${content}` : attachmentHeader;
     if (toolCalls.length > 0) {
       cleaned2.toolCalls = toolCalls;
     }
-    if (thinking.length > 0) {
-      cleaned2.thinking = thinking;
-    }
     if (msg.providerMetadata) {
       cleaned2.providerMetadata = msg.providerMetadata;
     }
@@ -3430,7 +3418,7 @@ var BROWSER_TOOLS = [
   {
     clearable: true,
     name: "browserCommand",
-    description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
+    description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recording` object \u2014 one chunk of a continuous per-session rrweb recording that the viewer stitches into a single replay (not a standalone per-call clip). Timeout: 120s.",
     inputSchema: {
       type: "object",
       properties: {
@@ -4292,7 +4280,14 @@ async function generateImageAssets(opts) {
   const height = opts.height || 2048;
   const config = { width, height };
   if (sourceImages?.length) {
+    const [firstImage] = sourceImages;
     config.images = sourceImages;
+    config.source_images = sourceImages;
+    config.image_ref = sourceImages;
+    config.image = firstImage;
+    config.image_url = firstImage;
+    config.source_image = firstImage;
+    config.source = firstImage;
   }
   const isEdit = !!sourceImages?.length;
   const enhancedPrompts = isEdit ? prompts : await Promise.all(
@@ -7867,13 +7862,11 @@ var HeadlessSession = class {
     clearSession(this.state);
     return {};
   }
-  /** Archive the current session and seed a fresh one with the given
-   * per-agent model overrides. Models are immutable for the life of a
-   * session — this is the only way to change them. Omitting `models`
-   * (or sending an empty object) resets to "use server defaults for
-   * every agent". */
-  handleNewSession(models) {
-    clearSession(this.state);
+  /** Change per-agent model picks without clearing history. Takes effect on
+   * the next turn — the model is resolved live, per LLM call, from
+   * `state.models`. Omitting `models` (or sending an empty object) resets
+   * every agent to "use server defaults". */
+  handleChangeModels(models) {
     this.state.models = models && Object.keys(models).length > 0 ? models : void 0;
     saveSession(this.state);
     return {
@@ -7970,12 +7963,23 @@ var HeadlessSession = class {
       );
       return;
     }
-    if (action === "newSession") {
+    if (action === "changeModels") {
+      if (this.running) {
+        this.emit(
+          "completed",
+          {
+            success: false,
+            error: "cannot change models while a turn is running"
+          },
+          requestId
+        );
+        return;
+      }
       const models = parsed.models;
       this.dispatchSimple(
         requestId,
-        "session_cleared",
-        () => this.handleNewSession(models)
+        "models_changed",
+        () => this.handleChangeModels(models)
       );
       return;
     }

package/dist/index.js CHANGED Viewed

@@ -3667,15 +3667,6 @@ ${content}` : attachmentHeader;
     const blocks = msg.content;
     const text = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
     const toolCalls = blocks.filter((b) => b.type === "tool").map((b) => ({ id: b.id, name: b.name, input: b.input }));
-    const thinking = blocks.filter(
-      (b) => b.type === "thinking" || b.type === "redacted_thinking"
-    ).map(
-      (b) => b.type === "thinking" ? {
-        type: "thinking",
-        thinking: b.thinking,
-        signature: b.signature
-      } : { type: "redacted_thinking", data: b.data }
-    );
     const cleaned2 = {
       role: msg.role,
       content: text
@@ -3683,9 +3674,6 @@ ${content}` : attachmentHeader;
     if (toolCalls.length > 0) {
       cleaned2.toolCalls = toolCalls;
     }
-    if (thinking.length > 0) {
-      cleaned2.thinking = thinking;
-    }
     if (msg.providerMetadata) {
       cleaned2.providerMetadata = msg.providerMetadata;
     }
@@ -4179,7 +4167,7 @@ var init_tools = __esm({
       {
         clearable: true,
         name: "browserCommand",
-        description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
+        description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recording` object \u2014 one chunk of a continuous per-session rrweb recording that the viewer stitches into a single replay (not a standalone per-call clip). Timeout: 120s.",
         inputSchema: {
           type: "object",
           properties: {
@@ -5027,7 +5015,14 @@ async function generateImageAssets(opts) {
   const height = opts.height || 2048;
   const config = { width, height };
   if (sourceImages?.length) {
+    const [firstImage] = sourceImages;
     config.images = sourceImages;
+    config.source_images = sourceImages;
+    config.image_ref = sourceImages;
+    config.image = firstImage;
+    config.image_url = firstImage;
+    config.source_image = firstImage;
+    config.source = firstImage;
   }
   const isEdit = !!sourceImages?.length;
   const enhancedPrompts = isEdit ? prompts : await Promise.all(
@@ -8659,13 +8654,11 @@ var init_headless = __esm({
         clearSession(this.state);
         return {};
       }
-      /** Archive the current session and seed a fresh one with the given
-       * per-agent model overrides. Models are immutable for the life of a
-       * session — this is the only way to change them. Omitting `models`
-       * (or sending an empty object) resets to "use server defaults for
-       * every agent". */
-      handleNewSession(models) {
-        clearSession(this.state);
+      /** Change per-agent model picks without clearing history. Takes effect on
+       * the next turn — the model is resolved live, per LLM call, from
+       * `state.models`. Omitting `models` (or sending an empty object) resets
+       * every agent to "use server defaults". */
+      handleChangeModels(models) {
         this.state.models = models && Object.keys(models).length > 0 ? models : void 0;
         saveSession(this.state);
         return {
@@ -8762,12 +8755,23 @@ var init_headless = __esm({
           );
           return;
         }
-        if (action === "newSession") {
+        if (action === "changeModels") {
+          if (this.running) {
+            this.emit(
+              "completed",
+              {
+                success: false,
+                error: "cannot change models while a turn is running"
+              },
+              requestId
+            );
+            return;
+          }
           const models = parsed.models;
           this.dispatchSimple(
             requestId,
-            "session_cleared",
-            () => this.handleNewSession(models)
+            "models_changed",
+            () => this.handleChangeModels(models)
           );
           return;
         }

package/dist/subagents/browserAutomation/prompt.md CHANGED Viewed

@@ -62,7 +62,7 @@ Each browserCommand returns:
 - `snapshot`: the final page state after all steps complete (always present, even without an explicit snapshot step)
 - `logs`: array of browser-side events that fired during the batch (console output, network failures, JS errors, user interactions). Check this for errors before reporting pass.
 - `duration`: total execution time in ms
-- `recordingUrl` (optional): URL to an rrweb session recording of the tool call. Present whenever the batch contained an interactive step (click, type, select). Include it in your failure reports so the main agent can share it — it's the fastest way to reproduce a bug visually.
+- `recording` (optional): metadata for an rrweb session recording, present whenever the batch contained an interactive step (click, type, select). Each call returns one chunk of a continuous per-session recording (the viewer stitches chunks by `sessionId`/`seq` into a single replay) — it's not a standalone clip. Note in your failure reports that a recording is available so the main agent can surface it.
 On error, the failing step has an `error` field and execution stops. Remaining steps are skipped.

package/dist/subagents/designExpert/tools/images/enhance-image-prompt.md CHANGED Viewed

@@ -15,12 +15,19 @@ Examples of good density:
 These are non-negotiable. Violating them produces bad output.
 - **No hex codes.** The model renders hex codes as visible text in the image. Describe colors by name and relationship: "deep emerald green with a smooth satin finish" or "warm sand beige fading into pale desaturated blue" — never "#7C3AED".
-- **No quoted strings.** Any single or double quoted string gets rendered as literal text in the image.
 - **No physical object framing.** Words like "artwork", "painting", "canvas", "print", "app icon", "square digital artwork" produce photorealistic mockups of a painting in a frame or an icon inset on a background. Describe the visual content directly.
-- **No text triggers.** Words like "poster", "magazine cover", "editorial spread", "sign", or brand names risk rendering literal text, mastheads, or mockup layouts. If you want an editorial photography *style*, describe the photographic qualities — not the format.
 - **Describe what you want, not what you don't want.** Negation doesn't work — "street with no cars" activates "cars." Say "empty street" instead.
 - **No body part positioning.** Don't describe specific arrangements of arms, legs, or limbs.
-- **No brand names.** Things like "Apple style" or "Nintendo style" will generate literal logos in the output.
+- **No other brands as a style shortcut.** Don't borrow another company's identity as shorthand — "Apple style", "Nintendo style" — it renders that company's literal logo. (A brand's *own* name or wordmark on its *own* asset is intended text, not this — see Text & wordmarks below.)
+## Text & wordmarks
+The model renders text well — but only the text you tell it to, so quotation marks mean "render this literally." Use them deliberately.
+- **Reproduce intended text exactly.** When the brief names a wordmark, brand name, label, sign, headline, or UI copy, carry the exact string through in quotes — e.g. a wordmark reading "Solid Credit". Never drop it, paraphrase it, or genericize it to "a wordmark": the literal text is usually the whole point of the asset, and if you omit it the model fills the space with an invented placeholder.
+- **Direct the typography and placement.** Specify weight, case, color, and position so the text lands where the designer wants it — "a near-black grotesque sans-serif wordmark reading 'Solid Credit', centered directly below the mark."
+- **Keep it short.** Wordmarks, labels, and short taglines render reliably; full sentences and paragraphs degrade into garbled glyphs. Trim long copy to the few words that matter, or leave it out.
+- **Don't summon text you don't want.** When the image should have no text, don't quote stray descriptive phrases, and avoid format words that imply copy — "poster", "magazine cover", "sign", "billboard" — which can produce spurious text or mastheads. For an editorial *style*, describe the photographic qualities, not the format.
 ## Composition

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mindstudio-ai/remy",
-  "version": "0.1.196",
+  "version": "0.1.198",
   "description": "MindStudio coding agent",
   "repository": {
     "type": "git",