@mindstudio-ai/remy 0.1.196 → 0.1.198

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -323,6 +323,14 @@ Clear conversation history and delete the session file.
323
323
  {"action": "clear", "requestId": "r4"}
324
324
  ```
325
325
 
326
+ #### `changeModels`
327
+
328
+ Change per-agent model picks **without** clearing history. `models` is a sparse map keyed by agent identifier (`parent`, `visualDesignExpert`, …); omit it (or send `{}`) to reset every agent to server defaults. Takes effect on the next turn. Rejected with `completed(success:false, error:"cannot change models while a turn is running")` if a turn is in flight — cancel first, then retry. Responds with `models_changed`. To start a fresh conversation on a different model, send `clear` then `changeModels`.
329
+
330
+ ```json
331
+ {"action": "changeModels", "requestId": "r5", "models": {"parent": "gemini-3.1-pro"}}
332
+ ```
333
+
326
334
  ### Output Events (stdout)
327
335
 
328
336
  Events are emitted as newline-delimited JSON. Command responses include `requestId`; system events do not.
@@ -351,6 +359,7 @@ All command responses include the `requestId` from the originating command.
351
359
  | `error` | `error` | Error message (may precede `completed`) |
352
360
  | `history` | `messages` | Response to `get_history` |
353
361
  | `session_cleared` | | Response to `clear` |
362
+ | `models_changed` | `models?`, `modelSurfaces`, `allowedModelsByType` | Response to `changeModels` |
354
363
  | `completed` | `success`, `error?` | Terminal event — exactly one per command |
355
364
 
356
365
  #### Example Session
@@ -131,12 +131,11 @@ declare class HeadlessSession {
131
131
  */
132
132
  private kickDrain;
133
133
  private handleClear;
134
- /** Archive the current session and seed a fresh one with the given
135
- * per-agent model overrides. Models are immutable for the life of a
136
- * session this is the only way to change them. Omitting `models`
137
- * (or sending an empty object) resets to "use server defaults for
138
- * every agent". */
139
- private handleNewSession;
134
+ /** Change per-agent model picks without clearing history. Takes effect on
135
+ * the next turn — the model is resolved live, per LLM call, from
136
+ * `state.models`. Omitting `models` (or sending an empty object) resets
137
+ * every agent to "use server defaults". */
138
+ private handleChangeModels;
140
139
  /** Cancel the running turn and drain the queue. Returns the drained items. */
141
140
  private handleCancel;
142
141
  private handleStdinLine;
package/dist/headless.js CHANGED
@@ -2940,15 +2940,6 @@ ${content}` : attachmentHeader;
2940
2940
  const blocks = msg.content;
2941
2941
  const text = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
2942
2942
  const toolCalls = blocks.filter((b) => b.type === "tool").map((b) => ({ id: b.id, name: b.name, input: b.input }));
2943
- const thinking = blocks.filter(
2944
- (b) => b.type === "thinking" || b.type === "redacted_thinking"
2945
- ).map(
2946
- (b) => b.type === "thinking" ? {
2947
- type: "thinking",
2948
- thinking: b.thinking,
2949
- signature: b.signature
2950
- } : { type: "redacted_thinking", data: b.data }
2951
- );
2952
2943
  const cleaned2 = {
2953
2944
  role: msg.role,
2954
2945
  content: text
@@ -2956,9 +2947,6 @@ ${content}` : attachmentHeader;
2956
2947
  if (toolCalls.length > 0) {
2957
2948
  cleaned2.toolCalls = toolCalls;
2958
2949
  }
2959
- if (thinking.length > 0) {
2960
- cleaned2.thinking = thinking;
2961
- }
2962
2950
  if (msg.providerMetadata) {
2963
2951
  cleaned2.providerMetadata = msg.providerMetadata;
2964
2952
  }
@@ -3430,7 +3418,7 @@ var BROWSER_TOOLS = [
3430
3418
  {
3431
3419
  clearable: true,
3432
3420
  name: "browserCommand",
3433
- description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
3421
+ description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recording` object \u2014 one chunk of a continuous per-session rrweb recording that the viewer stitches into a single replay (not a standalone per-call clip). Timeout: 120s.",
3434
3422
  inputSchema: {
3435
3423
  type: "object",
3436
3424
  properties: {
@@ -4292,7 +4280,14 @@ async function generateImageAssets(opts) {
4292
4280
  const height = opts.height || 2048;
4293
4281
  const config = { width, height };
4294
4282
  if (sourceImages?.length) {
4283
+ const [firstImage] = sourceImages;
4295
4284
  config.images = sourceImages;
4285
+ config.source_images = sourceImages;
4286
+ config.image_ref = sourceImages;
4287
+ config.image = firstImage;
4288
+ config.image_url = firstImage;
4289
+ config.source_image = firstImage;
4290
+ config.source = firstImage;
4296
4291
  }
4297
4292
  const isEdit = !!sourceImages?.length;
4298
4293
  const enhancedPrompts = isEdit ? prompts : await Promise.all(
@@ -7867,13 +7862,11 @@ var HeadlessSession = class {
7867
7862
  clearSession(this.state);
7868
7863
  return {};
7869
7864
  }
7870
- /** Archive the current session and seed a fresh one with the given
7871
- * per-agent model overrides. Models are immutable for the life of a
7872
- * session this is the only way to change them. Omitting `models`
7873
- * (or sending an empty object) resets to "use server defaults for
7874
- * every agent". */
7875
- handleNewSession(models) {
7876
- clearSession(this.state);
7865
+ /** Change per-agent model picks without clearing history. Takes effect on
7866
+ * the next turn — the model is resolved live, per LLM call, from
7867
+ * `state.models`. Omitting `models` (or sending an empty object) resets
7868
+ * every agent to "use server defaults". */
7869
+ handleChangeModels(models) {
7877
7870
  this.state.models = models && Object.keys(models).length > 0 ? models : void 0;
7878
7871
  saveSession(this.state);
7879
7872
  return {
@@ -7970,12 +7963,23 @@ var HeadlessSession = class {
7970
7963
  );
7971
7964
  return;
7972
7965
  }
7973
- if (action === "newSession") {
7966
+ if (action === "changeModels") {
7967
+ if (this.running) {
7968
+ this.emit(
7969
+ "completed",
7970
+ {
7971
+ success: false,
7972
+ error: "cannot change models while a turn is running"
7973
+ },
7974
+ requestId
7975
+ );
7976
+ return;
7977
+ }
7974
7978
  const models = parsed.models;
7975
7979
  this.dispatchSimple(
7976
7980
  requestId,
7977
- "session_cleared",
7978
- () => this.handleNewSession(models)
7981
+ "models_changed",
7982
+ () => this.handleChangeModels(models)
7979
7983
  );
7980
7984
  return;
7981
7985
  }
package/dist/index.js CHANGED
@@ -3667,15 +3667,6 @@ ${content}` : attachmentHeader;
3667
3667
  const blocks = msg.content;
3668
3668
  const text = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
3669
3669
  const toolCalls = blocks.filter((b) => b.type === "tool").map((b) => ({ id: b.id, name: b.name, input: b.input }));
3670
- const thinking = blocks.filter(
3671
- (b) => b.type === "thinking" || b.type === "redacted_thinking"
3672
- ).map(
3673
- (b) => b.type === "thinking" ? {
3674
- type: "thinking",
3675
- thinking: b.thinking,
3676
- signature: b.signature
3677
- } : { type: "redacted_thinking", data: b.data }
3678
- );
3679
3670
  const cleaned2 = {
3680
3671
  role: msg.role,
3681
3672
  content: text
@@ -3683,9 +3674,6 @@ ${content}` : attachmentHeader;
3683
3674
  if (toolCalls.length > 0) {
3684
3675
  cleaned2.toolCalls = toolCalls;
3685
3676
  }
3686
- if (thinking.length > 0) {
3687
- cleaned2.thinking = thinking;
3688
- }
3689
3677
  if (msg.providerMetadata) {
3690
3678
  cleaned2.providerMetadata = msg.providerMetadata;
3691
3679
  }
@@ -4179,7 +4167,7 @@ var init_tools = __esm({
4179
4167
  {
4180
4168
  clearable: true,
4181
4169
  name: "browserCommand",
4182
- description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
4170
+ description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recording` object \u2014 one chunk of a continuous per-session rrweb recording that the viewer stitches into a single replay (not a standalone per-call clip). Timeout: 120s.",
4183
4171
  inputSchema: {
4184
4172
  type: "object",
4185
4173
  properties: {
@@ -5027,7 +5015,14 @@ async function generateImageAssets(opts) {
5027
5015
  const height = opts.height || 2048;
5028
5016
  const config = { width, height };
5029
5017
  if (sourceImages?.length) {
5018
+ const [firstImage] = sourceImages;
5030
5019
  config.images = sourceImages;
5020
+ config.source_images = sourceImages;
5021
+ config.image_ref = sourceImages;
5022
+ config.image = firstImage;
5023
+ config.image_url = firstImage;
5024
+ config.source_image = firstImage;
5025
+ config.source = firstImage;
5031
5026
  }
5032
5027
  const isEdit = !!sourceImages?.length;
5033
5028
  const enhancedPrompts = isEdit ? prompts : await Promise.all(
@@ -8659,13 +8654,11 @@ var init_headless = __esm({
8659
8654
  clearSession(this.state);
8660
8655
  return {};
8661
8656
  }
8662
- /** Archive the current session and seed a fresh one with the given
8663
- * per-agent model overrides. Models are immutable for the life of a
8664
- * session this is the only way to change them. Omitting `models`
8665
- * (or sending an empty object) resets to "use server defaults for
8666
- * every agent". */
8667
- handleNewSession(models) {
8668
- clearSession(this.state);
8657
+ /** Change per-agent model picks without clearing history. Takes effect on
8658
+ * the next turn — the model is resolved live, per LLM call, from
8659
+ * `state.models`. Omitting `models` (or sending an empty object) resets
8660
+ * every agent to "use server defaults". */
8661
+ handleChangeModels(models) {
8669
8662
  this.state.models = models && Object.keys(models).length > 0 ? models : void 0;
8670
8663
  saveSession(this.state);
8671
8664
  return {
@@ -8762,12 +8755,23 @@ var init_headless = __esm({
8762
8755
  );
8763
8756
  return;
8764
8757
  }
8765
- if (action === "newSession") {
8758
+ if (action === "changeModels") {
8759
+ if (this.running) {
8760
+ this.emit(
8761
+ "completed",
8762
+ {
8763
+ success: false,
8764
+ error: "cannot change models while a turn is running"
8765
+ },
8766
+ requestId
8767
+ );
8768
+ return;
8769
+ }
8766
8770
  const models = parsed.models;
8767
8771
  this.dispatchSimple(
8768
8772
  requestId,
8769
- "session_cleared",
8770
- () => this.handleNewSession(models)
8773
+ "models_changed",
8774
+ () => this.handleChangeModels(models)
8771
8775
  );
8772
8776
  return;
8773
8777
  }
@@ -62,7 +62,7 @@ Each browserCommand returns:
62
62
  - `snapshot`: the final page state after all steps complete (always present, even without an explicit snapshot step)
63
63
  - `logs`: array of browser-side events that fired during the batch (console output, network failures, JS errors, user interactions). Check this for errors before reporting pass.
64
64
  - `duration`: total execution time in ms
65
- - `recordingUrl` (optional): URL to an rrweb session recording of the tool call. Present whenever the batch contained an interactive step (click, type, select). Include it in your failure reports so the main agent can share it — it's the fastest way to reproduce a bug visually.
65
+ - `recording` (optional): metadata for an rrweb session recording, present whenever the batch contained an interactive step (click, type, select). Each call returns one chunk of a continuous per-session recording (the viewer stitches chunks by `sessionId`/`seq` into a single replay) — it's not a standalone clip. Note in your failure reports that a recording is available so the main agent can surface it.
66
66
 
67
67
  On error, the failing step has an `error` field and execution stops. Remaining steps are skipped.
68
68
 
@@ -15,12 +15,19 @@ Examples of good density:
15
15
  These are non-negotiable. Violating them produces bad output.
16
16
 
17
17
  - **No hex codes.** The model renders hex codes as visible text in the image. Describe colors by name and relationship: "deep emerald green with a smooth satin finish" or "warm sand beige fading into pale desaturated blue" — never "#7C3AED".
18
- - **No quoted strings.** Any single or double quoted string gets rendered as literal text in the image.
19
18
  - **No physical object framing.** Words like "artwork", "painting", "canvas", "print", "app icon", "square digital artwork" produce photorealistic mockups of a painting in a frame or an icon inset on a background. Describe the visual content directly.
20
- - **No text triggers.** Words like "poster", "magazine cover", "editorial spread", "sign", or brand names risk rendering literal text, mastheads, or mockup layouts. If you want an editorial photography *style*, describe the photographic qualities — not the format.
21
19
  - **Describe what you want, not what you don't want.** Negation doesn't work — "street with no cars" activates "cars." Say "empty street" instead.
22
20
  - **No body part positioning.** Don't describe specific arrangements of arms, legs, or limbs.
23
- - **No brand names.** Things like "Apple style" or "Nintendo style" will generate literal logos in the output.
21
+ - **No other brands as a style shortcut.** Don't borrow another company's identity as shorthand: "Apple style" or "Nintendo style" renders that company's literal logo. (A brand's *own* name or wordmark on its *own* asset is intended text, not this — see Text & wordmarks below.)
22
+
23
+ ## Text & wordmarks
24
+
25
+ The model renders text well — but only the text you tell it to, so quotation marks mean "render this literally." Use them deliberately.
26
+
27
+ - **Reproduce intended text exactly.** When the brief names a wordmark, brand name, label, sign, headline, or UI copy, carry the exact string through in quotes — e.g. a wordmark reading "Solid Credit". Never drop it, paraphrase it, or genericize it to "a wordmark": the literal text is usually the whole point of the asset, and if you omit it the model fills the space with an invented placeholder.
28
+ - **Direct the typography and placement.** Specify weight, case, color, and position so the text lands where the designer wants it — "a near-black grotesque sans-serif wordmark reading 'Solid Credit', centered directly below the mark."
29
+ - **Keep it short.** Wordmarks, labels, and short taglines render reliably; full sentences and paragraphs degrade into garbled glyphs. Trim long copy to the few words that matter, or leave it out.
30
+ - **Don't summon text you don't want.** When the image should have no text, don't quote stray descriptive phrases, and avoid format words that imply copy — "poster", "magazine cover", "sign", "billboard" — which can produce spurious text or mastheads. For an editorial *style*, describe the photographic qualities, not the format.
24
31
 
25
32
  ## Composition
26
33
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.196",
3
+ "version": "0.1.198",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",