@mindstudio-ai/remy 0.1.196 → 0.1.198
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -323,6 +323,14 @@ Clear conversation history and delete the session file.
|
|
|
323
323
|
{"action": "clear", "requestId": "r4"}
|
|
324
324
|
```
|
|
325
325
|
|
|
326
|
+
#### `changeModels`
|
|
327
|
+
|
|
328
|
+
Change per-agent model picks **without** clearing history. `models` is a sparse map keyed by agent identifier (`parent`, `visualDesignExpert`, …); omit it (or send `{}`) to reset every agent to server defaults. The change takes effect on the next turn. If a turn is in flight, the command is rejected with `completed(success:false, error:"cannot change models while a turn is running")` — cancel the turn first, then retry. On success, the command responds with `models_changed`. To start a fresh conversation on a different model, send `clear`, then `changeModels`.
|
|
329
|
+
|
|
330
|
+
```json
|
|
331
|
+
{"action": "changeModels", "requestId": "r5", "models": {"parent": "gemini-3.1-pro"}}
|
|
332
|
+
```
|
|
333
|
+
|
|
326
334
|
### Output Events (stdout)
|
|
327
335
|
|
|
328
336
|
Events are emitted as newline-delimited JSON. Command responses include `requestId`; system events do not.
|
|
@@ -351,6 +359,7 @@ All command responses include the `requestId` from the originating command.
|
|
|
351
359
|
| `error` | `error` | Error message (may precede `completed`) |
|
|
352
360
|
| `history` | `messages` | Response to `get_history` |
|
|
353
361
|
| `session_cleared` | | Response to `clear` |
|
|
362
|
+
| `models_changed` | `models?`, `modelSurfaces`, `allowedModelsByType` | Response to `changeModels` |
|
|
354
363
|
| `completed` | `success`, `error?` | Terminal event — exactly one per command |
|
|
355
364
|
|
|
356
365
|
#### Example Session
|
package/dist/headless.d.ts
CHANGED
|
@@ -131,12 +131,11 @@ declare class HeadlessSession {
|
|
|
131
131
|
*/
|
|
132
132
|
private kickDrain;
|
|
133
133
|
private handleClear;
|
|
134
|
-
/**
|
|
135
|
-
*
|
|
136
|
-
*
|
|
137
|
-
*
|
|
138
|
-
|
|
139
|
-
private handleNewSession;
|
|
134
|
+
/** Change per-agent model picks without clearing history. Takes effect on
|
|
135
|
+
* the next turn — the model is resolved live, per LLM call, from
|
|
136
|
+
* `state.models`. Omitting `models` (or sending an empty object) resets
|
|
137
|
+
* every agent to "use server defaults". */
|
|
138
|
+
private handleChangeModels;
|
|
140
139
|
/** Cancel the running turn and drain the queue. Returns the drained items. */
|
|
141
140
|
private handleCancel;
|
|
142
141
|
private handleStdinLine;
|
package/dist/headless.js
CHANGED
|
@@ -2940,15 +2940,6 @@ ${content}` : attachmentHeader;
|
|
|
2940
2940
|
const blocks = msg.content;
|
|
2941
2941
|
const text = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
|
|
2942
2942
|
const toolCalls = blocks.filter((b) => b.type === "tool").map((b) => ({ id: b.id, name: b.name, input: b.input }));
|
|
2943
|
-
const thinking = blocks.filter(
|
|
2944
|
-
(b) => b.type === "thinking" || b.type === "redacted_thinking"
|
|
2945
|
-
).map(
|
|
2946
|
-
(b) => b.type === "thinking" ? {
|
|
2947
|
-
type: "thinking",
|
|
2948
|
-
thinking: b.thinking,
|
|
2949
|
-
signature: b.signature
|
|
2950
|
-
} : { type: "redacted_thinking", data: b.data }
|
|
2951
|
-
);
|
|
2952
2943
|
const cleaned2 = {
|
|
2953
2944
|
role: msg.role,
|
|
2954
2945
|
content: text
|
|
@@ -2956,9 +2947,6 @@ ${content}` : attachmentHeader;
|
|
|
2956
2947
|
if (toolCalls.length > 0) {
|
|
2957
2948
|
cleaned2.toolCalls = toolCalls;
|
|
2958
2949
|
}
|
|
2959
|
-
if (thinking.length > 0) {
|
|
2960
|
-
cleaned2.thinking = thinking;
|
|
2961
|
-
}
|
|
2962
2950
|
if (msg.providerMetadata) {
|
|
2963
2951
|
cleaned2.providerMetadata = msg.providerMetadata;
|
|
2964
2952
|
}
|
|
@@ -3430,7 +3418,7 @@ var BROWSER_TOOLS = [
|
|
|
3430
3418
|
{
|
|
3431
3419
|
clearable: true,
|
|
3432
3420
|
name: "browserCommand",
|
|
3433
|
-
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `
|
|
3421
|
+
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recording` object \u2014 one chunk of a continuous per-session rrweb recording that the viewer stitches into a single replay (not a standalone per-call clip). Timeout: 120s.",
|
|
3434
3422
|
inputSchema: {
|
|
3435
3423
|
type: "object",
|
|
3436
3424
|
properties: {
|
|
@@ -4292,7 +4280,14 @@ async function generateImageAssets(opts) {
|
|
|
4292
4280
|
const height = opts.height || 2048;
|
|
4293
4281
|
const config = { width, height };
|
|
4294
4282
|
if (sourceImages?.length) {
|
|
4283
|
+
const [firstImage] = sourceImages;
|
|
4295
4284
|
config.images = sourceImages;
|
|
4285
|
+
config.source_images = sourceImages;
|
|
4286
|
+
config.image_ref = sourceImages;
|
|
4287
|
+
config.image = firstImage;
|
|
4288
|
+
config.image_url = firstImage;
|
|
4289
|
+
config.source_image = firstImage;
|
|
4290
|
+
config.source = firstImage;
|
|
4296
4291
|
}
|
|
4297
4292
|
const isEdit = !!sourceImages?.length;
|
|
4298
4293
|
const enhancedPrompts = isEdit ? prompts : await Promise.all(
|
|
@@ -7867,13 +7862,11 @@ var HeadlessSession = class {
|
|
|
7867
7862
|
clearSession(this.state);
|
|
7868
7863
|
return {};
|
|
7869
7864
|
}
|
|
7870
|
-
/**
|
|
7871
|
-
*
|
|
7872
|
-
*
|
|
7873
|
-
*
|
|
7874
|
-
|
|
7875
|
-
handleNewSession(models) {
|
|
7876
|
-
clearSession(this.state);
|
|
7865
|
+
/** Change per-agent model picks without clearing history. Takes effect on
|
|
7866
|
+
* the next turn — the model is resolved live, per LLM call, from
|
|
7867
|
+
* `state.models`. Omitting `models` (or sending an empty object) resets
|
|
7868
|
+
* every agent to "use server defaults". */
|
|
7869
|
+
handleChangeModels(models) {
|
|
7877
7870
|
this.state.models = models && Object.keys(models).length > 0 ? models : void 0;
|
|
7878
7871
|
saveSession(this.state);
|
|
7879
7872
|
return {
|
|
@@ -7970,12 +7963,23 @@ var HeadlessSession = class {
|
|
|
7970
7963
|
);
|
|
7971
7964
|
return;
|
|
7972
7965
|
}
|
|
7973
|
-
if (action === "
|
|
7966
|
+
if (action === "changeModels") {
|
|
7967
|
+
if (this.running) {
|
|
7968
|
+
this.emit(
|
|
7969
|
+
"completed",
|
|
7970
|
+
{
|
|
7971
|
+
success: false,
|
|
7972
|
+
error: "cannot change models while a turn is running"
|
|
7973
|
+
},
|
|
7974
|
+
requestId
|
|
7975
|
+
);
|
|
7976
|
+
return;
|
|
7977
|
+
}
|
|
7974
7978
|
const models = parsed.models;
|
|
7975
7979
|
this.dispatchSimple(
|
|
7976
7980
|
requestId,
|
|
7977
|
-
"
|
|
7978
|
-
() => this.
|
|
7981
|
+
"models_changed",
|
|
7982
|
+
() => this.handleChangeModels(models)
|
|
7979
7983
|
);
|
|
7980
7984
|
return;
|
|
7981
7985
|
}
|
package/dist/index.js
CHANGED
|
@@ -3667,15 +3667,6 @@ ${content}` : attachmentHeader;
|
|
|
3667
3667
|
const blocks = msg.content;
|
|
3668
3668
|
const text = blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
|
|
3669
3669
|
const toolCalls = blocks.filter((b) => b.type === "tool").map((b) => ({ id: b.id, name: b.name, input: b.input }));
|
|
3670
|
-
const thinking = blocks.filter(
|
|
3671
|
-
(b) => b.type === "thinking" || b.type === "redacted_thinking"
|
|
3672
|
-
).map(
|
|
3673
|
-
(b) => b.type === "thinking" ? {
|
|
3674
|
-
type: "thinking",
|
|
3675
|
-
thinking: b.thinking,
|
|
3676
|
-
signature: b.signature
|
|
3677
|
-
} : { type: "redacted_thinking", data: b.data }
|
|
3678
|
-
);
|
|
3679
3670
|
const cleaned2 = {
|
|
3680
3671
|
role: msg.role,
|
|
3681
3672
|
content: text
|
|
@@ -3683,9 +3674,6 @@ ${content}` : attachmentHeader;
|
|
|
3683
3674
|
if (toolCalls.length > 0) {
|
|
3684
3675
|
cleaned2.toolCalls = toolCalls;
|
|
3685
3676
|
}
|
|
3686
|
-
if (thinking.length > 0) {
|
|
3687
|
-
cleaned2.thinking = thinking;
|
|
3688
|
-
}
|
|
3689
3677
|
if (msg.providerMetadata) {
|
|
3690
3678
|
cleaned2.providerMetadata = msg.providerMetadata;
|
|
3691
3679
|
}
|
|
@@ -4179,7 +4167,7 @@ var init_tools = __esm({
|
|
|
4179
4167
|
{
|
|
4180
4168
|
clearable: true,
|
|
4181
4169
|
name: "browserCommand",
|
|
4182
|
-
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `
|
|
4170
|
+
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recording` object \u2014 one chunk of a continuous per-session rrweb recording that the viewer stitches into a single replay (not a standalone per-call clip). Timeout: 120s.",
|
|
4183
4171
|
inputSchema: {
|
|
4184
4172
|
type: "object",
|
|
4185
4173
|
properties: {
|
|
@@ -5027,7 +5015,14 @@ async function generateImageAssets(opts) {
|
|
|
5027
5015
|
const height = opts.height || 2048;
|
|
5028
5016
|
const config = { width, height };
|
|
5029
5017
|
if (sourceImages?.length) {
|
|
5018
|
+
const [firstImage] = sourceImages;
|
|
5030
5019
|
config.images = sourceImages;
|
|
5020
|
+
config.source_images = sourceImages;
|
|
5021
|
+
config.image_ref = sourceImages;
|
|
5022
|
+
config.image = firstImage;
|
|
5023
|
+
config.image_url = firstImage;
|
|
5024
|
+
config.source_image = firstImage;
|
|
5025
|
+
config.source = firstImage;
|
|
5031
5026
|
}
|
|
5032
5027
|
const isEdit = !!sourceImages?.length;
|
|
5033
5028
|
const enhancedPrompts = isEdit ? prompts : await Promise.all(
|
|
@@ -8659,13 +8654,11 @@ var init_headless = __esm({
|
|
|
8659
8654
|
clearSession(this.state);
|
|
8660
8655
|
return {};
|
|
8661
8656
|
}
|
|
8662
|
-
/**
|
|
8663
|
-
*
|
|
8664
|
-
*
|
|
8665
|
-
*
|
|
8666
|
-
|
|
8667
|
-
handleNewSession(models) {
|
|
8668
|
-
clearSession(this.state);
|
|
8657
|
+
/** Change per-agent model picks without clearing history. Takes effect on
|
|
8658
|
+
* the next turn — the model is resolved live, per LLM call, from
|
|
8659
|
+
* `state.models`. Omitting `models` (or sending an empty object) resets
|
|
8660
|
+
* every agent to "use server defaults". */
|
|
8661
|
+
handleChangeModels(models) {
|
|
8669
8662
|
this.state.models = models && Object.keys(models).length > 0 ? models : void 0;
|
|
8670
8663
|
saveSession(this.state);
|
|
8671
8664
|
return {
|
|
@@ -8762,12 +8755,23 @@ var init_headless = __esm({
|
|
|
8762
8755
|
);
|
|
8763
8756
|
return;
|
|
8764
8757
|
}
|
|
8765
|
-
if (action === "
|
|
8758
|
+
if (action === "changeModels") {
|
|
8759
|
+
if (this.running) {
|
|
8760
|
+
this.emit(
|
|
8761
|
+
"completed",
|
|
8762
|
+
{
|
|
8763
|
+
success: false,
|
|
8764
|
+
error: "cannot change models while a turn is running"
|
|
8765
|
+
},
|
|
8766
|
+
requestId
|
|
8767
|
+
);
|
|
8768
|
+
return;
|
|
8769
|
+
}
|
|
8766
8770
|
const models = parsed.models;
|
|
8767
8771
|
this.dispatchSimple(
|
|
8768
8772
|
requestId,
|
|
8769
|
-
"
|
|
8770
|
-
() => this.
|
|
8773
|
+
"models_changed",
|
|
8774
|
+
() => this.handleChangeModels(models)
|
|
8771
8775
|
);
|
|
8772
8776
|
return;
|
|
8773
8777
|
}
|
|
@@ -62,7 +62,7 @@ Each browserCommand returns:
|
|
|
62
62
|
- `snapshot`: the final page state after all steps complete (always present, even without an explicit snapshot step)
|
|
63
63
|
- `logs`: array of browser-side events that fired during the batch (console output, network failures, JS errors, user interactions). Check this for errors before reporting pass.
|
|
64
64
|
- `duration`: total execution time in ms
|
|
65
|
-
- `
|
|
65
|
+
- `recording` (optional): metadata for an rrweb session recording, present whenever the batch contained an interactive step (click, type, select). Each call returns one chunk of a continuous per-session recording (the viewer stitches chunks by `sessionId`/`seq` into a single replay) — it's not a standalone clip. Note in your failure reports that a recording is available so the main agent can surface it.
|
|
66
66
|
|
|
67
67
|
On error, the failing step has an `error` field and execution stops. Remaining steps are skipped.
|
|
68
68
|
|
|
@@ -15,12 +15,19 @@ Examples of good density:
|
|
|
15
15
|
These are non-negotiable. Violating them produces bad output.
|
|
16
16
|
|
|
17
17
|
- **No hex codes.** The model renders hex codes as visible text in the image. Describe colors by name and relationship: "deep emerald green with a smooth satin finish" or "warm sand beige fading into pale desaturated blue" — never "#7C3AED".
|
|
18
|
-
- **No quoted strings.** Any single or double quoted string gets rendered as literal text in the image.
|
|
19
18
|
- **No physical object framing.** Words like "artwork", "painting", "canvas", "print", "app icon", "square digital artwork" produce photorealistic mockups of a painting in a frame or an icon inset on a background. Describe the visual content directly.
|
|
20
|
-
- **No text triggers.** Words like "poster", "magazine cover", "editorial spread", "sign", or brand names risk rendering literal text, mastheads, or mockup layouts. If you want an editorial photography *style*, describe the photographic qualities — not the format.
|
|
21
19
|
- **Describe what you want, not what you don't want.** Negation doesn't work — "street with no cars" activates "cars." Say "empty street" instead.
|
|
22
20
|
- **No body part positioning.** Don't describe specific arrangements of arms, legs, or limbs.
|
|
23
|
-
- **No
|
|
21
|
+
- **No other brands as a style shortcut.** Don't borrow another company's identity as shorthand — "Apple style", "Nintendo style" — because it renders that company's literal logo. (A brand's *own* name or wordmark on its *own* asset is intended text and is not covered by this rule — see Text & wordmarks below.)
|
|
22
|
+
|
|
23
|
+
## Text & wordmarks
|
|
24
|
+
|
|
25
|
+
The model renders text well — but only the text you explicitly tell it to render, so quotation marks mean "render this literally." Use them deliberately.
|
|
26
|
+
|
|
27
|
+
- **Reproduce intended text exactly.** When the brief names a wordmark, brand name, label, sign, headline, or UI copy, carry the exact string through in quotes — e.g. a wordmark reading "Solid Credit". Never drop it, paraphrase it, or genericize it to "a wordmark": the literal text is usually the whole point of the asset, and if you omit it the model fills the space with an invented placeholder.
|
|
28
|
+
- **Direct the typography and placement.** Specify weight, case, color, and position so the text lands where the designer wants it — "a near-black grotesque sans-serif wordmark reading 'Solid Credit', centered directly below the mark."
|
|
29
|
+
- **Keep it short.** Wordmarks, labels, and short taglines render reliably; full sentences and paragraphs degrade into garbled glyphs. Trim long copy to the few words that matter, or leave it out.
|
|
30
|
+
- **Don't summon text you don't want.** When the image should have no text, don't quote stray descriptive phrases, and avoid format words that imply copy — "poster", "magazine cover", "sign", "billboard" — which can produce spurious text or mastheads. For an editorial *style*, describe the photographic qualities, not the format.
|
|
24
31
|
|
|
25
32
|
## Composition
|
|
26
33
|
|