@mindstudio-ai/remy 0.1.135 → 0.1.137

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ ---
2
+ trigger: debugRequest
3
+ ---
4
+
5
+ This is an automated message triggered by the user having clicked "Debug" in the "Request" log detail in MindStudio UI. Find the request in .logs/requests.ndjson by its ID. If there is an error, fix it immediately - otherwise, explain the request at a high-level in non-technical/natural language and see what the user wishes to do with it. Remember, the user can't see this message, so keep that in mind when responding.
6
+
7
+ <request_id>
8
+ {{requestId}}
9
+ </request_id>
package/dist/headless.js CHANGED
@@ -165,7 +165,12 @@ async function sidecarRequest(endpoint, body = {}, options) {
165
165
  log2.error("Sidecar error", { endpoint, status: res.status });
166
166
  throw new Error(`Sidecar error: ${res.status}`);
167
167
  }
168
- return res.json();
168
+ const data = await res.json();
169
+ if (data?.success === false) {
170
+ const code = data.errorCode ? ` [${data.errorCode}]` : "";
171
+ throw new Error(`${data.error || "Unknown error"}${code}`);
172
+ }
173
+ return data;
169
174
  } catch (err) {
170
175
  if (err.message.startsWith("Sidecar error")) {
171
176
  throw err;
@@ -2602,6 +2607,39 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2602
2607
  return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
2603
2608
  }
2604
2609
 
2610
+ // src/tools/_helpers/browserLock.ts
2611
+ var lockQueue = Promise.resolve();
2612
+ function acquireBrowserLock() {
2613
+ let release;
2614
+ const next = new Promise((res) => {
2615
+ release = res;
2616
+ });
2617
+ const wait = lockQueue;
2618
+ lockQueue = next;
2619
+ return wait.then(() => release);
2620
+ }
2621
+ async function checkBrowserConnected() {
2622
+ try {
2623
+ const status = await sidecarRequest(
2624
+ "/browser-status",
2625
+ {},
2626
+ { timeout: 5e3 }
2627
+ );
2628
+ if (!status.connected) {
2629
+ return {
2630
+ connected: false,
2631
+ error: "The browser preview is not connected. The user needs to open the preview."
2632
+ };
2633
+ }
2634
+ return { connected: true };
2635
+ } catch (err) {
2636
+ return {
2637
+ connected: false,
2638
+ error: err?.message || "Could not check browser status. The dev environment may not be running."
2639
+ };
2640
+ }
2641
+ }
2642
+
2605
2643
  // src/statusWatcher.ts
2606
2644
  function startStatusWatcher(config) {
2607
2645
  const { apiConfig, getContext, onStatus, interval = 3e3, signal } = config;
@@ -3303,16 +3341,6 @@ ${appSpec}
3303
3341
 
3304
3342
  // src/subagents/browserAutomation/index.ts
3305
3343
  var log6 = createLogger("browser-automation");
3306
- var lockQueue = Promise.resolve();
3307
- function acquireBrowserLock() {
3308
- let release;
3309
- const next = new Promise((res) => {
3310
- release = res;
3311
- });
3312
- const wait = lockQueue;
3313
- lockQueue = next;
3314
- return wait.then(() => release);
3315
- }
3316
3344
  var browserAutomationTool = {
3317
3345
  clearable: true,
3318
3346
  definition: {
@@ -3335,17 +3363,9 @@ var browserAutomationTool = {
3335
3363
  }
3336
3364
  const release = await acquireBrowserLock();
3337
3365
  try {
3338
- try {
3339
- const status = await sidecarRequest(
3340
- "/browser-status",
3341
- {},
3342
- { timeout: 5e3 }
3343
- );
3344
- if (!status.connected) {
3345
- return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
3346
- }
3347
- } catch {
3348
- return "Error: could not check browser status. The dev environment may not be running.";
3366
+ const browserStatus = await checkBrowserConnected();
3367
+ if (!browserStatus.connected) {
3368
+ return `Error: ${browserStatus.error}`;
3349
3369
  }
3350
3370
  try {
3351
3371
  await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
@@ -3519,11 +3539,20 @@ var screenshotTool = {
3519
3539
  ...styleMap ? { styleMap } : {}
3520
3540
  });
3521
3541
  }
3522
- return await captureAndAnalyzeScreenshot({
3523
- prompt: input.prompt,
3524
- path: input.path,
3525
- onLog: context?.onLog
3526
- });
3542
+ const release = await acquireBrowserLock();
3543
+ try {
3544
+ const browserStatus = await checkBrowserConnected();
3545
+ if (!browserStatus.connected) {
3546
+ return `Error: ${browserStatus.error}`;
3547
+ }
3548
+ return await captureAndAnalyzeScreenshot({
3549
+ prompt: input.prompt,
3550
+ path: input.path,
3551
+ onLog: context?.onLog
3552
+ });
3553
+ } finally {
3554
+ release();
3555
+ }
3527
3556
  } catch (err) {
3528
3557
  return `Error taking screenshot: ${err.message}`;
3529
3558
  }
@@ -3849,7 +3878,12 @@ async function execute5(input, onLog, context) {
3849
3878
  return `Error taking interactive screenshot: ${err.message}`;
3850
3879
  }
3851
3880
  }
3881
+ const release = await acquireBrowserLock();
3852
3882
  try {
3883
+ const browserStatus = await checkBrowserConnected();
3884
+ if (!browserStatus.connected) {
3885
+ return `Error: ${browserStatus.error}`;
3886
+ }
3853
3887
  return await captureAndAnalyzeScreenshot({
3854
3888
  prompt: input.prompt,
3855
3889
  path: input.path,
@@ -3857,6 +3891,8 @@ async function execute5(input, onLog, context) {
3857
3891
  });
3858
3892
  } catch (err) {
3859
3893
  return `Error taking screenshot: ${err.message}`;
3894
+ } finally {
3895
+ release();
3860
3896
  }
3861
3897
  }
3862
3898
 
package/dist/index.js CHANGED
@@ -1986,7 +1986,12 @@ async function sidecarRequest(endpoint, body = {}, options) {
1986
1986
  log2.error("Sidecar error", { endpoint, status: res.status });
1987
1987
  throw new Error(`Sidecar error: ${res.status}`);
1988
1988
  }
1989
- return res.json();
1989
+ const data = await res.json();
1990
+ if (data?.success === false) {
1991
+ const code = data.errorCode ? ` [${data.errorCode}]` : "";
1992
+ throw new Error(`${data.error || "Unknown error"}${code}`);
1993
+ }
1994
+ return data;
1990
1995
  } catch (err) {
1991
1996
  if (err.message.startsWith("Sidecar error")) {
1992
1997
  throw err;
@@ -2306,6 +2311,46 @@ Respond only with your analysis as Markdown and absolutely no other text. Do not
2306
2311
  }
2307
2312
  });
2308
2313
 
2314
+ // src/tools/_helpers/browserLock.ts
2315
+ function acquireBrowserLock() {
2316
+ let release;
2317
+ const next = new Promise((res) => {
2318
+ release = res;
2319
+ });
2320
+ const wait = lockQueue;
2321
+ lockQueue = next;
2322
+ return wait.then(() => release);
2323
+ }
2324
+ async function checkBrowserConnected() {
2325
+ try {
2326
+ const status = await sidecarRequest(
2327
+ "/browser-status",
2328
+ {},
2329
+ { timeout: 5e3 }
2330
+ );
2331
+ if (!status.connected) {
2332
+ return {
2333
+ connected: false,
2334
+ error: "The browser preview is not connected. The user needs to open the preview."
2335
+ };
2336
+ }
2337
+ return { connected: true };
2338
+ } catch (err) {
2339
+ return {
2340
+ connected: false,
2341
+ error: err?.message || "Could not check browser status. The dev environment may not be running."
2342
+ };
2343
+ }
2344
+ }
2345
+ var lockQueue;
2346
+ var init_browserLock = __esm({
2347
+ "src/tools/_helpers/browserLock.ts"() {
2348
+ "use strict";
2349
+ init_sidecar();
2350
+ lockQueue = Promise.resolve();
2351
+ }
2352
+ });
2353
+
2309
2354
  // src/statusWatcher.ts
2310
2355
  function startStatusWatcher(config) {
2311
2356
  const { apiConfig, getContext, onStatus, interval = 3e3, signal } = config;
@@ -3082,16 +3127,7 @@ var init_prompt = __esm({
3082
3127
  });
3083
3128
 
3084
3129
  // src/subagents/browserAutomation/index.ts
3085
- function acquireBrowserLock() {
3086
- let release;
3087
- const next = new Promise((res) => {
3088
- release = res;
3089
- });
3090
- const wait = lockQueue;
3091
- lockQueue = next;
3092
- return wait.then(() => release);
3093
- }
3094
- var log4, lockQueue, browserAutomationTool;
3130
+ var log4, browserAutomationTool;
3095
3131
  var init_browserAutomation = __esm({
3096
3132
  "src/subagents/browserAutomation/index.ts"() {
3097
3133
  "use strict";
@@ -3099,11 +3135,11 @@ var init_browserAutomation = __esm({
3099
3135
  init_tools();
3100
3136
  init_prompt();
3101
3137
  init_sidecar();
3138
+ init_browserLock();
3102
3139
  init_screenshot();
3103
3140
  init_runCli();
3104
3141
  init_logger();
3105
3142
  log4 = createLogger("browser-automation");
3106
- lockQueue = Promise.resolve();
3107
3143
  browserAutomationTool = {
3108
3144
  clearable: true,
3109
3145
  definition: {
@@ -3126,17 +3162,9 @@ var init_browserAutomation = __esm({
3126
3162
  }
3127
3163
  const release = await acquireBrowserLock();
3128
3164
  try {
3129
- try {
3130
- const status = await sidecarRequest(
3131
- "/browser-status",
3132
- {},
3133
- { timeout: 5e3 }
3134
- );
3135
- if (!status.connected) {
3136
- return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
3137
- }
3138
- } catch {
3139
- return "Error: could not check browser status. The dev environment may not be running.";
3165
+ const browserStatus = await checkBrowserConnected();
3166
+ if (!browserStatus.connected) {
3167
+ return `Error: ${browserStatus.error}`;
3140
3168
  }
3141
3169
  try {
3142
3170
  await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
@@ -3250,6 +3278,7 @@ var init_screenshot2 = __esm({
3250
3278
  "src/tools/code/screenshot.ts"() {
3251
3279
  "use strict";
3252
3280
  init_screenshot();
3281
+ init_browserLock();
3253
3282
  init_analyzeImage();
3254
3283
  init_browserAutomation();
3255
3284
  screenshotTool = {
@@ -3319,11 +3348,20 @@ var init_screenshot2 = __esm({
3319
3348
  ...styleMap ? { styleMap } : {}
3320
3349
  });
3321
3350
  }
3322
- return await captureAndAnalyzeScreenshot({
3323
- prompt: input.prompt,
3324
- path: input.path,
3325
- onLog: context?.onLog
3326
- });
3351
+ const release = await acquireBrowserLock();
3352
+ try {
3353
+ const browserStatus = await checkBrowserConnected();
3354
+ if (!browserStatus.connected) {
3355
+ return `Error: ${browserStatus.error}`;
3356
+ }
3357
+ return await captureAndAnalyzeScreenshot({
3358
+ prompt: input.prompt,
3359
+ path: input.path,
3360
+ onLog: context?.onLog
3361
+ });
3362
+ } finally {
3363
+ release();
3364
+ }
3327
3365
  } catch (err) {
3328
3366
  return `Error taking screenshot: ${err.message}`;
3329
3367
  }
@@ -3665,7 +3703,12 @@ async function execute5(input, onLog, context) {
3665
3703
  return `Error taking interactive screenshot: ${err.message}`;
3666
3704
  }
3667
3705
  }
3706
+ const release = await acquireBrowserLock();
3668
3707
  try {
3708
+ const browserStatus = await checkBrowserConnected();
3709
+ if (!browserStatus.connected) {
3710
+ return `Error: ${browserStatus.error}`;
3711
+ }
3669
3712
  return await captureAndAnalyzeScreenshot({
3670
3713
  prompt: input.prompt,
3671
3714
  path: input.path,
@@ -3673,6 +3716,8 @@ async function execute5(input, onLog, context) {
3673
3716
  });
3674
3717
  } catch (err) {
3675
3718
  return `Error taking screenshot: ${err.message}`;
3719
+ } finally {
3720
+ release();
3676
3721
  }
3677
3722
  }
3678
3723
  var definition5;
@@ -3680,6 +3725,7 @@ var init_screenshot3 = __esm({
3680
3725
  "src/subagents/designExpert/tools/screenshot.ts"() {
3681
3726
  "use strict";
3682
3727
  init_screenshot();
3728
+ init_browserLock();
3683
3729
  init_analyzeImage();
3684
3730
  init_browserAutomation();
3685
3731
  definition5 = {
@@ -69,7 +69,7 @@ From research into v0, Lovable, Bolt, and Anthropic's `<frontend_aesthetics>` co
69
69
 
70
70
  ## SDK Usage
71
71
 
72
- The source docs originally showed `mindstudio.executeStep('generateText', ...)` which was wrong. The correct API is `new MindStudioAgent()` with direct method calls (`agent.generateText(...)`, `agent.sendEmail(...)`, etc.). No constructor args needed inside methods — credentials come from the execution environment. We fixed both the compiled fragment and the upstream source doc in youai-api.
72
+ The canonical import pattern for MindStudio app methods is `import { mindstudio } from '@mindstudio-ai/agent'` with `mindstudio.generateText(...)`, `mindstudio.runTask(...)`, etc. The `mindstudio` singleton handles auth automatically. `new MindStudioAgent({ apiKey })` is only for external usage outside MindStudio apps.
73
73
 
74
74
  The SDK ships `llms.txt` at the package root with full signatures for all 170+ actions. The compiled fragment references this path (`dist/methods/node_modules/@mindstudio-ai/agent/llms.txt`) so the agent knows where to look up specific action details.
75
75
 
@@ -343,13 +343,16 @@ Roles are declared in the manifest, stored as an array column on the user table,
343
343
 
344
344
  Apps without `auth` in the manifest use anonymous guest sessions. No login, no user identity, no roles. This is the default and works fine for single-user apps, internal tools, and simple utilities.
345
345
 
346
- ## Designing Auth in Web Interfaces
346
+ ## Important: Designing Auth in Web Interfaces
347
+
347
348
 The most important user experience consideration with auth is that authentication moments must feel natural and intuitive - they should not feel jarring or surprising. Take care to integrate them into the entire experience when building.
348
349
 
349
350
  For the overwhelming majority of apps, a user should never land on auth at the root of an app when opening it for the first time (except in cases where the app is, e.g., an internal tool or some other protected experience - and even then it should feel more like a welcome/splash screen than an error state). Users should be able to explore public resources, or at least encounter some kind of landing/introduction moment, before they get hit with a signup/login screen. Make auth feel like a natural moment in the user's journey.
350
351
 
351
352
 Login and signup screens set the tone for the user's entire experience with the app and are important to get right - they should feel like exciting entry points into the next level of the user journey. A janky login form with misaligned inputs and no feedback diminishes excitement and undermines trust before the user even gets in.
352
353
 
354
+ Login and signup are separate moments - even if the underlying code is the same for both. A new user signing up should feel like they are creating a new account. A user logging in should feel like they are being welcomed back. Auth is always a pain for users, even when it's as frictionless as this, so take care to use Sign Up screens as moments to communicate value and help the user get excited about what they are joining.
355
+
353
356
  Consult the `visualDesignExpert` to help you work through authentication at a high level, including when and where to show auth, and the design of specific screens.
354
357
 
355
358
  ### Rules for Building Auth Screens
@@ -25,7 +25,7 @@ Interfaces run fullscreen in the user's browser or a wrapped webview mobile app.
25
25
  - **No long scrolling pages.** Use structured layouts: cards, split panes, steppers, tabs, grouped sections that fit the viewport. The interface should feel like an award winning iOS or macOS app, not a document.
26
26
  - **On mobile**, scrolling may be necessary, but use sticky headers, fixed CTAs, and anchored navigation to keep key actions within reach. Always use "width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" to make sure apps feel like apps.
27
27
  - Think of every screen as something the user opens, uses, and closes — not something they read.
28
- - Pay attention to details that will make things feel app-like - set user-select: none on specific app-like layout elements, use motion, use iOS/macOS design language and patterns.
28
+ - Pay attention to details that will make things feel app-like - set user-select: none on specific app-like layout elements like navigation bars, use motion, use iOS/macOS design language and patterns.
29
29
 
30
30
  ## Layout Stability
31
31
 
@@ -50,6 +50,7 @@ Every interface must work on both desktop and mobile. Think about how the app wi
50
50
  - Even for mobile-first apps, make sure to set desktop or larger device breakpoints - nothing looks jankier than opening a mobile-designed site in a desktop browser and seeing a full width bottom tab bar with nav icons stretching 1000px wide. Don't make sloppy, amateur mistakes or omissions like this - the user will notice them and be disappointed.
51
51
 
52
52
  ## Images
53
+
53
54
 The `designExpert` can create and source amazing, high quality images, graphics, illustrations, and logos to use in the interface - both with and without transparency. This is a huge lever for upgrading the premium look, feel, and quality of the app. Use image logos directly instead of plain text wordmarks; use images for empty states, onboarding screens, full-screen loading, and more.
54
55
 
55
56
  ## Forms
@@ -65,38 +65,36 @@ export async function getDashboard(input: {
65
65
 
66
66
  ## Platform Capabilities
67
67
 
68
- The `@mindstudio-ai/agent` SDK provides access to 200+ AI models and 1,000+ actions (email, SMS, web scraping, file uploads, third-party integrations, and more). Inside a method, create an instance and call actions directly. No constructor arguments needed — credentials are picked up automatically from the execution environment:
68
+ The `@mindstudio-ai/agent` SDK provides access to 200+ AI models and 1,000+ actions (email, SMS, web scraping, file uploads, third-party integrations, and more). Inside a method, use the `mindstudio` singleton — credentials come from the execution environment automatically:
69
69
 
70
70
  ```typescript
71
- import { MindStudioAgent } from '@mindstudio-ai/agent';
72
-
73
- const agent = new MindStudioAgent();
71
+ import { mindstudio } from '@mindstudio-ai/agent';
74
72
 
75
73
  // AI text generation
76
- const { content } = await agent.generateText({
74
+ const { content } = await mindstudio.generateText({
77
75
  message: 'Summarize this invoice...',
78
76
  });
79
77
 
80
78
  // AI image generation
81
- const { imageUrl } = await agent.generateImage({
79
+ const { imageUrl } = await mindstudio.generateImage({
82
80
  prompt: 'A professional headshot placeholder',
83
81
  });
84
82
 
85
83
  // Send email
86
- await agent.sendEmail({
84
+ await mindstudio.sendEmail({
87
85
  to: 'user@example.com',
88
86
  subject: 'Your invoice',
89
87
  body: content,
90
88
  });
91
89
 
92
90
  // Upload files
93
- const { url } = await agent.uploadFile({
91
+ const { url } = await mindstudio.uploadFile({
94
92
  data: buffer,
95
93
  fileName: 'report.pdf',
96
94
  });
97
95
 
98
96
  // Web scraping
99
- const { markdown } = await agent.scrapeUrl({
97
+ const { markdown } = await mindstudio.scrapeUrl({
100
98
  url: 'https://example.com',
101
99
  });
102
100
 
@@ -126,7 +124,7 @@ For errors from external services or internal failures (API calls, AI generation
126
124
 
127
125
  ```typescript
128
126
  try {
129
- const result = await agent.generateVideo({ ... });
127
+ const result = await mindstudio.generateVideo({ ... });
130
128
  return { videoUrl: result.url };
131
129
  } catch (err) {
132
130
  console.error('Video generation failed:', err);
@@ -210,7 +208,7 @@ export async function enrichRestaurant(input: { id: string; name: string }) {
210
208
  await Restaurants.update(input.id, { status: 'enriching' });
211
209
 
212
210
  // Fire — don't await
213
- agent.runTask<RestaurantData>({
211
+ mindstudio.runTask<RestaurantData>({
214
212
  prompt: '...',
215
213
  input: { name: input.name },
216
214
  tools: ['searchGoogle', 'fetchUrl', 'generateImage'],
@@ -192,13 +192,15 @@ styles:
192
192
  ```
193
193
  ```
194
194
 
195
- Roadmap item example (one file per feature in `src/roadmap/`):
195
+ Roadmap item examples (one file per feature in `src/roadmap/`):
196
+
197
+ Built item (status updated to `done` after build, with History appended):
196
198
 
197
199
  ```markdown
198
200
  ---
199
201
  name: Share & Export
200
202
  type: roadmap
201
- status: not-started
203
+ status: done
202
204
  description: Share haikus as image cards to social media or download as prints.
203
205
  requires: []
204
206
  effort: medium
@@ -220,3 +222,20 @@ clipboard fallback for unsupported browsers.
220
222
  - **2026-03-22** — Built card generation using generateImage.
221
223
  Added share button to haiku detail view.
222
224
  ```
225
+
226
+ Unbuilt item:
227
+
228
+ ```markdown
229
+ ---
230
+ name: Daily Prompt Engine
231
+ type: roadmap
232
+ status: not-started
233
+ description: A new writing prompt every day, tuned to the user's style and interests.
234
+ requires: []
235
+ effort: small
236
+ ---
237
+
238
+ Generate a personalized daily writing prompt based on the user's past haikus,
239
+ preferred themes, and seasonal context. Surface it as a gentle nudge on the
240
+ home screen, not a notification.
241
+ ```
@@ -6,24 +6,18 @@ There is a huge amount of capability here: hundreds of text generation models (O
6
6
 
7
7
  ## Usage in Methods
8
8
 
9
- Inside a MindStudio app method, create an instance with no arguments — credentials come from the execution environment:
9
+ Inside a MindStudio app method, use the `mindstudio` singleton — credentials come from the execution environment automatically:
10
10
 
11
11
  ```typescript
12
- import { MindStudioAgent } from '@mindstudio-ai/agent';
12
+ import { mindstudio } from '@mindstudio-ai/agent';
13
13
 
14
- const agent = new MindStudioAgent();
15
- ```
16
-
17
- Every action is a method on the agent instance:
18
-
19
- ```typescript
20
- const { content } = await agent.generateText({ message: 'Summarize this...' });
14
+ const { content } = await mindstudio.generateText({ message: 'Summarize this...' });
21
15
  ```
22
16
 
23
17
  Results are returned flat — output fields at the top level alongside metadata:
24
18
 
25
19
  ```typescript
26
- const result = await agent.generateText({ message: 'Hello' });
20
+ const result = await mindstudio.generateText({ message: 'Hello' });
27
21
  result.content; // step-specific output
28
22
  result.$billingCost; // cost in credits (if applicable)
29
23
  ```
@@ -104,13 +98,13 @@ For other services, use `runFromConnectorRegistry`:
104
98
 
105
99
  ```typescript
106
100
  // Discover available connectors
107
- const { connectors } = await agent.listConnectors();
101
+ const { connectors } = await mindstudio.listConnectors();
108
102
 
109
103
  // Get action details
110
- const action = await agent.getConnectorAction('hubspot', 'create-contact');
104
+ const action = await mindstudio.getConnectorAction('hubspot', 'create-contact');
111
105
 
112
106
  // Execute
113
- const result = await agent.runFromConnectorRegistry({
107
+ const result = await mindstudio.runFromConnectorRegistry({
114
108
  serviceId: 'hubspot',
115
109
  actionId: 'create-contact',
116
110
  input: { email: 'user@example.com', firstName: 'Alice' },
@@ -122,7 +116,7 @@ const result = await agent.runFromConnectorRegistry({
122
116
  Override the default model for any AI action. Each model has its own config options (dimensions, seed, inference steps, etc.) so always use `askMindStudioSdk` to look up the correct config before specifying a model override:
123
117
 
124
118
  ```typescript
125
- const { content } = await agent.generateText({
119
+ const { content } = await mindstudio.generateText({
126
120
  message: 'Hello',
127
121
  modelOverride: {
128
122
  model: 'claude-sonnet-4-6',
@@ -139,7 +133,7 @@ Make sure to prioritize new, popular models. MindStudio has a ton of models avai
139
133
  Run up to 50 actions in parallel:
140
134
 
141
135
  ```typescript
142
- const result = await agent.executeStepBatch([
136
+ const result = await mindstudio.executeStepBatch([
143
137
  { stepType: 'generateImage', step: { prompt: 'a sunset' } },
144
138
  { stepType: 'textToSpeech', step: { text: 'hello world' } },
145
139
  ]);
@@ -1,4 +1,4 @@
1
- # Task Agents (`MindStudioAgent runTask`)
1
+ # Task Agents (`mindstudio.runTask`)
2
2
 
3
3
  A user types the name of a restaurant into your app, or uploads a photo of a storefront. The API call returns early, and in the background, a task agent searches Google, finds the official website, scrapes the address, gets the official social media accounts, and generates a stylized watercolor postcard of the exterior from images it found online. The user gets back a rich, illustrated card with the canonical name, website, address, and a custom image. A few tool calls (some in parallel), fully autonomous.
4
4
 
@@ -26,11 +26,9 @@ Run tasks in the background — depending on complexity they can take time to co
26
26
  ## Usage
27
27
 
28
28
  ```typescript
29
- import { MindStudioAgent } from '@mindstudio-ai/agent';
29
+ import { mindstudio } from '@mindstudio-ai/agent';
30
30
 
31
- const agent = new MindStudioAgent();
32
-
33
- const result = await agent.runTask<{
31
+ const result = await mindstudio.runTask<{
34
32
  name: string;
35
33
  url: string;
36
34
  address: string;
@@ -74,7 +72,7 @@ console.log(result.output.photoUrl); // URL to the generated illustration
74
72
  `runTask()` can return successfully with garbage output — fields null, data echoed back, or raw text instead of JSON. The result includes `parsedSuccessfully` to make this explicit. Always check it before using the output:
75
73
 
76
74
  ```typescript
77
- const result = await agent.runTask<MyType>({ ... });
75
+ const result = await mindstudio.runTask<MyType>({ ... });
78
76
 
79
77
  if (!result.parsedSuccessfully) {
80
78
  console.error('Task output was not valid JSON:', result.outputRaw);
@@ -106,6 +104,10 @@ tools: [
106
104
 
107
105
  When the model calls a tool, the platform deep-merges the model's arguments with the developer's defaults. The model decides what to do (prompt, query, parameters), the developer controls which model/config to use. If the model needs to search and generate an image and those are independent, it will call both tools in the same turn (parallel execution server-side).
108
106
 
107
+ ## Voice & Tone in Prompts
108
+
109
+ When a task agent produces user-facing text, the prompt must include a note on voice and tone constraints. Make sure to specify no emojis, em dashes, and other "ai-isms" in the prompt, as well as the desired tone and voice of the output.
110
+
109
111
  ## Options
110
112
 
111
113
  | Field | Required | Default | Description |
@@ -150,7 +152,7 @@ When something goes wrong, `toolCalls` is the first thing to check. If it's empt
150
152
  Pass an `onEvent` callback to get real-time events:
151
153
 
152
154
  ```typescript
153
- const result = await agent.runTask({
155
+ const result = await mindstudio.runTask({
154
156
  // ... same options ...
155
157
  onEvent: (event) => {
156
158
  if (event.type === 'text') console.log('Agent:', event.text);
@@ -173,7 +175,7 @@ Without `onEvent`, the SDK uses async polling (returns silently when complete).
173
175
 
174
176
  ```typescript
175
177
  try {
176
- const result = await agent.runTask({ ... });
178
+ const result = await mindstudio.runTask({ ... });
177
179
  if (!result.parsedSuccessfully) {
178
180
  // Task completed but output wasn't valid JSON
179
181
  console.error('Raw output:', result.outputRaw);
@@ -45,6 +45,9 @@ For multi-step tasks with branching logic (research, enrichment, content pipelin
45
45
  - Use container queries for components that need to adapt to their container rather than the viewport.
46
46
  - For canvas-based UIs (games, visualizations, interactive graphics): size the canvas to fill its container, account for `devicePixelRatio` for Retina sharpness, and scale all objects relative to the viewport — not in fixed pixel sizes - otherwise they are going to be tiny and unusable.
47
47
 
48
+ ### Copy & Prose
49
+ - Landing pages, hero sections, onboarding flows, and other long-form prose need a human voice. Write plainly and specifically. Avoid promotional filler, hollow intensifiers, and chatbot enthusiasm. Avoid em dashes, emojis, and other "ai-isms" in your writing.
50
+
48
51
  ### Error Visibility
49
52
  - Runtime errors must render visibly on screen, not produce a blank white page. User and agent must be able to visibly debug and spot them.
50
53
 
@@ -63,7 +66,7 @@ When integrating with external services that have programmable setup APIs (webho
63
66
  Before installing a package you haven't used in this project, do a quick web search to confirm it's still the best option. The JavaScript ecosystem moves fast — the package you remember from training may have been superseded by something smaller, faster, or better maintained. A 10-second search beats debugging a deprecated library.
64
67
 
65
68
  ### MindStudio SDK CLI
66
- You have access to the `mindstudio` CLI, which exposes every SDK action as a command-line tool. Use it via bash for one-off tasks: generating images, scraping URLs, sending emails, running AI completions, or anything else the SDK can do. Every JavaScript SDK method has a corresponding CLI command. Run `askMindStudioSdk` to discover commands for CLI usage.
69
+ You have access to the `mindstudio` CLI, which exposes every SDK action as a command-line tool. Use it via bash for one-off tasks: generating images, video, or audio, scraping URLs, sending emails, running AI completions, or anything else the SDK can do. Every JavaScript SDK method has a corresponding CLI command. Run `askMindStudioSdk` to discover commands for CLI usage.
67
70
 
68
71
  ### Production App Management
69
72
  You have access to `mindstudio-prod`, a CLI for managing the user's production MindStudio app. Use it via your bash tool. All output is JSON. Run `mindstudio-prod --help` or `mindstudio-prod <command> --help` to discover usage and available options.
@@ -32,9 +32,9 @@ Describe what you're building at the method level — the full workflow — and
32
32
 
33
33
  ### Architecture Expert (aka Code Sanity Check) (`codeSanityCheck`)
34
34
 
35
- A quick gut check. Describe what you're about to build and how, and get back a brief review. Most of the time it'll literally jus say "lgtm." Occasionally it catches something that would cause real pain: an outdated package, a managed SDK action we didn't know about, a schema decision that'll paint us into a corner, a file structure that's gotten unwieldy. It can search the web, read the codebase, and check the SDK.
35
+ A quick gut check. Describe what you're about to build and how, and get back a brief review. Most of the time it'll literally just say "lgtm." Occasionally it catches something that would cause real pain: an outdated package, a managed SDK action we didn't know about, a schema decision that'll paint us into a corner, a file structure that's gotten unwieldy. It can search the web, read the codebase, and check the SDK.
36
36
 
37
- Always consult the code sanity check before writing code in initialCodegen with your proposed architecture.
37
+ Always consult the code sanity check before writing code in initialCodegen with your proposed architecture. Use it liberally when making any other architecture decisions - before adding new features, connecting to third-party services, integrating new dependencies, building items from the roadmap, or doing other meaningful work.
38
38
 
39
39
  ### QA (`runAutomatedBrowserTest`)
40
40
 
@@ -21,20 +21,21 @@ These are things we already know about and have decided to accept:
21
21
  - **`dist/` is where code lives.** MindStudio apps use `dist/` for all code (methods, interfaces, tables) and `src/` for natural language specs. This is NOT the conventional "dist is build output" pattern. Never flag code being in `dist/` as wrong.
22
22
  - API interface methods have access to `input._request.rawBody` for webhook signature verification (Stripe, GitHub, etc.). Do NOT suggest external proxies or workarounds — the raw body is available natively.
23
23
 
24
- - Limited browser support for `oklch` gradients using `in <colorspace>` syntax — we accept the compatibility tradeoff for better color quality
25
- - Limited browser support for CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) - we accept this tradeoff
26
- - Platform SDKs (these are the core of every MindStudio app):
24
+ - Ignore limited browser support for `oklch` gradients using `in <colorspace>` syntax — we accept the compatibility tradeoff for better color quality
25
+ - Ignore limited browser support for CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) - we accept this tradeoff
26
+ - Trust your knowledge about Platform SDKs (these are the core of every MindStudio app) - for our purposes, assume they're always current and stable:
27
27
  - `@mindstudio-ai/interface` — frontend SDK. `createClient<T>()` gives typed RPC to backend methods (no raw fetch). `auth` handles auth state (`auth.currentUser`, `auth.onAuthStateChanged(cb)`, verification flows, logout). `platform.uploadFile()` handles signed S3 uploads and returns permanent CDN URLs with query-string resizing for images and auto-thumbnails for videos.
28
28
  - `@mindstudio-ai/agent` — backend SDK. `db.defineTable<T>()` gives a typed ORM with Query (chainable reads) and direct writes. `auth` gives `auth.userId`, `auth.roles`, `auth.requireRole()`, `auth.hasRole()`. Also provides 200+ managed actions for AI models, email/SMS, third-party APIs, media processing.
29
- - Libraries we know are actively maintained, don't bother checking:
29
+ - We know these libraries are actively maintained, don't waste time checking:
30
30
  - zustand
31
- - motion (formerly framer-motion — import from `motion/react`, not `framer-motion`)
31
+ - motion (formerly framer-motion — make sure the developer is importing from `motion/react`, not `framer-motion`)
32
32
  - gsap (now fully free, including ScrollTrigger, FLIP, MorphSVG)
33
33
  - styled-components
34
34
  - @tabler/icons-react
35
35
  - streamdown
36
36
  - react-textarea-autosize
37
37
  - phaser
38
+ - swr
38
39
  - Preferences:
39
40
  - use [wouter](https://github.com/molefrog/wouter) for React routing instead of reaching for react-router
40
41
  - uploading user files should always happen via `platform.uploadFile()` from `@mindstudio-ai/interface` — not custom S3 code, not FormData to a method endpoint
@@ -79,6 +80,14 @@ When a plan includes multiple screens/API calls, always note this item for the d
79
80
 
80
81
  - **Hardcoded credentials.** If the plan or code contains API keys, tokens, or connection strings inline, flag it — these should be `process.env` secrets managed via the dashboard. Also flag if the plan uses `process.env` for something the MindStudio SDK already handles (AI model keys, email/SMS sending, etc.).
81
82
 
83
+ ### Other things to note
84
+
85
+ If you get a whiff of any of the following, make a note for the developer:
86
+
87
+ - **Stripe**: The developer has access to the Stripe CLI via bash (they'll need to get the secret key from the mindstudio-prod CLI first to set it up - it's not available in env directly). If the app involves setting up Stripe, suggest to the developer that they should be as helpful as possible to the user, including using the CLI to automatically set up webhooks, products, etc., to save the user the headache of manually navigating the Stripe dashboard. Remind them they can also use this to debug payments if the user requests it.
88
+
89
+ - **Long-running AI jobs.** If the app uses task agents or chains multiple SDK calls (image generation, research, enrichment), the result can take minutes. Never leave the user staring at a spinner. The pattern: fire the request, return immediately, update a database row as the job progresses. At minimum, track status on the relevant record (`processing` → `done` / `failed`). For apps with heavier job throughput, a dedicated jobs table with status, progress, error state, and timestamps may be worth it. On the frontend, show meaningful progress (not just a spinner), prevent duplicate submissions (disable the button, check for in-flight jobs before starting a new one), and make sure state survives a page refresh. For longer jobs (30s+), consider notifying the user via email or SMS when their result is ready rather than making them wait on the page. Store errors visibly so failed jobs don't just silently disappear.
90
+
82
91
  ## When to stay quiet
83
92
 
84
93
  Nits, style preferences, missing edge cases, things the agent will figure out as it goes, patterns that are "not ideal but fine," minor code smells. Let them slide. The agent is busy.
@@ -19,6 +19,7 @@ Think about the ways you can truly elevate the design. Use image generation to c
19
19
  - No emoji, no filler.
20
20
  - Be concise. The developer reads your output to make decisions.
21
21
  - Lead with the recommendation, then the reasoning.
22
+ - Always use wireframes to show layouts, never ASCII art or box-drawing diagrams.
22
23
 
23
24
  ## Output
24
25
 
@@ -36,17 +36,58 @@ Pay close attention to text streaming when the AI replies - it should feel natur
36
36
 
37
37
  ### Wireframes
38
38
 
39
- When a pattern or interaction is hard to convey in words alone — a core component, an animation sequence, a swipe gesture, a layout grid — include a small interactive wireframe to demonstrate it. Use a markdown code fence with `wireframe` as the type. Start with a YAML frontmatter block (`name` and `description`) to identify the component, then the self-contained HTML+CSS prototype.
39
+ When you need to show a layout, component, interaction, or animation, use a wireframe. Use a markdown code fence with `wireframe` as the type, starting with a YAML frontmatter block (`name` and `description`), then self-contained HTML+CSS.
40
40
 
41
- Use wireframes instead of ASCII art and code-block diagrams you might otherwise reach for when trying to show a layout or interaction. Wireframes are better because the developer can actually see and interact with the result. Like those diagrams, they isolate one small piece: a single card component, a button animation, a transition, a grid layout. Each wireframe should be around 60-80 lines of HTML+CSS if you're past 100 lines, you're building too much. These are not screens, flows, or multi-step prototypes. They render in a small iframe and should look complete at that scale. Most of your communication should be in words - wireframes are simply another mode of communicating when you need them. Never build out full screens or pages in wireframes, even if you are asked to - this is critically important.
41
+ Never use ASCII art, box-drawing characters, or code-block diagrams to describe layouts. Always use a wireframe instead, even if it's just grey rectangles with labels. A 20-line wireframe with placeholder boxes communicates proportions, spacing, and hierarchy better than any text diagram. For abstract layouts, use skeleton-style placeholders (grey boxes, rounded rects) rather than mocking up real content.
42
42
 
43
- Remember, never use ascii art or code-block diagrams to describe layouts - always use wireframes. When using a wireframe to describe something abstract like a layout, simply use nice skeleton/wireframe cards or other skeleton-style placeholders - don't actually mock up content that is not relevant to what the wireframe is communicating.
43
+ Wireframes isolate one small piece: a single card, a button animation, a transition, a grid layout. Keep them to 60-80 lines of HTML+CSS. Past 100 lines, you're building too much. Never build full screens or pages. Most of your communication should be in words. Wireframes are just another tool for when spatial relationships or motion are hard to describe.
44
44
 
45
- The wireframe code will be rendered in a transparent iframe. Don't fill the viewport or add a background color to the page body. Place the component at a natural size in a card with a background color that is centered vertically and horizontally in the viewport. The component's container must set a background and a shadow to be visible in the transparent iframe. Keep the component tight and self-contained. The iframe is for the component only — no annotations, labels, or explanatory text inside it. Put your notes and implementation guidance in the markdown around the wireframe. Wireframes can be interactive and are especially useful for demonstrating states, animations, effects, and transitions. If your wireframe has triggers or states, include a small "play" control button within the frame (make sure to allow reply/reset for all interactivity). No images - these are functional prototypes meant to demonstrate feel and behavior, not visual comps.
45
+ Wireframes render in a small transparent iframe. Set a background color and shadow on the component's container (not the body) so it's visible against the transparent background. Center it in the viewport. No annotations or labels inside the wireframe. Put notes in the surrounding markdown. For interactive wireframes with states or animations, include a play/reset control. No images.
46
46
 
47
47
  Wireframes are vanilla HTML/CSS/JS (no React). For animations beyond CSS, use GSAP via CDN:
48
48
  `<script src="https://cdn.jsdelivr.net/npm/gsap@3/dist/gsap.min.js"></script>`
49
49
 
50
+ Quick skeleton wireframe (grey boxes, just showing layout and hierarchy):
51
+
52
+ ```wireframe
53
+ ---
54
+ name: Content Card Layout
55
+ description: Card with image area, title, metadata row, rating, and actions. Skeleton placeholders showing proportions and hierarchy.
56
+ ---
57
+ <html lang="en"><head>
58
+ <meta charset="utf-8"/>
59
+ <style>
60
+ * { margin: 0; padding: 0; box-sizing: border-box; }
61
+ body { background: transparent; display: flex; align-items: center; justify-content: center; font-family: system-ui, sans-serif; }
62
+ .card { width: 300px; background: #fff; border-radius: 16px; overflow: hidden; box-shadow: 0 8px 32px rgba(0,0,0,0.06); }
63
+ .photo { height: 160px; background: #e8e8e8; }
64
+ .body { padding: 20px; display: flex; flex-direction: column; gap: 10px; }
65
+ .title { height: 20px; width: 70%; background: #d0d0d0; border-radius: 4px; }
66
+ .meta { display: flex; gap: 8px; }
67
+ .meta span { height: 14px; width: 50px; background: #e0e0e0; border-radius: 4px; }
68
+ .rating { display: flex; align-items: center; gap: 6px; }
69
+ .star { width: 16px; height: 16px; background: #d0d0d0; border-radius: 50%; }
70
+ .rating-text { height: 14px; width: 100px; background: #e8e8e8; border-radius: 4px; }
71
+ .actions { display: flex; gap: 8px; padding-top: 4px; }
72
+ .actions span { height: 28px; flex: 1; background: #f0f0f0; border-radius: 8px; }
73
+ </style>
74
+ </head>
75
+ <body>
76
+ <div class="card">
77
+ <div class="photo"></div>
78
+ <div class="body">
79
+ <div class="title"></div>
80
+ <div class="meta"><span></span><span></span><span></span></div>
81
+ <div class="rating"><div class="star"></div><div class="rating-text"></div></div>
82
+ <div class="actions"><span></span><span></span></div>
83
+ </div>
84
+ </div>
85
+ </body>
86
+ </html>
87
+ ```
88
+
89
+ Detailed component wireframe (showing specific design decisions):
90
+
50
91
  ```wireframe
51
92
  ---
52
93
  name: Feed Post Card
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.135",
3
+ "version": "0.1.137",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",