@mindstudio-ai/remy 0.1.20 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1012,12 +1012,42 @@ var init_confirmDestructiveAction = __esm({
1012
1012
  }
1013
1013
  });
1014
1014
 
1015
- // src/subagents/sdkConsultant/index.ts
1015
+ // src/subagents/common/runCli.ts
1016
1016
  import { exec } from "child_process";
1017
+ function runCli(cmd, options) {
1018
+ return new Promise((resolve) => {
1019
+ exec(
1020
+ cmd,
1021
+ {
1022
+ timeout: options?.timeout ?? 6e4,
1023
+ maxBuffer: options?.maxBuffer ?? 1024 * 1024
1024
+ },
1025
+ (err, stdout, stderr) => {
1026
+ if (stdout.trim()) {
1027
+ resolve(stdout.trim());
1028
+ return;
1029
+ }
1030
+ if (err) {
1031
+ resolve(`Error: ${stderr.trim() || err.message}`);
1032
+ return;
1033
+ }
1034
+ resolve("(no response)");
1035
+ }
1036
+ );
1037
+ });
1038
+ }
1039
+ var init_runCli = __esm({
1040
+ "src/subagents/common/runCli.ts"() {
1041
+ "use strict";
1042
+ }
1043
+ });
1044
+
1045
+ // src/subagents/sdkConsultant/index.ts
1017
1046
  var askMindStudioSdkTool;
1018
1047
  var init_sdkConsultant = __esm({
1019
1048
  "src/subagents/sdkConsultant/index.ts"() {
1020
1049
  "use strict";
1050
+ init_runCli();
1021
1051
  askMindStudioSdkTool = {
1022
1052
  definition: {
1023
1053
  name: "askMindStudioSdk",
@@ -1035,22 +1065,8 @@ var init_sdkConsultant = __esm({
1035
1065
  },
1036
1066
  async execute(input) {
1037
1067
  const query = input.query;
1038
- return new Promise((resolve) => {
1039
- exec(
1040
- `mindstudio ask ${JSON.stringify(query)}`,
1041
- { timeout: 6e4, maxBuffer: 512 * 1024 },
1042
- (err, stdout, stderr) => {
1043
- if (stdout.trim()) {
1044
- resolve(stdout.trim());
1045
- return;
1046
- }
1047
- if (err) {
1048
- resolve(`Error: ${stderr.trim() || err.message}`);
1049
- return;
1050
- }
1051
- resolve("(no response)");
1052
- }
1053
- );
1068
+ return runCli(`mindstudio ask ${JSON.stringify(query)}`, {
1069
+ maxBuffer: 512 * 1024
1054
1070
  });
1055
1071
  }
1056
1072
  };
@@ -1058,11 +1074,11 @@ var init_sdkConsultant = __esm({
1058
1074
  });
1059
1075
 
1060
1076
  // src/tools/common/fetchUrl.ts
1061
- import { exec as exec2 } from "child_process";
1062
1077
  var fetchUrlTool;
1063
1078
  var init_fetchUrl = __esm({
1064
1079
  "src/tools/common/fetchUrl.ts"() {
1065
1080
  "use strict";
1081
+ init_runCli();
1066
1082
  fetchUrlTool = {
1067
1083
  definition: {
1068
1084
  name: "scapeWebUrl",
@@ -1089,35 +1105,20 @@ var init_fetchUrl = __esm({
1089
1105
  if (screenshot) {
1090
1106
  pageOptions.screenshot = true;
1091
1107
  }
1092
- const cmd = `mindstudio scrape-url --url ${JSON.stringify(url)} --page-options ${JSON.stringify(JSON.stringify(pageOptions))} --no-meta`;
1093
- return new Promise((resolve) => {
1094
- exec2(
1095
- cmd,
1096
- { timeout: 6e4, maxBuffer: 1024 * 1024 },
1097
- (err, stdout, stderr) => {
1098
- if (stdout.trim()) {
1099
- resolve(stdout.trim());
1100
- return;
1101
- }
1102
- if (err) {
1103
- resolve(`Error: ${stderr.trim() || err.message}`);
1104
- return;
1105
- }
1106
- resolve("(no response)");
1107
- }
1108
- );
1109
- });
1108
+ return runCli(
1109
+ `mindstudio scrape-url --url ${JSON.stringify(url)} --page-options ${JSON.stringify(JSON.stringify(pageOptions))} --no-meta`
1110
+ );
1110
1111
  }
1111
1112
  };
1112
1113
  }
1113
1114
  });
1114
1115
 
1115
1116
  // src/tools/common/searchGoogle.ts
1116
- import { exec as exec3 } from "child_process";
1117
1117
  var searchGoogleTool;
1118
1118
  var init_searchGoogle = __esm({
1119
1119
  "src/tools/common/searchGoogle.ts"() {
1120
1120
  "use strict";
1121
+ init_runCli();
1121
1122
  searchGoogleTool = {
1122
1123
  definition: {
1123
1124
  name: "searchGoogle",
@@ -1135,24 +1136,10 @@ var init_searchGoogle = __esm({
1135
1136
  },
1136
1137
  async execute(input) {
1137
1138
  const query = input.query;
1138
- const cmd = `mindstudio search-google --query ${JSON.stringify(query)} --export-type json --output-key results --no-meta`;
1139
- return new Promise((resolve) => {
1140
- exec3(
1141
- cmd,
1142
- { timeout: 6e4, maxBuffer: 512 * 1024 },
1143
- (err, stdout, stderr) => {
1144
- if (stdout.trim()) {
1145
- resolve(stdout.trim());
1146
- return;
1147
- }
1148
- if (err) {
1149
- resolve(`Error: ${stderr.trim() || err.message}`);
1150
- return;
1151
- }
1152
- resolve("(no response)");
1153
- }
1154
- );
1155
- });
1139
+ return runCli(
1140
+ `mindstudio search-google --query ${JSON.stringify(query)} --export-type json --output-key results --no-meta`,
1141
+ { maxBuffer: 512 * 1024 }
1142
+ );
1156
1143
  }
1157
1144
  };
1158
1145
  }
@@ -1509,7 +1496,7 @@ ${unifiedDiff(input.path, content, updated)}`;
1509
1496
  });
1510
1497
 
1511
1498
  // src/tools/code/bash.ts
1512
- import { exec as exec4 } from "child_process";
1499
+ import { exec as exec2 } from "child_process";
1513
1500
  var DEFAULT_TIMEOUT_MS, DEFAULT_MAX_LINES3, bashTool;
1514
1501
  var init_bash = __esm({
1515
1502
  "src/tools/code/bash.ts"() {
@@ -1547,7 +1534,7 @@ var init_bash = __esm({
1547
1534
  const maxLines = input.maxLines === 0 ? Infinity : input.maxLines || DEFAULT_MAX_LINES3;
1548
1535
  const timeoutMs = input.timeout ? input.timeout * 1e3 : DEFAULT_TIMEOUT_MS;
1549
1536
  return new Promise((resolve) => {
1550
- exec4(
1537
+ exec2(
1551
1538
  input.command,
1552
1539
  {
1553
1540
  timeout: timeoutMs,
@@ -1589,7 +1576,7 @@ var init_bash = __esm({
1589
1576
  });
1590
1577
 
1591
1578
  // src/tools/code/grep.ts
1592
- import { exec as exec5 } from "child_process";
1579
+ import { exec as exec3 } from "child_process";
1593
1580
  function formatResults(stdout, max) {
1594
1581
  const lines = stdout.trim().split("\n");
1595
1582
  let result = lines.join("\n");
@@ -1640,12 +1627,12 @@ var init_grep = __esm({
1640
1627
  const rgCmd = `rg -n --no-heading --max-count=${max}${globFlag} '${escaped}' ${searchPath}`;
1641
1628
  const grepCmd = `grep -rn --max-count=${max} '${escaped}' ${searchPath} --include='*.ts' --include='*.tsx' --include='*.js' --include='*.json' --include='*.md'`;
1642
1629
  return new Promise((resolve) => {
1643
- exec5(rgCmd, { maxBuffer: 512 * 1024 }, (err, stdout) => {
1630
+ exec3(rgCmd, { maxBuffer: 512 * 1024 }, (err, stdout) => {
1644
1631
  if (stdout?.trim()) {
1645
1632
  resolve(formatResults(stdout, max));
1646
1633
  return;
1647
1634
  }
1648
- exec5(grepCmd, { maxBuffer: 512 * 1024 }, (_err, grepStdout) => {
1635
+ exec3(grepCmd, { maxBuffer: 512 * 1024 }, (_err, grepStdout) => {
1649
1636
  if (grepStdout?.trim()) {
1650
1637
  resolve(formatResults(grepStdout, max));
1651
1638
  } else {
@@ -1980,21 +1967,44 @@ var init_runMethod = __esm({
1980
1967
  });
1981
1968
 
1982
1969
  // src/tools/code/screenshot.ts
1983
- var screenshotTool;
1970
+ var DEFAULT_PROMPT, screenshotTool;
1984
1971
  var init_screenshot = __esm({
1985
1972
  "src/tools/code/screenshot.ts"() {
1986
1973
  "use strict";
1974
+ init_sidecar();
1975
+ init_runCli();
1976
+ DEFAULT_PROMPT = "Describe this app screenshot for a developer who cannot see it. What is visible on screen: the layout, content, interactive elements, any loading or error states. Be concise and factual.";
1987
1977
  screenshotTool = {
1988
1978
  definition: {
1989
1979
  name: "screenshot",
1990
- description: "Capture a screenshot of the app preview. Returns a CDN URL with dimensions. Useful for visually checking the current state after UI changes or when debugging layout issues.",
1980
+ description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for.",
1991
1981
  inputSchema: {
1992
1982
  type: "object",
1993
- properties: {}
1983
+ properties: {
1984
+ prompt: {
1985
+ type: "string",
1986
+ description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
1987
+ }
1988
+ }
1994
1989
  }
1995
1990
  },
1996
- async execute() {
1997
- return "ok";
1991
+ async execute(input) {
1992
+ try {
1993
+ const { url } = await sidecarRequest(
1994
+ "/screenshot",
1995
+ {},
1996
+ { timeout: 3e4 }
1997
+ );
1998
+ const analysisPrompt = input.prompt || DEFAULT_PROMPT;
1999
+ const analysis = await runCli(
2000
+ `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(url)} --output-key analysis --no-meta`
2001
+ );
2002
+ return `Screenshot: ${url}
2003
+
2004
+ ${analysis}`;
2005
+ } catch (err) {
2006
+ return `Error taking screenshot: ${err.message}`;
2007
+ }
1998
2008
  }
1999
2009
  };
2000
2010
  }
@@ -2385,28 +2395,26 @@ var init_browserAutomation = __esm({
2385
2395
  });
2386
2396
 
2387
2397
  // src/subagents/designExpert/tools.ts
2388
- import { exec as exec6 } from "child_process";
2389
- function runCli(cmd) {
2390
- return new Promise((resolve) => {
2391
- exec6(
2392
- cmd,
2393
- { timeout: 6e4, maxBuffer: 1024 * 1024 },
2394
- (err, stdout, stderr) => {
2395
- if (stdout.trim()) {
2396
- resolve(stdout.trim());
2397
- return;
2398
- }
2399
- if (err) {
2400
- resolve(`Error: ${stderr.trim() || err.message}`);
2401
- return;
2402
- }
2403
- resolve("(no response)");
2404
- }
2405
- );
2406
- });
2407
- }
2408
- async function executeDesignTool(name, input) {
2398
+ async function executeDesignExpertTool(name, input) {
2409
2399
  switch (name) {
2400
+ case "screenshot": {
2401
+ try {
2402
+ const { url } = await sidecarRequest(
2403
+ "/screenshot",
2404
+ {},
2405
+ { timeout: 3e4 }
2406
+ );
2407
+ const analysisPrompt = input.prompt || "Describe this app screenshot for a visual designer reviewing the current state. What is visible: layout, typography, colors, spacing, imagery. Note anything that looks broken or off. Be concise.";
2408
+ const analysis = await runCli(
2409
+ `mindstudio analyze-image --prompt ${JSON.stringify(analysisPrompt)} --image-url ${JSON.stringify(url)} --output-key analysis --no-meta`
2410
+ );
2411
+ return `Screenshot: ${url}
2412
+
2413
+ ${analysis}`;
2414
+ } catch (err) {
2415
+ return `Error taking screenshot: ${err.message}`;
2416
+ }
2417
+ }
2410
2418
  case "searchGoogle":
2411
2419
  return runCli(
2412
2420
  `mindstudio search-google --query ${JSON.stringify(input.query)} --export-type json --output-key results --no-meta`
@@ -2451,6 +2459,8 @@ ${analysis}`;
2451
2459
  const prompts = input.prompts;
2452
2460
  const width = input.width || 2048;
2453
2461
  const height = input.height || 2048;
2462
+ const ANALYZE_PROMPT = "You are reviewing this image for a visual designer sourcing assets for a project. Describe: what the image depicts, the mood and color palette, how the lighting and composition work, whether there are any issues (unwanted text, artifacts, distortions), and how it could be used in a layout (hero background, feature section, card texture, etc). Be concise and practical.";
2463
+ let imageUrls;
2454
2464
  if (prompts.length === 1) {
2455
2465
  const step = JSON.stringify({
2456
2466
  prompt: prompts[0],
@@ -2459,30 +2469,58 @@ ${analysis}`;
2459
2469
  config: { width, height }
2460
2470
  }
2461
2471
  });
2462
- return runCli(
2472
+ const url = await runCli(
2463
2473
  `mindstudio generate-image '${step}' --output-key imageUrl --no-meta`
2464
2474
  );
2465
- }
2466
- const steps = prompts.map((prompt) => ({
2467
- stepType: "generateImage",
2468
- step: {
2469
- prompt,
2470
- imageModelOverride: {
2471
- model: "seedream-4.5",
2472
- config: { width, height }
2475
+ imageUrls = [url];
2476
+ } else {
2477
+ const steps = prompts.map((prompt) => ({
2478
+ stepType: "generateImage",
2479
+ step: {
2480
+ prompt,
2481
+ imageModelOverride: {
2482
+ model: "seedream-4.5",
2483
+ config: { width, height }
2484
+ }
2473
2485
  }
2486
+ }));
2487
+ const batchResult = await runCli(
2488
+ `mindstudio batch '${JSON.stringify(steps)}' --no-meta`
2489
+ );
2490
+ try {
2491
+ const parsed = JSON.parse(batchResult);
2492
+ imageUrls = parsed.results.map(
2493
+ (r) => r.output?.imageUrl ?? `Error: ${r.error}`
2494
+ );
2495
+ } catch {
2496
+ return batchResult;
2474
2497
  }
2475
- }));
2476
- return runCli(`mindstudio batch '${JSON.stringify(steps)}' --no-meta`);
2498
+ }
2499
+ const analyses = await Promise.all(
2500
+ imageUrls.map(async (url, i) => {
2501
+ if (url.startsWith("Error")) {
2502
+ return `Image ${i + 1}: ${url}`;
2503
+ }
2504
+ const analysis = await runCli(
2505
+ `mindstudio analyze-image --prompt ${JSON.stringify(ANALYZE_PROMPT)} --image-url ${JSON.stringify(url)} --output-key analysis --no-meta`
2506
+ );
2507
+ return `**Image ${i + 1}:** ${url}
2508
+ Prompt: ${prompts[i]}
2509
+ Analysis: ${analysis}`;
2510
+ })
2511
+ );
2512
+ return analyses.join("\n\n");
2477
2513
  }
2478
2514
  default:
2479
2515
  return `Error: unknown tool "${name}"`;
2480
2516
  }
2481
2517
  }
2482
- var DESIGN_REFERENCE_PROMPT, DESIGN_RESEARCH_TOOLS;
2518
+ var DESIGN_REFERENCE_PROMPT, DESIGN_EXPERT_TOOLS;
2483
2519
  var init_tools2 = __esm({
2484
2520
  "src/subagents/designExpert/tools.ts"() {
2485
2521
  "use strict";
2522
+ init_runCli();
2523
+ init_sidecar();
2486
2524
  DESIGN_REFERENCE_PROMPT = `Analyze this website/app screenshot as a design reference. Assess:
2487
2525
  1) Mood/aesthetic
2488
2526
  2) Color palette with approximate hex values and palette strategy
@@ -2490,7 +2528,7 @@ var init_tools2 = __esm({
2490
2528
  4) Layout composition (symmetric/asymmetric, grid structure, whitespace usage, content density)
2491
2529
  5) What makes it distinctive and interesting vs generic AI-generated interfaces
2492
2530
  Be specific and concise.`;
2493
- DESIGN_RESEARCH_TOOLS = [
2531
+ DESIGN_EXPERT_TOOLS = [
2494
2532
  {
2495
2533
  name: "searchGoogle",
2496
2534
  description: "Search Google for web results. Use for finding design inspiration, font recommendations, UI patterns, real products in a domain, and reference material.",
@@ -2565,7 +2603,7 @@ Be specific and concise.`;
2565
2603
  },
2566
2604
  {
2567
2605
  name: "generateImages",
2568
- description: "Generate images using AI (Seedream). Returns CDN URLs. Produces high-quality results for both photorealistic images and abstract/creative visuals. Pass multiple prompts to generate in parallel.",
2606
+ description: "Generate images using AI (Seedream). Returns CDN URLs with a quality analysis for each image. Produces high-quality results for both photorealistic images and abstract/creative visuals. Pass multiple prompts to generate in parallel. No need to analyze images separately after generating \u2014 the analysis is included.",
2569
2607
  inputSchema: {
2570
2608
  type: "object",
2571
2609
  properties: {
@@ -2721,7 +2759,7 @@ function sample(arr, n) {
2721
2759
  }
2722
2760
  return copy.slice(0, n);
2723
2761
  }
2724
- function getDesignResearchPrompt() {
2762
+ function getDesignExpertPrompt() {
2725
2763
  const fonts = sample(fontData.fonts, 30);
2726
2764
  const pairings = sample(fontData.pairings, 20);
2727
2765
  const images = sample(inspirationImages, 15);
@@ -2806,7 +2844,7 @@ var init_designExpert = __esm({
2806
2844
  init_tools2();
2807
2845
  init_prompt2();
2808
2846
  DESCRIPTION = `
2809
- Visual design expert. Handles fonts, colors, palettes, gradients, layouts, imagery, icons, and visual direction. Can answer from expertise alone or research the web. Returns concrete resources: hex values, font names with CSS URLs, image URLs, layout descriptions. Include app context in your task \u2014 the agent cannot see your conversation with the user.
2847
+ Visual design expert. Describe the situation and what you need \u2014 the agent decides what to deliver. It reads the spec files automatically. Include relevant user requirements and context it can't get from the spec, but do not list specific deliverables or tell it how to do its job.
2810
2848
  `.trim();
2811
2849
  designExpertTool = {
2812
2850
  definition: {
@@ -2825,17 +2863,17 @@ Visual design expert. Handles fonts, colors, palettes, gradients, layouts, image
2825
2863
  },
2826
2864
  async execute(input, context) {
2827
2865
  if (!context) {
2828
- return "Error: design research requires execution context";
2866
+ return "Error: visual design expert requires execution context";
2829
2867
  }
2830
2868
  const result = await runSubAgent({
2831
- system: getDesignResearchPrompt(),
2869
+ system: getDesignExpertPrompt(),
2832
2870
  task: input.task,
2833
- tools: DESIGN_RESEARCH_TOOLS,
2834
- externalTools: /* @__PURE__ */ new Set(["screenshot"]),
2835
- executeTool: executeDesignTool,
2871
+ tools: DESIGN_EXPERT_TOOLS,
2872
+ externalTools: /* @__PURE__ */ new Set(),
2873
+ executeTool: executeDesignExpertTool,
2836
2874
  apiConfig: context.apiConfig,
2837
2875
  model: context.model,
2838
- subAgentId: "designExpert",
2876
+ subAgentId: "visualDesignExpert",
2839
2877
  signal: context.signal,
2840
2878
  parentToolId: context.toolCallId,
2841
2879
  onEvent: context.onEvent,
@@ -3843,6 +3881,12 @@ async function runTurn(params) {
3843
3881
  });
3844
3882
  }
3845
3883
  state.messages.push(userMsg);
3884
+ const STATUS_EXCLUDED_TOOLS = /* @__PURE__ */ new Set([
3885
+ "setProjectOnboardingState",
3886
+ "setProjectName",
3887
+ "clearSyncStatus",
3888
+ "editsFinished"
3889
+ ]);
3846
3890
  let lastCompletedTools = "";
3847
3891
  let lastCompletedResult = "";
3848
3892
  while (true) {
@@ -3931,7 +3975,7 @@ async function runTurn(params) {
3931
3975
  apiConfig,
3932
3976
  getContext: () => ({
3933
3977
  assistantText: getTextContent(contentBlocks).slice(-500),
3934
- lastToolName: getToolCalls(contentBlocks).at(-1)?.name || lastCompletedTools || void 0,
3978
+ lastToolName: getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools || void 0,
3935
3979
  lastToolResult: lastCompletedResult || void 0
3936
3980
  }),
3937
3981
  onStatus: (label) => onEvent({ type: "status", message: label }),
@@ -4090,6 +4134,28 @@ async function runTurn(params) {
4090
4134
  count: toolCalls.length,
4091
4135
  tools: toolCalls.map((tc) => tc.name)
4092
4136
  });
4137
+ let subAgentText = "";
4138
+ const origOnEvent = onEvent;
4139
+ const wrappedOnEvent = (e) => {
4140
+ if ("parentToolId" in e && e.parentToolId) {
4141
+ if (e.type === "text") {
4142
+ subAgentText = e.text;
4143
+ } else if (e.type === "tool_start") {
4144
+ subAgentText = `Using ${e.name}`;
4145
+ }
4146
+ }
4147
+ origOnEvent(e);
4148
+ };
4149
+ const toolStatusWatcher = startStatusWatcher({
4150
+ apiConfig,
4151
+ getContext: () => ({
4152
+ assistantText: subAgentText || getTextContent(contentBlocks).slice(-500),
4153
+ lastToolName: toolCalls.filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).map((tc) => tc.name).join(", ") || void 0,
4154
+ lastToolResult: lastCompletedResult || void 0
4155
+ }),
4156
+ onStatus: (label) => origOnEvent({ type: "status", message: label }),
4157
+ signal
4158
+ });
4093
4159
  const subAgentMessages = /* @__PURE__ */ new Map();
4094
4160
  const results = await Promise.all(
4095
4161
  toolCalls.map(async (tc) => {
@@ -4115,7 +4181,7 @@ async function runTurn(params) {
4115
4181
  apiConfig,
4116
4182
  model,
4117
4183
  signal,
4118
- onEvent,
4184
+ onEvent: wrappedOnEvent,
4119
4185
  resolveExternalTool,
4120
4186
  toolCallId: tc.id,
4121
4187
  subAgentMessages
@@ -4149,15 +4215,21 @@ async function runTurn(params) {
4149
4215
  }
4150
4216
  })
4151
4217
  );
4152
- for (const [toolId, msgs] of subAgentMessages) {
4218
+ toolStatusWatcher.stop();
4219
+ for (const r of results) {
4153
4220
  const block = contentBlocks.find(
4154
- (b) => b.type === "tool" && b.id === toolId
4221
+ (b) => b.type === "tool" && b.id === r.id
4155
4222
  );
4156
4223
  if (block?.type === "tool") {
4157
- block.subAgentMessages = msgs;
4224
+ block.result = r.result;
4225
+ block.isError = r.isError;
4226
+ const msgs = subAgentMessages.get(r.id);
4227
+ if (msgs) {
4228
+ block.subAgentMessages = msgs;
4229
+ }
4158
4230
  }
4159
4231
  }
4160
- lastCompletedTools = toolCalls.map((tc) => tc.name).join(", ");
4232
+ lastCompletedTools = toolCalls.filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).map((tc) => tc.name).join(", ");
4161
4233
  lastCompletedResult = results.at(-1)?.result ?? "";
4162
4234
  for (const r of results) {
4163
4235
  state.messages.push({
@@ -4197,7 +4269,6 @@ var init_agent = __esm({
4197
4269
  "runScenario",
4198
4270
  "runMethod",
4199
4271
  "browserCommand",
4200
- "screenshot",
4201
4272
  "setProjectName"
4202
4273
  ]);
4203
4274
  }
@@ -142,7 +142,7 @@ The intro framing ("you have a lot on your plate") gives the model permission to
142
142
 
143
143
  | Agent | Role | Tools | Context |
144
144
  |---|---|---|---|
145
- | `designExpert` | Visual design decisions | searchGoogle, fetchUrl, analyzeImage, screenshotAndAnalyze, searchProductScreenshots, generateImages | Spec files + sampled fonts + sampled inspiration |
145
+ | `visualDesignExpert` | Visual design decisions | searchGoogle, fetchUrl, analyzeReferenceImageOrUrl, screenshot, searchProductScreenshots, generateImages | Spec files + sampled fonts + sampled inspiration |
146
146
  | `productVision` | Roadmap ownership & product strategy | writeRoadmapItem, updateRoadmapItem, deleteRoadmapItem | Spec files + current roadmap |
147
147
  | `sdkConsultant` | MindStudio SDK architecture | None (shells out to `mindstudio ask` CLI) | None (external agent) |
148
148
  | `codeSanityCheck` | Pre-build review | readFile, grep, glob, searchGoogle, fetchUrl, askMindStudioSdk, bash (readonly) | Spec files |
@@ -43,7 +43,7 @@ Derive additional implementation colors (borders, focus states, hover states, di
43
43
 
44
44
  ### Typography block format
45
45
 
46
- A `` ```typography `` fenced block in a `type: design/typography` spec file declares fonts (with source URLs) and one or two anchor styles (typically Display and Body). Derive additional styles (labels, buttons, captions, overlines) from these anchors:
46
+ A `` ```typography `` fenced block in a `type: design/typography` spec file declares fonts (with source URLs) and one or two anchor styles (typically Display and Body). Styles can include an optional `case` field (`uppercase`, `lowercase`, `capitalize`) for text-transform. Derive additional styles (labels, buttons, captions, overlines) from these anchors:
47
47
 
48
48
  ```typography
49
49
  fonts:
@@ -59,6 +59,7 @@ styles:
59
59
  weight: 600
60
60
  letterSpacing: -0.03em
61
61
  lineHeight: 1.1
62
+ case: uppercase
62
63
  description: Page titles and hero text
63
64
  Body:
64
65
  font: Satoshi
@@ -181,6 +181,7 @@ styles:
181
181
  weight: 600
182
182
  letterSpacing: -0.03em
183
183
  lineHeight: 1.1
184
+ case: uppercase
184
185
  description: Page titles and hero text
185
186
  Body:
186
187
  font: Satoshi
@@ -55,6 +55,7 @@ styles:
55
55
  weight: 600
56
56
  letterSpacing: -0.03em
57
57
  lineHeight: 1.1
58
+ case: uppercase
58
59
  description: Page titles and hero text
59
60
  Body:
60
61
  font: Satoshi
@@ -3,7 +3,7 @@
3
3
  The spec is the application. It defines what the app does — the data, the workflows, the roles, the edge cases — and how it looks and feels. Code is derived from it. Your job is to help the user build a spec that's complete enough to compile into a working app.
4
4
 
5
5
  **Writing the first draft:**
6
- After intake, write the spec and get it on screen. The first draft should cover the full shape of the app — it's better to have every section roughed in than to have one section perfect and the rest missing.
6
+ After intake, write the spec immediately. Do not ask "ready for me to start?" or wait for confirmation — just start writing. The first draft should cover the full shape of the app — it's better to have every section roughed in than to have one section perfect and the rest missing.
7
7
 
8
8
  - Make concrete decisions rather than leaving things vague. The user can change a decision; they can't react to vagueness.
9
9
  - Flag assumptions you made during intake so the user can confirm or correct them.
@@ -23,9 +23,9 @@ Start from these and extend as needed. Add interface specs for other interface t
23
23
 
24
24
  Users often care about look and feel as much as (or more than) underlying data structures. Don't treat the brand and interface specs as an afterthought — for many users, the visual identity and voice are the first things they want to get right.
25
25
 
26
- Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — technical details like column types, status values, CSS properties, code snippets, and implementation hints belong in annotations, not in the prose.
26
+ Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — the user should never see raw CSS, code, or technical values in the prose. Write "square corners on all cards" not `border-radius: 0`. Write "no shadows" not `box-shadow: none`. Technical specifics belong in annotations.
27
27
 
28
- When you have image URLs (from the design expert, stock photos, or AI generation), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
28
+ When you have image URLs (from the design expert), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
29
29
 
30
30
  ```markdown
31
31
  ### Hero Section
@@ -78,7 +78,7 @@ The body is freeform MSFM: prose describing the feature for the user, annotation
78
78
 
79
79
  The MVP itself gets a roadmap file (`src/roadmap/mvp.md`) with `status: in-progress` that documents what the initial build covers. Update it to `done` after the build completes. Other items start as `not-started`. Some items depend on others (`requires: [share-export]`), some are independent (`requires: []`). The user picks what to build next.
80
80
 
81
- The `productVision` tool owns `src/roadmap/` — see the Team section for when and how to use it.
81
+ The `productVision` tool owns `src/roadmap/` — see the Team section for when and how to use it. As the final step of spec authoring, after all other spec files are written, call it to seed the initial roadmap.
82
82
 
83
83
  ## Spec + Code Sync
84
84
 
@@ -2,6 +2,10 @@
2
2
 
3
3
  You have a lot on your plate — specs, code, tables, interfaces, scenarios, debugging, user communication. You don't need to do everything yourself. You are fortunate to have specialists who are genuinely better than you in their specific domains. Use them liberally — for quick questions, big projects, second opinions, and everything in between. These are not scarce resources. A one-line question is just as valid as a comprehensive brief. The more you delegate, the better the results.
4
4
 
5
+ When delegating, describe the situation and what you need — not how to do it. Your specialists are experts. Trust them and give them space to impress you. They read the spec files automatically. They decide what to deliver, how many options to propose, and what approach to take. Keep task descriptions brief and focused on context: what the app is, who it's for, what the user wants. Do not constrain their output, specify quantities, or list requirements they should already know.
6
+
7
+ Note: when you talk about the team to the user, refer to them by their name or as agents: "my design expert" or "let me bring in a specialized agent for QA" etc.
8
+
5
9
  ### Design Expert (`visualDesignExpert`)
6
10
 
7
11
  Your designer. Consult for any visual decision — choosing a color, picking fonts, proposing a layout, generating images, reviewing whether something looks good. Not just during intake or big design moments. If you're about to write CSS and you're not sure about a color, ask. If you just built a page and want a gut check, take a screenshot and send it over. If the user says "I don't like how this looks," ask the design expert what to change rather than guessing yourself, or if they say "I want a different image," that's the designer's problem, not yours.
@@ -16,7 +20,7 @@ Always consult the design expert during intake and before building any new produ
16
20
 
17
21
  Your product thinking partner. Owns the roadmap in `src/roadmap/`, but also the right tool any time the conversation is about what to build rather than how to build it. Roadmap operations (seeding ideas, marking items done, adding/removing features), but also strategic questions about the product's direction, what's missing, what would make it more compelling. It reads spec and roadmap files automatically. Describe the situation and let it decide what to do.
18
22
 
19
- ### SDK Consultant (`askMindStudioSdk`)
23
+ ### Mindstudio SDK Consultant (`askMindStudioSdk`)
20
24
 
21
25
  Your architect for anything that touches external services, AI models, media processing, communication, or third-party APIs. Consult before you reach for an npm package, write boilerplate API code, or try to install system tools. The MindStudio SDK has 200+ managed actions for calling AI models, processing media, sending email/SMS, connecting to third-party APIs, web scraping, and much more. The SDK is already installed and authenticated in the execution environment — no API keys, no configuration, no setup. It handles all the operational complexity so you don't have to. Your instinct will be "I can just write this myself" — but the managed action is almost always the better architectural choice.
22
26
 
@@ -24,12 +28,12 @@ Also critical: model IDs in the MindStudio API do not match vendor API model IDs
24
28
 
25
29
  Describe what you're building at the method level — the full workflow — and get back architectural guidance and working code.
26
30
 
27
- ### Code Sanity Check (`codeSanityCheck`)
31
+ ### Architecture Expert (aka Code Sanity Check) (`codeSanityCheck`)
28
32
 
29
33
 A quick gut check. Describe what you're about to build and how, and get back a brief review. Most of the time it'll literally just say "lgtm." Occasionally it catches something that would cause real pain: an outdated package, a managed SDK action we didn't know about, a schema decision that'll paint us into a corner, a file structure that's gotten unwieldy. It can search the web, read the codebase, and check the SDK.
30
34
 
31
35
  Always consult the code sanity check before writing code in initialCodegen with your proposed architecture.
32
36
 
33
- ### Browser Testing (`runAutomatedBrowserTest`)
37
+ ### QA (`runAutomatedBrowserTest`)
34
38
 
35
39
  For verifying interactive flows that can't be confirmed from a static screenshot, or reproducing user-reported issues you can't identify from code alone. Run a scenario first to seed test data and set user roles.