@mindstudio-ai/remy 0.1.48 → 0.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -7,45 +7,8 @@ var __export = (target, all) => {
7
7
  // src/headless.ts
8
8
  import { createInterface } from "readline";
9
9
 
10
- // src/assets.ts
11
- import fs from "fs";
12
- import path from "path";
13
- var ROOT = findRoot(
14
- import.meta.dirname ?? path.dirname(new URL(import.meta.url).pathname)
15
- );
16
- function findRoot(start) {
17
- let dir = start;
18
- while (dir !== path.dirname(dir)) {
19
- if (fs.existsSync(path.join(dir, "package.json"))) {
20
- return dir;
21
- }
22
- dir = path.dirname(dir);
23
- }
24
- return start;
25
- }
26
- var ASSETS_BASE = fs.existsSync(path.join(ROOT, "dist", "prompt")) ? path.join(ROOT, "dist") : path.join(ROOT, "src");
27
- function assetPath(...segments) {
28
- return path.join(ASSETS_BASE, ...segments);
29
- }
30
- function readAsset(...segments) {
31
- const full = assetPath(...segments);
32
- try {
33
- return fs.readFileSync(full, "utf-8").trim();
34
- } catch {
35
- throw new Error(`Required asset missing: ${full}`);
36
- }
37
- }
38
- function readJsonAsset(fallback, ...segments) {
39
- const full = assetPath(...segments);
40
- try {
41
- return JSON.parse(fs.readFileSync(full, "utf-8"));
42
- } catch {
43
- return fallback;
44
- }
45
- }
46
-
47
10
  // src/logger.ts
48
- import fs2 from "fs";
11
+ import fs from "fs";
49
12
  var LEVELS = {
50
13
  error: 0,
51
14
  warn: 1,
@@ -94,11 +57,11 @@ function createLogger(module) {
94
57
  }
95
58
 
96
59
  // src/config.ts
97
- import fs3 from "fs";
98
- import path2 from "path";
60
+ import fs2 from "fs";
61
+ import path from "path";
99
62
  import os from "os";
100
63
  var log = createLogger("config");
101
- var CONFIG_PATH = path2.join(
64
+ var CONFIG_PATH = path.join(
102
65
  os.homedir(),
103
66
  ".mindstudio-local-tunnel",
104
67
  "config.json"
@@ -106,7 +69,7 @@ var CONFIG_PATH = path2.join(
106
69
  var DEFAULT_BASE_URL = "https://api.mindstudio.ai";
107
70
  function loadConfigFile() {
108
71
  try {
109
- const raw = fs3.readFileSync(CONFIG_PATH, "utf-8");
72
+ const raw = fs2.readFileSync(CONFIG_PATH, "utf-8");
110
73
  log.debug("Loaded config file", { path: CONFIG_PATH });
111
74
  return JSON.parse(raw);
112
75
  } catch (err) {
@@ -138,6 +101,43 @@ function resolveConfig(flags) {
138
101
  return { apiKey, baseUrl: baseUrl2 };
139
102
  }
140
103
 
104
+ // src/assets.ts
105
+ import fs3 from "fs";
106
+ import path2 from "path";
107
+ var ROOT = findRoot(
108
+ import.meta.dirname ?? path2.dirname(new URL(import.meta.url).pathname)
109
+ );
110
+ function findRoot(start) {
111
+ let dir = start;
112
+ while (dir !== path2.dirname(dir)) {
113
+ if (fs3.existsSync(path2.join(dir, "package.json"))) {
114
+ return dir;
115
+ }
116
+ dir = path2.dirname(dir);
117
+ }
118
+ return start;
119
+ }
120
+ var ASSETS_BASE = fs3.existsSync(path2.join(ROOT, "dist", "prompt")) ? path2.join(ROOT, "dist") : path2.join(ROOT, "src");
121
+ function assetPath(...segments) {
122
+ return path2.join(ASSETS_BASE, ...segments);
123
+ }
124
+ function readAsset(...segments) {
125
+ const full = assetPath(...segments);
126
+ try {
127
+ return fs3.readFileSync(full, "utf-8").trim();
128
+ } catch {
129
+ throw new Error(`Required asset missing: ${full}`);
130
+ }
131
+ }
132
+ function readJsonAsset(fallback, ...segments) {
133
+ const full = assetPath(...segments);
134
+ try {
135
+ return JSON.parse(fs3.readFileSync(full, "utf-8"));
136
+ } catch {
137
+ return fallback;
138
+ }
139
+ }
140
+
141
141
  // src/tools/_helpers/sidecar.ts
142
142
  var log2 = createLogger("sidecar");
143
143
  var baseUrl = null;
@@ -358,6 +358,10 @@ Current date/time: ${now}
358
358
  {{compiled/design.md}}
359
359
  </design>
360
360
 
361
+ <building_agent_interfaces>
362
+ {{compiled/agent-interfaces.md}}
363
+ </building_agent_interfaces>
364
+
361
365
  <media_cdn>
362
366
  {{compiled/media-cdn.md}}
363
367
  </media_cdn>
@@ -1099,7 +1103,7 @@ var presentPublishPlanTool = {
1099
1103
  var presentPlanTool = {
1100
1104
  definition: {
1101
1105
  name: "presentPlan",
1102
- description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
1106
+ description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
1103
1107
  inputSchema: {
1104
1108
  type: "object",
1105
1109
  properties: {
@@ -1284,7 +1288,7 @@ var confirmDestructiveActionTool = {
1284
1288
  properties: {
1285
1289
  message: {
1286
1290
  type: "string",
1287
- description: "Explanation of what is about to happen and why confirmation is needed."
1291
+ description: "Explanation of what is about to happen and why confirmation is needed in natural language - avoid technical terms or mentions of variables, bash commands, or other system-level concepts.."
1288
1292
  },
1289
1293
  confirmLabel: {
1290
1294
  type: "string",
@@ -2241,8 +2245,8 @@ function startStatusWatcher(config) {
2241
2245
  }
2242
2246
  inflight = true;
2243
2247
  try {
2244
- const ctx = getContext();
2245
- if (!ctx.assistantText && !ctx.lastToolName && !ctx.userMessage) {
2248
+ const context = getContext();
2249
+ if (!context) {
2246
2250
  return;
2247
2251
  }
2248
2252
  const res = await fetch(url, {
@@ -2251,13 +2255,7 @@ function startStatusWatcher(config) {
2251
2255
  "Content-Type": "application/json",
2252
2256
  Authorization: `Bearer ${apiConfig.apiKey}`
2253
2257
  },
2254
- body: JSON.stringify({
2255
- assistantText: ctx.assistantText.slice(-500),
2256
- lastToolName: ctx.lastToolName,
2257
- lastToolResult: ctx.lastToolResult?.slice(-200),
2258
- onboardingState: ctx.onboardingState,
2259
- userMessage: ctx.userMessage?.slice(-200)
2260
- }),
2258
+ body: JSON.stringify({ context }),
2261
2259
  signal
2262
2260
  });
2263
2261
  if (!res.ok) {
@@ -2294,7 +2292,7 @@ function cleanMessagesForApi(messages) {
2294
2292
  if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
2295
2293
  return {
2296
2294
  ...msg,
2297
- content: msg.content.replace(/^@@automated::[^@]*@@\n?/, "")
2295
+ content: msg.content.replace(/^@@automated::[^@]*@@[^\n]*\n?/, "")
2298
2296
  };
2299
2297
  }
2300
2298
  if (!Array.isArray(msg.content)) {
@@ -2382,12 +2380,23 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
2382
2380
  let currentToolNames = "";
2383
2381
  const statusWatcher = startStatusWatcher({
2384
2382
  apiConfig,
2385
- getContext: () => ({
2386
- assistantText: getPartialText(contentBlocks),
2387
- lastToolName: currentToolNames || void 0,
2388
- lastToolResult: lastToolResult || void 0,
2389
- userMessage: task
2390
- }),
2383
+ getContext: () => {
2384
+ const parts = [];
2385
+ if (task) {
2386
+ parts.push(`Task: ${task.slice(-200)}`);
2387
+ }
2388
+ const text = getPartialText(contentBlocks);
2389
+ if (text) {
2390
+ parts.push(`Assistant text: ${text.slice(-500)}`);
2391
+ }
2392
+ if (currentToolNames) {
2393
+ parts.push(`Tool: ${currentToolNames}`);
2394
+ }
2395
+ if (lastToolResult) {
2396
+ parts.push(`Tool result: ${lastToolResult.slice(-200)}`);
2397
+ }
2398
+ return parts.join("\n");
2399
+ },
2391
2400
  onStatus: (label) => emit2({ type: "status", message: label }),
2392
2401
  signal
2393
2402
  });
@@ -3413,6 +3422,7 @@ Each interface type invokes the same backend methods. Methods don't know which i
3413
3422
  - Telegram \u2014 message-handling bots
3414
3423
  - Email \u2014 inbound email processing
3415
3424
  - MCP \u2014 tool servers for AI assistants
3425
+ - Agent \u2014 conversational LLM interface with tool access to backend methods
3416
3426
 
3417
3427
  ## Backend
3418
3428
 
@@ -4476,6 +4486,7 @@ async function runTurn(params) {
4476
4486
  "editsFinished"
4477
4487
  ]);
4478
4488
  let lastCompletedTools = "";
4489
+ let lastCompletedInput = "";
4479
4490
  let lastCompletedResult = "";
4480
4491
  while (true) {
4481
4492
  let getOrCreateAccumulator2 = function(id, name) {
@@ -4500,13 +4511,30 @@ async function runTurn(params) {
4500
4511
  let currentToolNames = "";
4501
4512
  const statusWatcher = startStatusWatcher({
4502
4513
  apiConfig,
4503
- getContext: () => ({
4504
- assistantText: subAgentText || getTextContent(contentBlocks).slice(-500),
4505
- lastToolName: currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools || void 0,
4506
- lastToolResult: lastCompletedResult || void 0,
4507
- onboardingState,
4508
- userMessage
4509
- }),
4514
+ getContext: () => {
4515
+ const parts = [];
4516
+ if (userMessage) {
4517
+ parts.push(`User message: ${userMessage.slice(-200)}`);
4518
+ }
4519
+ if (onboardingState) {
4520
+ parts.push(`Build phase: ${onboardingState}`);
4521
+ }
4522
+ const text = subAgentText || getTextContent(contentBlocks).slice(-500);
4523
+ if (text) {
4524
+ parts.push(`Assistant text: ${text}`);
4525
+ }
4526
+ const toolName = currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools;
4527
+ if (toolName) {
4528
+ parts.push(`Tool: ${toolName}`);
4529
+ }
4530
+ if (lastCompletedInput) {
4531
+ parts.push(`Tool input: ${lastCompletedInput.slice(-300)}`);
4532
+ }
4533
+ if (lastCompletedResult) {
4534
+ parts.push(`Tool result: ${lastCompletedResult.slice(-200)}`);
4535
+ }
4536
+ return parts.join("\n");
4537
+ },
4510
4538
  onStatus: (label) => onEvent({ type: "status", message: label }),
4511
4539
  signal
4512
4540
  });
@@ -4824,7 +4852,11 @@ async function runTurn(params) {
4824
4852
  }
4825
4853
  }
4826
4854
  }
4827
- lastCompletedTools = toolCalls.filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).map((tc) => tc.name).join(", ");
4855
+ const lastNonExcluded = toolCalls.filter(
4856
+ (tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)
4857
+ );
4858
+ lastCompletedTools = lastNonExcluded.map((tc) => tc.name).join(", ");
4859
+ lastCompletedInput = JSON.stringify(lastNonExcluded.at(-1)?.input ?? {});
4828
4860
  lastCompletedResult = results.at(-1)?.result ?? "";
4829
4861
  for (const r of results) {
4830
4862
  state.messages.push({
@@ -4917,11 +4949,36 @@ ${partial}` : "[INTERRUPTED] Tool execution was stopped.";
4917
4949
  }
4918
4950
  };
4919
4951
 
4952
+ // src/automatedActions/resolve.ts
4953
+ var NON_ACTION_SENTINELS = /* @__PURE__ */ new Set(["background_results"]);
4954
+ function resolveAction(text) {
4955
+ const match = text.match(/^@@automated::(\w+)@@(.*)/s);
4956
+ if (!match) {
4957
+ return null;
4958
+ }
4959
+ const triggerName = match[1];
4960
+ if (NON_ACTION_SENTINELS.has(triggerName)) {
4961
+ return null;
4962
+ }
4963
+ let params = {};
4964
+ const remainder = match[2];
4965
+ if (remainder) {
4966
+ try {
4967
+ params = JSON.parse(remainder.split("\n")[0]);
4968
+ } catch {
4969
+ }
4970
+ }
4971
+ let body = readAsset("automatedActions", `${triggerName}.md`);
4972
+ body = body.replace(/^---[\s\S]*?---\s*/, "");
4973
+ for (const [key, value] of Object.entries(params)) {
4974
+ body = body.replaceAll(`{{${key}}}`, String(value));
4975
+ }
4976
+ return `@@automated::${triggerName}@@
4977
+ ${body}`;
4978
+ }
4979
+
4920
4980
  // src/headless.ts
4921
4981
  var log9 = createLogger("headless");
4922
- function loadActionPrompt(name) {
4923
- return readAsset("prompt", "actions", `${name}.md`);
4924
- }
4925
4982
  function emit(event, data, requestId) {
4926
4983
  const payload = { event, ...data };
4927
4984
  if (requestId) {
@@ -5223,15 +5280,11 @@ ${xmlParts}
5223
5280
  );
5224
5281
  }
5225
5282
  let userMessage = parsed.text ?? "";
5226
- const isCommand = !!parsed.runCommand;
5227
- const isHidden = isCommand || !!parsed.hidden;
5228
- if (parsed.runCommand === "sync") {
5229
- userMessage = loadActionPrompt("sync");
5230
- } else if (parsed.runCommand === "publish") {
5231
- userMessage = loadActionPrompt("publish");
5232
- } else if (parsed.runCommand === "buildFromInitialSpec") {
5233
- userMessage = loadActionPrompt("buildFromInitialSpec");
5283
+ const resolved = resolveAction(userMessage);
5284
+ if (resolved !== null) {
5285
+ userMessage = resolved;
5234
5286
  }
5287
+ const isHidden = resolved !== null || !!parsed.hidden;
5235
5288
  const onboardingState = parsed.onboardingState ?? "onboardingFinished";
5236
5289
  const system = buildSystemPrompt(
5237
5290
  onboardingState,
package/dist/index.js CHANGED
@@ -826,7 +826,7 @@ var init_presentPlan = __esm({
826
826
  presentPlanTool = {
827
827
  definition: {
828
828
  name: "presentPlan",
829
- description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
829
+ description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
830
830
  inputSchema: {
831
831
  type: "object",
832
832
  properties: {
@@ -1029,7 +1029,7 @@ var init_confirmDestructiveAction = __esm({
1029
1029
  properties: {
1030
1030
  message: {
1031
1031
  type: "string",
1032
- description: "Explanation of what is about to happen and why confirmation is needed."
1032
+ description: "Explanation of what is about to happen and why confirmation is needed in natural language - avoid technical terms or mentions of variables, bash commands, or other system-level concepts.."
1033
1033
  },
1034
1034
  confirmLabel: {
1035
1035
  type: "string",
@@ -2179,8 +2179,8 @@ function startStatusWatcher(config) {
2179
2179
  }
2180
2180
  inflight = true;
2181
2181
  try {
2182
- const ctx = getContext();
2183
- if (!ctx.assistantText && !ctx.lastToolName && !ctx.userMessage) {
2182
+ const context = getContext();
2183
+ if (!context) {
2184
2184
  return;
2185
2185
  }
2186
2186
  const res = await fetch(url, {
@@ -2189,13 +2189,7 @@ function startStatusWatcher(config) {
2189
2189
  "Content-Type": "application/json",
2190
2190
  Authorization: `Bearer ${apiConfig.apiKey}`
2191
2191
  },
2192
- body: JSON.stringify({
2193
- assistantText: ctx.assistantText.slice(-500),
2194
- lastToolName: ctx.lastToolName,
2195
- lastToolResult: ctx.lastToolResult?.slice(-200),
2196
- onboardingState: ctx.onboardingState,
2197
- userMessage: ctx.userMessage?.slice(-200)
2198
- }),
2192
+ body: JSON.stringify({ context }),
2199
2193
  signal
2200
2194
  });
2201
2195
  if (!res.ok) {
@@ -2237,7 +2231,7 @@ function cleanMessagesForApi(messages) {
2237
2231
  if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
2238
2232
  return {
2239
2233
  ...msg,
2240
- content: msg.content.replace(/^@@automated::[^@]*@@\n?/, "")
2234
+ content: msg.content.replace(/^@@automated::[^@]*@@[^\n]*\n?/, "")
2241
2235
  };
2242
2236
  }
2243
2237
  if (!Array.isArray(msg.content)) {
@@ -2329,12 +2323,23 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
2329
2323
  let currentToolNames = "";
2330
2324
  const statusWatcher = startStatusWatcher({
2331
2325
  apiConfig,
2332
- getContext: () => ({
2333
- assistantText: getPartialText(contentBlocks),
2334
- lastToolName: currentToolNames || void 0,
2335
- lastToolResult: lastToolResult || void 0,
2336
- userMessage: task
2337
- }),
2326
+ getContext: () => {
2327
+ const parts = [];
2328
+ if (task) {
2329
+ parts.push(`Task: ${task.slice(-200)}`);
2330
+ }
2331
+ const text = getPartialText(contentBlocks);
2332
+ if (text) {
2333
+ parts.push(`Assistant text: ${text.slice(-500)}`);
2334
+ }
2335
+ if (currentToolNames) {
2336
+ parts.push(`Tool: ${currentToolNames}`);
2337
+ }
2338
+ if (lastToolResult) {
2339
+ parts.push(`Tool result: ${lastToolResult.slice(-200)}`);
2340
+ }
2341
+ return parts.join("\n");
2342
+ },
2338
2343
  onStatus: (label) => emit2({ type: "status", message: label }),
2339
2344
  signal
2340
2345
  });
@@ -3515,6 +3520,7 @@ Each interface type invokes the same backend methods. Methods don't know which i
3515
3520
  - Telegram \u2014 message-handling bots
3516
3521
  - Email \u2014 inbound email processing
3517
3522
  - MCP \u2014 tool servers for AI assistants
3523
+ - Agent \u2014 conversational LLM interface with tool access to backend methods
3518
3524
 
3519
3525
  ## Backend
3520
3526
 
@@ -4718,6 +4724,7 @@ async function runTurn(params) {
4718
4724
  "editsFinished"
4719
4725
  ]);
4720
4726
  let lastCompletedTools = "";
4727
+ let lastCompletedInput = "";
4721
4728
  let lastCompletedResult = "";
4722
4729
  while (true) {
4723
4730
  let getOrCreateAccumulator2 = function(id, name) {
@@ -4742,13 +4749,30 @@ async function runTurn(params) {
4742
4749
  let currentToolNames = "";
4743
4750
  const statusWatcher = startStatusWatcher({
4744
4751
  apiConfig,
4745
- getContext: () => ({
4746
- assistantText: subAgentText || getTextContent(contentBlocks).slice(-500),
4747
- lastToolName: currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools || void 0,
4748
- lastToolResult: lastCompletedResult || void 0,
4749
- onboardingState,
4750
- userMessage
4751
- }),
4752
+ getContext: () => {
4753
+ const parts = [];
4754
+ if (userMessage) {
4755
+ parts.push(`User message: ${userMessage.slice(-200)}`);
4756
+ }
4757
+ if (onboardingState) {
4758
+ parts.push(`Build phase: ${onboardingState}`);
4759
+ }
4760
+ const text = subAgentText || getTextContent(contentBlocks).slice(-500);
4761
+ if (text) {
4762
+ parts.push(`Assistant text: ${text}`);
4763
+ }
4764
+ const toolName = currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools;
4765
+ if (toolName) {
4766
+ parts.push(`Tool: ${toolName}`);
4767
+ }
4768
+ if (lastCompletedInput) {
4769
+ parts.push(`Tool input: ${lastCompletedInput.slice(-300)}`);
4770
+ }
4771
+ if (lastCompletedResult) {
4772
+ parts.push(`Tool result: ${lastCompletedResult.slice(-200)}`);
4773
+ }
4774
+ return parts.join("\n");
4775
+ },
4752
4776
  onStatus: (label) => onEvent({ type: "status", message: label }),
4753
4777
  signal
4754
4778
  });
@@ -5066,7 +5090,11 @@ async function runTurn(params) {
5066
5090
  }
5067
5091
  }
5068
5092
  }
5069
- lastCompletedTools = toolCalls.filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).map((tc) => tc.name).join(", ");
5093
+ const lastNonExcluded = toolCalls.filter(
5094
+ (tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)
5095
+ );
5096
+ lastCompletedTools = lastNonExcluded.map((tc) => tc.name).join(", ");
5097
+ lastCompletedInput = JSON.stringify(lastNonExcluded.at(-1)?.input ?? {});
5070
5098
  lastCompletedResult = results.at(-1)?.result ?? "";
5071
5099
  for (const r of results) {
5072
5100
  state.messages.push({
@@ -5295,6 +5323,10 @@ Current date/time: ${now}
5295
5323
  {{compiled/design.md}}
5296
5324
  </design>
5297
5325
 
5326
+ <building_agent_interfaces>
5327
+ {{compiled/agent-interfaces.md}}
5328
+ </building_agent_interfaces>
5329
+
5298
5330
  <media_cdn>
5299
5331
  {{compiled/media-cdn.md}}
5300
5332
  </media_cdn>
@@ -5502,15 +5534,47 @@ ${partial}` : "[INTERRUPTED] Tool execution was stopped.";
5502
5534
  }
5503
5535
  });
5504
5536
 
5537
+ // src/automatedActions/resolve.ts
5538
+ function resolveAction(text) {
5539
+ const match = text.match(/^@@automated::(\w+)@@(.*)/s);
5540
+ if (!match) {
5541
+ return null;
5542
+ }
5543
+ const triggerName = match[1];
5544
+ if (NON_ACTION_SENTINELS.has(triggerName)) {
5545
+ return null;
5546
+ }
5547
+ let params = {};
5548
+ const remainder = match[2];
5549
+ if (remainder) {
5550
+ try {
5551
+ params = JSON.parse(remainder.split("\n")[0]);
5552
+ } catch {
5553
+ }
5554
+ }
5555
+ let body = readAsset("automatedActions", `${triggerName}.md`);
5556
+ body = body.replace(/^---[\s\S]*?---\s*/, "");
5557
+ for (const [key, value] of Object.entries(params)) {
5558
+ body = body.replaceAll(`{{${key}}}`, String(value));
5559
+ }
5560
+ return `@@automated::${triggerName}@@
5561
+ ${body}`;
5562
+ }
5563
+ var NON_ACTION_SENTINELS;
5564
+ var init_resolve = __esm({
5565
+ "src/automatedActions/resolve.ts"() {
5566
+ "use strict";
5567
+ init_assets();
5568
+ NON_ACTION_SENTINELS = /* @__PURE__ */ new Set(["background_results"]);
5569
+ }
5570
+ });
5571
+
5505
5572
  // src/headless.ts
5506
5573
  var headless_exports = {};
5507
5574
  __export(headless_exports, {
5508
5575
  startHeadless: () => startHeadless
5509
5576
  });
5510
5577
  import { createInterface } from "readline";
5511
- function loadActionPrompt(name) {
5512
- return readAsset("prompt", "actions", `${name}.md`);
5513
- }
5514
5578
  function emit(event, data, requestId) {
5515
5579
  const payload = { event, ...data };
5516
5580
  if (requestId) {
@@ -5812,15 +5876,11 @@ ${xmlParts}
5812
5876
  );
5813
5877
  }
5814
5878
  let userMessage = parsed.text ?? "";
5815
- const isCommand = !!parsed.runCommand;
5816
- const isHidden = isCommand || !!parsed.hidden;
5817
- if (parsed.runCommand === "sync") {
5818
- userMessage = loadActionPrompt("sync");
5819
- } else if (parsed.runCommand === "publish") {
5820
- userMessage = loadActionPrompt("publish");
5821
- } else if (parsed.runCommand === "buildFromInitialSpec") {
5822
- userMessage = loadActionPrompt("buildFromInitialSpec");
5879
+ const resolved = resolveAction(userMessage);
5880
+ if (resolved !== null) {
5881
+ userMessage = resolved;
5823
5882
  }
5883
+ const isHidden = resolved !== null || !!parsed.hidden;
5824
5884
  const onboardingState = parsed.onboardingState ?? "onboardingFinished";
5825
5885
  const system = buildSystemPrompt(
5826
5886
  onboardingState,
@@ -5964,7 +6024,6 @@ var log9;
5964
6024
  var init_headless = __esm({
5965
6025
  "src/headless.ts"() {
5966
6026
  "use strict";
5967
- init_assets();
5968
6027
  init_logger();
5969
6028
  init_config();
5970
6029
  init_prompt4();
@@ -5972,6 +6031,7 @@ var init_headless = __esm({
5972
6031
  init_agent();
5973
6032
  init_session();
5974
6033
  init_toolRegistry();
6034
+ init_resolve();
5975
6035
  log9 = createLogger("headless");
5976
6036
  }
5977
6037
  });
@@ -0,0 +1,184 @@
1
+ # Building Agent Interfaces
2
+
3
+ Guidance for designing conversational AI agents and their frontends. An agent interface pairs an LLM (with per-user-scoped/authenticated access to app methods as tools, handled by platform automatically) with a chat UI. The developer authors the agent's character in MSFM (`src/interfaces/agent.md`); you compile it into a system prompt and tool descriptions (`dist/interfaces/agent/`).
4
+
5
+ ## Agent Design Principles
6
+
7
+ ### System prompts define character, not procedures
8
+
9
+ A good system prompt establishes who the agent is — personality, tone, judgment style, the kind of person they sound like. It doesn't enumerate every possible interaction or restate what tools already describe.
10
+
11
+ Short and opinionated beats long and comprehensive. "Sounds like a sharp, organized friend — brief by default" gives the model more to work with than a page of behavioral rules. Define constraints through character, not checklists. Let the model's judgment work.
12
+
13
+ #### System Prompt Specifics
14
+ Always include a note like "## Tool Usage
15
+ - When multiple tool calls are independent, make them all in a single turn. Searching for three different products, or fetching two reference sites: batch them instead of doing one per turn." to help the model know it can run tools in parallel
16
+ - The user's name and current role(s) at the time of message, if any, will be automatically appended to the end of every system prompt at runtime like:
17
+
18
+ ```
19
+ ## Current User
20
+ Name: Jane Smith
21
+ Roles: editor
22
+ ```
23
+ - Unless the user specifies otherwise, always include a note that the agent can use markdown in responses (since the chat UI renders it) and should avoid using em dashes and emojis in its responses.
24
+
25
+ ### Tool descriptions are the most important artifact
26
+
27
 + The system prompt says *who* the agent is. The tool descriptions say *what it can do*. A great tool description means the agent uses the tool correctly without explicit instruction. Do not be overly precise or micromanage. Your goal with tool descriptions is to provide context and framing — trust that the model is intelligent enough to fill in the gaps. Each `tools/*.md` file should cover:
28
+
29
+ - **When to use** this tool (and when NOT to — e.g. "NOT for marking complete, use toggle-todo")
30
+ - **Parameter guidance** beyond the schema — what makes a good value, when to include optional fields, what to skip
31
+ - **Return value** and how to present results to the user
32
+
33
+ ### Not every method should be a tool
34
+
35
+ Expose methods that serve the conversational flow. Internal helpers, admin-only methods, and batch operations often don't belong in the agent's toolset. A focused set of well-described tools performs better than many underdocumented ones.
36
+
37
+ Think about what the user would actually say in conversation. If a method only makes sense triggered by another system (cron, webhook) or through a form UI, it probably shouldn't be an agent tool.
38
+
39
+ ### The MSFM spec body drives compilation
40
+
41
+ The spec (`src/interfaces/agent.md`) is the human-editable source. Write it for humans — voice, personality, capabilities, behavioral rules, edge cases. The body should read like a character brief, not a technical manual.
42
+
43
+ Model ID and config belong in the frontmatter, not the prose. The prose focuses on judgment calls: "When a user adds a task, consider whether it would benefit from a note. For vague or complex tasks, attach guidance. For simple tasks, skip it."
44
+
45
+ Use MSFM annotations for implementation-level notes that the compiler needs but the human reader doesn't — same pattern as app specs.
46
+
47
+ When defining tools for multi-user apps with access restrictions, be sure to note the roles that are allowed or disallowed from accessing the tool, as well as any other restrictions. The actual tool invocation will be rejected at runtime if the requesting user is not allowed to access the underlying method, but defining this early allows the model to gate permissions cleanly rather than vomiting an error when the user tries to do something they're not permissioned for.
48
+
49
+ ### Anti-patterns
50
+
51
+ - Avoid system prompts that restate tool schemas ("You have a tool called createTodo that takes a title and optional aiNotes...")
52
 + - Avoid generic personalities ("You are a helpful assistant") — every agent should have a distinct voice. This is often the most fun part for the user building the agent — lean in and help them enjoy bringing their agent to life!
53
+ - Avoid exposing all methods without considering conversational fit
54
+
55
+ ## Compiling the Agent Spec
56
+
57
+ When building the `dist/interfaces/agent/`, consider the agent spec, as well as the larger context of the app and especially any `@brand/` guidelines. The agent should feel as though cut from the same cloth as the rest of the app - it is simply the same backend application projected into a different modality. Take care to make it consistent with the user's app, and then output:
58
+
59
+ **`system.md`** — compiled from the spec body. Should feel like a character brief: who the agent is, how they talk, what they care about, key behavioral rules.
60
+
61
+ **`tools/*.md`** — one file per exposed method. Rich markdown with when-to-use, examples, edge cases, return value guidance. These are what make the agent actually work well.
62
+
63
+ **`agent.json`** — ties it together. Model config from frontmatter, paths to system prompt and tool files, optional `webInterfacePath`.
64
+
65
+ ## Chat UI Design
66
+
67
+ When the agent has a web frontend (via `webInterfacePath`), the chat UI is a page within the web interface.
68
+
69
+ ### Frontend SDK: `createAgentChatClient()`
70
+
71
+ The `@mindstudio-ai/interface` package provides `createAgentChatClient()` for thread management and streaming chat. All agent chat UIs should use this — don't build raw fetch/SSE handling.
72
+
73
+ **Thread management:**
74
+
75
+ ```ts
76
+ import { createAgentChatClient } from '@mindstudio-ai/interface';
77
+
78
+ const chat = createAgentChatClient();
79
+
80
+ const thread = await chat.createThread();
81
+ const { threads, nextCursor } = await chat.listThreads();
82
+ const full = await chat.getThread(thread.id);
83
+ await chat.updateThread(thread.id, 'New title');
84
+ await chat.deleteThread(thread.id);
85
+ ```
86
+
87
+ **Sending messages (streaming):**
88
+
89
+ `sendMessage` streams the agent's response via SSE. Use named callbacks for common events:
90
+
91
+ ```ts
92
+ const response = chat.sendMessage(threadId, content, {
93
+ // Text deltas — append, don't replace
94
+ onText: (delta) => setText((prev) => prev + delta),
95
+
96
+ // Extended thinking (also deltas)
97
+ onThinking: (delta) => setThinking((prev) => prev + delta),
98
+ onThinkingComplete: (thinking, signature) => setThinking(''),
99
+
100
+ // Tool execution
101
+ onToolCallStart: (id, name) => { },
102
+ onToolCallResult: (id, output) => { },
103
+
104
+ // Errors
105
+ onError: (error) => console.error(error),
106
+ });
107
+
108
+ // Resolves when stream completes
109
+ const { stopReason, usage } = await response;
110
+
111
+ // Cancel mid-stream
112
+ response.abort();
113
+ ```
114
+
115
+ **Attachments:**
116
+
117
+ Send images or documents alongside a message. Upload via `platform.uploadFile()` first, then pass CDN URLs as the 4th argument:
118
+
119
+ ```ts
120
+ const url = await platform.uploadFile(file);
121
+
122
+ chat.sendMessage(threadId, "What's in this document?", {
123
+ onText: (delta) => setText((prev) => prev + delta),
124
+ }, {
125
+ attachments: [url],
126
+ });
127
+ ```
128
+
129
+ Images (`i.mscdn.ai`) are sent as vision input. Documents (`f.mscdn.ai`) have text extracted server-side and included in context. Attachments are preserved in thread history.
130
+
131
+ **Key points:**
132
+ - `onText` and `onThinking` receive deltas (append to state, don't replace)
133
+ - `sendMessage` returns an `AbortablePromise` — a Promise with `.abort()`. Also accepts `signal` in callbacks for `AbortController` support
134
+ - Tool call events (`onToolCallStart`, `onToolCallResult`) are available for showing progress indicators
135
+ - Thread title is auto-generated after the first exchange
136
+
137
+ ### Layout
138
+
139
+ Ask `visualDesignExpert` for ideas about how to design the chat UI in a way that is appropriate and unique to the app.
140
+
141
+ User messages should be visually distinct from assistant messages (right-aligned, different background, or both). Keep it clean — no avatars unless they add meaning. Generous vertical spacing between messages so the conversation breathes. Use clean, beautiful animation where it is additive.
142
+
143
+ ### Streaming & Markdown
144
+
145
+ Display tokens as they arrive. No loading spinners that block the whole view — show partial text immediately. A subtle cursor or animation at the streaming edge signals "still generating." The user should be reading, not waiting.
146
+
147
+ Use `streamdown` for rendering markdown from streaming text. It handles unterminated blocks gracefully (the core problem with react-markdown during mid-stream rendering), includes Shiki syntax highlighting for code blocks, and supports KaTeX math and Mermaid diagrams. Install the base package and tree-shake plugins as needed (`@streamdown/code`, `@streamdown/math`, `@streamdown/mermaid`).
148
+
149
+ Pay attention to streaming text animation — fast token delivery can look jarring, and slow delivery can look laggy. Throttling renders to ~50-100ms batches smooths things out.
150
+
151
+ It is critical to never introduce layout shift or jarring transitions when dealing with responses. Messages should cleanly and smoothly transition between thinking, streaming, and completed states. Tool use should fit beautifully within the conversation and should never cause abrupt layout shift.
152
+
153
+ ### Scrolling
154
+
155
+ Use `use-stick-to-bottom` (`github.com/stackblitz-labs/use-stick-to-bottom`) for auto-scroll behavior. It handles the standard chat scroll contract: stick to bottom as new content streams in, but stop following if the user scrolls up. Don't hand-roll this — the edge cases (momentum scrolling, resize, streaming while scrolled up) are fiddly.
156
+
157
+ ### Optimistic messages
158
+
159
+ When the user sends a message, add it to the conversation immediately — don't wait for the server to acknowledge. Show a thinking/typing indicator in the assistant's response area right away so the UI feels instant. The indicator should appear the moment the user hits send, not when the first token arrives.
160
+
161
+ ### Tool calls
162
+
163
+ Show tool activity in the chat as a compact, inline status that appears when `onToolCallStart` fires and resolves when `onToolCallResult` arrives. Never show raw JSON, tool IDs, or internal details — just a human-readable description of what's happening.
164
+
165
+ ### Input area
166
+
167
+ Fixed at the bottom. Auto-growing textarea using `react-textarea-autosize`, not a single-line input. Clear send affordance (button or Enter). Disabled while the agent is streaming, with a visible stop/cancel button. Placeholder text that reflects the agent's personality, not generic "Type a message..."
168
+
169
+ ### Empty state
170
+
171
+ The first screen should invite conversation. A greeting from the agent, a few suggested prompts, or a concise description of what the agent can help with can go a long way. Always make sure they are optional though - the user needs to be able to chat directly if they want. Match the agent's voice — a casual todo assistant and a formal legal review agent should feel completely different from the first screen.
172
+
173
+ ### Mobile
174
+
175
+ Chat is inherently mobile-friendly — lean into it. Pay attention to viewport sizing on mobile as the virtual keyboard changes the available height.
176
+
177
+ ### Respect the brand
178
+
179
+ The chat UI uses the app's design system — colors, typography, voice from `@brand/`. Apply the same design standards as any other page in the web interface.
180
+
181
+ ### Anti-patterns
182
+
183
+ - Avoid designs that look like dated messaging apps from 2015
184
+ - Avoid robotic empty states ("Hello! I'm your AI assistant. How can I help you today?")
@@ -76,6 +76,8 @@ auth.name;
76
76
  auth.email;
77
77
  ```
78
78
 
79
+ For apps with an agent interface, the SDK also provides `createAgentChatClient()` for thread management and streaming chat. See the "Building Agent Interfaces" section for usage details.
80
+
79
81
  The project uses `"jsx": "react-jsx"` (automatic JSX transform) — do not `import React from 'react'`. Only import the specific hooks and types you need (e.g., `import { useState, useEffect } from 'react'`).
80
82
 
81
83
  On deploy, the platform runs `npm install && npm run build` in the web directory and hosts the output on CDN.
@@ -230,6 +232,74 @@ Expose methods as AI tools.
230
232
 
231
233
  Each listed method becomes an MCP tool. Method names and descriptions from the manifest are used as tool names and descriptions.
232
234
 
235
+ ## Agent (Conversational Interface)
236
+
237
+ A conversational interface where an LLM has access to the app's methods as tools. Unlike MCP (which exposes methods for external agents), the agent interface IS the agent — it has its own personality, system prompt, and model config, and orchestrates tool calls against the app's methods internally.
238
+
239
+ ### Spec: `src/interfaces/agent.md`
240
+
241
+ The human-readable spec. Frontmatter contains structured fields; the prose body is the behavioral spec — voice, personality, capabilities, rules — written in MSFM.
242
+
243
+ ```yaml
244
+ ---
245
+ name: Todo Assistant
246
+ model: {"model": "claude-4-5-haiku", "temperature": 0.5, "maxResponseTokens": 15000}
247
+ description: Conversational agent that helps users manage their to-do list.
248
+ ---
249
+ ```
250
+
251
+ Frontmatter fields:
252
+ - `name` — agent display name
253
+ - `model` — JSON string with `model` (MindStudio model ID), `temperature`, `maxResponseTokens`, and optional `config` (model-specific settings like `reasoning`, `tools`, etc.). Use `askMindStudioSdk` to look up available model IDs and their config options when setting the model ID. The user's UI will have a nice visual picker to allow them to change it later, so only validate the model ID when you are setting it yourself — otherwise, assume the value is correct if it has been changed.
254
+ - `description` — one-liner for agent card/listing
255
+
256
+ The prose body contains sections like Voice & Personality, Capabilities, Behavior — whatever structure serves the agent's character. This is compiled into the system prompt and tool descriptions.
257
+
258
+ ### Compiled Output: `dist/interfaces/agent/`
259
+
260
+ ```
261
+ dist/interfaces/agent/
262
+ ├── agent.json ← config the platform reads
263
+ ├── system.md ← compiled system prompt
264
+ └── tools/
265
+ ├── createTodo.md ← rich tool description per method
266
+ ├── listTodos.md
267
+ └── ...
268
+ ```
269
+
270
+ ### Config (`agent.json`)
271
+
272
+ ```json
273
+ {
274
+ "agent": {
275
+ "model": "claude-4-5-haiku",
276
+ "temperature": 0.5,
277
+ "maxTokens": 15000,
278
+ "systemPrompt": "system.md",
279
+ "tools": [
280
+ { "method": "create-todo", "description": "tools/createTodo.md" },
281
+ { "method": "list-todos", "description": "tools/listTodos.md" }
282
+ ],
283
+ "webInterfacePath": "/chat"
284
+ }
285
+ }
286
+ ```
287
+
288
+ | Field | Description |
289
+ |-------|-------------|
290
+ | `model` | MindStudio model ID (e.g. `claude-4-5-haiku`, `claude-4-6-sonnet`) |
291
+ | `temperature` | Model temperature |
292
+ | `maxTokens` | Max response tokens |
293
+ | `systemPrompt` | Relative path to the compiled system prompt markdown file |
294
+ | `tools` | Array of tool entries — `method` references a method `id` from the manifest, `description` is a relative path to a markdown file with rich tool docs (when to use, examples, edge cases, parameter guidance) |
295
+ | `webInterfacePath` | Optional. If the app has a web interface with a chat page, this path tells the IDE where to show the preview. Otherwise the agent is accessed via API. |
296
+
297
+ ### Manifest Declaration
298
+
299
+ ```json
300
+ { "type": "agent", "path": "dist/interfaces/agent/agent.json" }
301
+ ```
302
+
233
303
  ## Manifest Declaration
234
304
 
235
305
  Each interface is declared in `mindstudio.json`:
@@ -244,7 +314,8 @@ Each interface is declared in `mindstudio.json`:
244
314
  { "type": "telegram", "path": "dist/interfaces/telegram/interface.json" },
245
315
  { "type": "webhook", "path": "dist/interfaces/webhook/interface.json" },
246
316
  { "type": "email", "path": "dist/interfaces/email/interface.json" },
247
- { "type": "mcp", "path": "dist/interfaces/mcp/interface.json" }
317
+ { "type": "mcp", "path": "dist/interfaces/mcp/interface.json" },
318
+ { "type": "agent", "path": "dist/interfaces/agent/agent.json" }
248
319
  ]
249
320
  }
250
321
  ```
@@ -89,7 +89,7 @@
89
89
 
90
90
  | Field | Type | Required | Description |
91
91
  |-------|------|----------|-------------|
92
- | `type` | `string` | Yes | One of: `web`, `api`, `discord`, `telegram`, `cron`, `webhook`, `email`, `mcp` |
92
+ | `type` | `string` | Yes | One of: `web`, `api`, `discord`, `telegram`, `cron`, `webhook`, `email`, `mcp`, `agent` |
93
93
  | `path` | `string` | No | Path to the interface config file |
94
94
  | `config` | `object` | No | Inline config (alternative to a file) |
95
95
  | `enabled` | `boolean` | No | Default `true`. Set `false` to skip during build. |
@@ -22,6 +22,7 @@ my-app/
22
22
  assets/ logos, icons
23
23
  web.md web UI spec
24
24
  api.md API conventions
25
+ agent.md agent personality and behavior spec
25
26
  cron.md scheduled job descriptions
26
27
  roadmap/ feature roadmap (one file per item, type: roadmap)
27
28
 
@@ -46,6 +47,10 @@ my-app/
46
47
  webhook/interface.json webhook config
47
48
  email/interface.json email config
48
49
  mcp/interface.json MCP config
50
+ agent/ agent interface
51
+ agent.json agent config
52
+ system.md compiled system prompt
53
+ tools/ tool descriptions (one .md per method)
49
54
  ```
50
55
 
51
56
  ## What Goes Where
@@ -19,7 +19,7 @@ The scaffold starts with these spec files that cover the full picture of the app
19
19
  - **`src/interfaces/@brand/voice.md`** — voice and terminology: tone, error messages, word choices
20
20
  - **`src/roadmap/`** — feature roadmap. One file per feature (`type: roadmap`). See "Roadmap" below.
21
21
 
22
- Start from these and extend as needed. Add interface specs for other interface types (`api.md`, `cron.md`, etc.) if the app uses them. Split `app.md` into multiple files if the domain is complex. The agent uses the entire `src/` folder as compilation context, so organize however serves clarity.
22
+ Start from these and extend as needed. Add interface specs for other interface types (`api.md`, `cron.md`, `agent.md`, etc.) if the app uses them. Split `app.md` into multiple files if the domain is complex. The agent uses the entire `src/` folder as compilation context, so organize however serves clarity.
23
23
 
24
24
  Users often care about look and feel as much as (or more than) underlying data structures. Don't treat the brand and interface specs as an afterthought — for many users, the visual identity and voice are the first things they want to get right.
25
25
 
@@ -18,12 +18,15 @@
18
18
  ## Communication
19
19
  The user can already see your tool calls, so most of your work is visible without narration. Focus text output on three things:
20
20
  - **Decisions that need input.** Questions, tradeoffs, ambiguity that blocks progress.
21
- - **Milestones.** What you built, what changed. Summarize in plain language rather than listing a per-file changelog.
21
+ - **Milestones.** What you built, what changed. Summarize in plain language rather than listing a per-file changelog. If you've just built something, help the user understand how to use it, especially if they're seeing an MVP or new feature for the first time. For complex things, offer to walk them through a demo using `runAutomatedBrowserTest`
22
22
  - **Errors or blockers.** Something failed or the approach needs to shift.
23
23
 
24
24
  Skip the rest: narrating what you're about to do, restating what the user asked, explaining tool calls they can already see.
25
25
 
26
- Style:
26
+ ### Automated messages
27
+ You will occasionally receive automated messages prefixed with `@@automated_message@@` - these are triggered by things like background agents returning their work, or by the user clicking a button in the UI (e.g., the user might click a "Build Feature" button in the product roadmap UI, and you will receive a message detailing what they want to build). You will be able to see these messages in your chat history but the user will not see them, so acknowledge them appropriately and then perform the requested work.
28
+
29
+ ## Style
27
30
  - Your messages are rendered as markdown. Use formatting (headers, bold, lists, code blocks) when it helps readability. You can also include images using `![alt](url)` — use this to show the user screenshots, generated images, or other visual references inline in your messages.
28
31
  - Keep language accessible. Describe what the app *does*, not how it's implemented, unless the user demonstrates technical fluency.
29
32
  - Always use full paths relative to the project root when mentioning files (`dist/interfaces/web/src/App.tsx`, not `App.tsx`). Paths will be rendered as clickable links for the user.
@@ -6,9 +6,10 @@ The user just arrived at a blank project with a full-screen chat. They may have
6
6
  Don't list features. Frame what MindStudio does through the lens of what the user wants. A MindStudio app is a managed TypeScript project with a backend, optional database, optional auth, and one or more interfaces. The key is that it's extremely flexible — here are some examples of what people build:
7
7
 
8
8
  - **Business tools** — dashboards, admin panels, approval workflows, data entry apps, internal tools with role-based access
9
- - **AI-powered apps** — chatbots, content generators, document processors, image/video tools, AI agents that take actions (send emails, update CRMs, post to Slack)
9
+ - **AI-powered apps** — chatbots, content generators, document processors, image/video tools, conversational agents with tool access, AI agents that take actions (send emails, update CRMs, post to Slack)
10
10
  - **Automations with no UI** — a set of cron jobs that scrape websites and send alerts, a webhook handler that syncs data between services, an email processor that triages inbound support requests
11
- - **Bots** Discord slash-command bots, Telegram bots, MCP tool servers for AI assistants
11
+ - **Conversational AI Agents** - Full conversational AI agents with custom frontends and access to the app's methods as tools. Expose all or only a subset of app functionality and manage access to methods on a per-user basis. Build fully custom chat UIs and use any model you want, including Gemini, GPT, Anthropic Claude, and any of the hundreds of other models MindStudio supports automatically.
12
+ - **Bots & agent tools** — Discord slash-command bots, Telegram bots, MCP tool servers
12
13
  - **Creative/interactive projects** — games with Three.js or p5.js, interactive visualizations, generative art, portfolio sites with dynamic backends
13
14
  - **API services** — backend logic exposed as REST endpoints for other systems to consume
14
15
  - **Simple static sites** — no backend needed, just a web interface with a build step
@@ -38,7 +38,7 @@ Always consult the code sanity check before writing code in initialCodegen with
38
38
 
39
39
  ### QA (`runAutomatedBrowserTest`)
40
40
 
41
- For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full chrome browser automation — it's heavyweight. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Run a scenario first to seed test data and set user roles.
41
+ For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full chrome browser automation — it's heavyweight. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Run a scenario first to seed test data and set user roles. The user is able to watch QA work on their screen via a live browser preview - the cursor will move, type, etc - so you can also use this to demo functionality to the user and help them understand how to use their app.
42
42
 
43
43
  ### Background Execution
44
44
 
@@ -60,7 +60,7 @@ When you receive background results:
60
60
  #### When You Are Allowed to Background
61
61
 
62
62
  You can only background the following two tasks, unless the user specifically asks you to do work in the background:
63
- - `productVision` seeding the intiial roadmap after writing the spec for the first time. This task takes a while and we can allow the user to continue building while it happens in the background
63
+ - `productVision` seeding the initial roadmap after writing the spec for the first time. This task takes a while and we can allow the user to continue building while it happens in the background.
64
64
  - After writing the spec, once you have finalized the shape of the app, ask `visualDesignExpert` to create an "iphone app store" style icon for the app, then set it with `setProjectMetadata({ iconUrl: ... })`
65
65
 
66
66
  Do not background any other tasks.
@@ -23,6 +23,8 @@ These are things we already know about and have decided to accept:
23
23
  - framer-motion
24
24
  - styled-components
25
25
  - @tabler/icons-react
26
+ - streamdown
27
+ - react-textarea-autosize
26
28
  - Preferences:
27
29
  - use [wouter](https://github.com/molefrog/wouter) for React routing instead of reaching for react-router
28
30
 
@@ -7,7 +7,7 @@ There are two categories of animation and you should think of them separately:
7
7
  - Design animations: think beautiful layout reveals, dramatic loading and success states for user onboarding, beautiful scroll-driven animations on a landing page. These are the place to show off - and if you're showing off you better get it right. Anything that looks dated or janky will be disappointing to the user. Done correctly, these animations are powerful and transformative - and when the design calls for it, you should take a risk and suggest something big, bold, and creative. Remember, the user can always modify or change things later. It's better to dream big and walk it back than to deliver something generic or bland.
8
8
 
9
9
  ### Patterns to Use
10
- - CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) — native, off main thread, even though there is still a little lag in browser support we should always be using this when we need scroll-driven animations.
10
+ - CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) — native, off main thread, even though there is still a little lag in browser support we should always be using this when we need scroll-driven animations. Scroll driven animations that animate based on scrollport are very slick and look beautiful in landing pages. Think about how you can use them.
11
11
  - Spring physics for natural-feeling motion
12
12
  - Purposeful micro-interactions — scaling, color shifts, depth changes on hover/click
13
13
  - Entrance reveals — content animating when it enters the view - can be powerful, but can very easily feel cheap if it is just sections of a page animating in on scroll, for example. Be very thoughtful and intentional when animating in this way.
@@ -20,7 +20,7 @@ Then, think about the layout and UI patterns - these are the core of the user's
20
20
 
21
21
  Every recommendation must be immediately usable in production. Font names with CSS URLs. Color palettes as hex values. Image URLs that resolve. No placeholders, no "you could try..." The developer interprets your results, so focus on being useful rather than rigidly formatted.
22
22
 
23
- When giving longer responses like full design plans, be sure to include implementation notes specific to this project for things the developer should pay extra close attention to as it builds to avoid any gotchas or oversights. The developer has a lot on their plate and we have a chance to help them out. Reference <app_interface_design_notes> as a resource for this information.
23
+ When giving longer responses like full design plans, be sure to include implementation notes specific to this project for things the developer should pay extra close attention to as it builds to avoid any gotchas or oversights. The developer has a lot on their plate and we have a chance to help them out. Reference <app_interface_design_notes> as a resource for this information. The developer doesn't have access to your internal notes and references, so be explicit when referring to things, don't just say "Reference 11" or something like that, as they'll have no idea what that means.
24
24
 
25
25
  Important: Assume the developer has a terrible sense of design. Therefore, you must be direct and unambiguous, and be prescriptive about design choices - don't leave room for assumption or interpretation. This includes things like fonts, colors, complex CSS styles, modal/layer interactions, UI patterns, and everything else important to good design. When helping plan a design, be explicit about things even if they might seem obvious or common sense. The developer is highly technical and that is the best language in which to communicate precisely with them - use raw CSS snippets, pseudocode, and other technical terms liberally to be as precise and refined as possible - they will appreciate it and do better work as a result!
26
26
 
@@ -6,7 +6,12 @@ Study the patterns provided in <ui_case_studies> and actually spend time breakin
6
6
 
7
7
  When describing UI patterns to the developer, be verbose and explicit. Describe every aspect - don't leave room for interpretation by the developer because it ain't gonna be pretty.
8
8
 
9
-
10
9
  ### Dated Patterns to Avoid
11
10
 
12
11
  The design should look like it could be an Apple iOS/macOS app of the year winner for 2026. Avoid long pages, things that feel like blogs, things that borrow from "dated" app store apps, and the like. It should feel like an award winner from the past two years, not an award winner from a decade ago.
12
+
13
+ ### Notes for Designing AI Chat Interfaces
14
+
15
+ If the app includes an AI chat interface, take care to make it beautiful and intentional. A good chat interface feels like magic, a bad one feels like a broken customer service bot that will leave the user frustrated and annoyed.
16
+
17
+ Pay close attention to text streaming when the AI replies - it should feel natural, smooth, and beautiful. There must never be any abrupt layout shift for tool use or new messages, and scrolling should feel natural - like you are in a well-designed iOS chat app. Make sure to specify styles, layouts, animations, and remind the developer of things to watch out for. Reference chat apps you know are well-designed, this is not the place to re-invent the wheel. Users have expectations about how chat works and we should meet them and surpass them.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.48",
3
+ "version": "0.1.49",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",
@@ -1,15 +0,0 @@
1
- This is an automated action triggered by the user pressing "Build" in the editor after reviewing the spec.
2
-
3
- The user has reviewed the spec and is ready to build.
4
-
5
- Think about your approach and then get a quick sanity check from `codeSanityCheck` to make sure you aren't missing anything.
6
-
7
- Then, build everything in one turn: methods, tables, interfaces, manifest updates, and scenarios, using the spec as the master plan.
8
-
9
- When code generation is complete, verify your work:
10
- - First, run use `runScenario` to seed test data, then use `runMethod` to confirm a method works
11
- - If the app has a web frontend, check the browser logs to make sure there are no errors rendering it.
12
- - Ask the `visualDesignExpert` to take a screenshot and verity that the visual design looks correct. Fix any issues it flags - we want the user's first time seeing the finished product to truly wow them.
13
- - Finally, use `runAutomatedBrowserTest` to smoke-test the main UI flow. The dev database is a disposable snapshot, so don't worry about being destructive. Fix any errors before finishing.
14
-
15
- When everything is working, use `productVision` to mark the MVP roadmap item as done, then call `setProjectOnboardingState({ state: "onboardingFinished" })`.
@@ -1,12 +0,0 @@
1
- This is an automated action triggered by the user pressing "Publish" in the editor.
2
-
3
- The user wants to deploy their app. Pushing to the `main` branch triggers a production deploy.
4
-
5
- Review the current state of the working tree — what has changed since the last commit, what's been committed since the last push, and the overall shape of recent work. Write a user-friendly changelog with `presentPublishPlan` — summarize what changed in plain language ("added vendor approval workflow", "fixed invoice totals", "updated the dashboard layout"). Reference specific code or file paths only when it helps clarity. This is what the user will see before deploying.
6
-
7
- If approved:
8
- - Stage and commit any uncommitted changes with a clean, descriptive commit message
9
- - Push to main
10
- - Let the user know their app is deploying
11
-
12
- If dismissed, acknowledge and do nothing.
@@ -1,19 +0,0 @@
1
- This is an automated action triggered by the user pressing "Sync" in the editor.
2
-
3
- The user has manually edited files since the last sync. The `refs/sync-point` git ref marks the last known-good sync state. It's created using a temporary git index that captures the full working tree (including unstaged changes) as a tree object — so it represents exactly what the files looked like at sync time, not just what was committed.
4
-
5
- To see what the user changed, run: `git diff refs/sync-point -- src/ dist/`
6
-
7
- This compares the sync-point tree against the current working tree. Do not add `HEAD` or any other ref — the command as written diffs directly against the working tree, which is what you want.
8
-
9
- In the diff output: lines prefixed with `-` are what was in the file at last sync. Lines prefixed with `+` are the user's current edits. Sync should bring the other side in line with the `+` side.
10
-
11
- Analyze the changes and write a sync plan with `presentSyncPlan` — a clear markdown summary of what changed and what you intend to update. Write it for a human: describe changes in plain language ("renamed the greeting field", "added a note about error handling"), not as a list of file paths and code diffs. Reference specific code or file paths only when it helps clarity. The user will review and approve before you make changes.
12
-
13
- If approved:
14
- - If spec files (`src/`) changed, update the corresponding code in `dist/` to match
15
- - If code files (`dist/`) changed, update the corresponding spec in `src/` to match
16
- - If both changed, reconcile — spec is the source of truth for intent, but respect code changes that add implementation detail
17
- - When all files are synced, call `clearSyncStatus`
18
-
19
- If dismissed, acknowledge and do nothing.