@mindstudio-ai/remy 0.1.48 → 0.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/headless.js +132 -79
- package/dist/index.js +98 -38
- package/dist/prompt/compiled/agent-interfaces.md +184 -0
- package/dist/prompt/compiled/interfaces.md +72 -1
- package/dist/prompt/compiled/manifest.md +1 -1
- package/dist/prompt/compiled/platform.md +5 -0
- package/dist/prompt/static/authoring.md +1 -1
- package/dist/prompt/static/instructions.md +5 -2
- package/dist/prompt/static/intake.md +3 -2
- package/dist/prompt/static/team.md +2 -2
- package/dist/subagents/codeSanityCheck/prompt.md +2 -0
- package/dist/subagents/designExpert/prompts/animation.md +1 -1
- package/dist/subagents/designExpert/prompts/instructions.md +1 -1
- package/dist/subagents/designExpert/prompts/ui-patterns.md +6 -1
- package/package.json +1 -1
- package/dist/prompt/actions/buildFromInitialSpec.md +0 -15
- package/dist/prompt/actions/publish.md +0 -12
- package/dist/prompt/actions/sync.md +0 -19
package/dist/headless.js
CHANGED
|
@@ -7,45 +7,8 @@ var __export = (target, all) => {
|
|
|
7
7
|
// src/headless.ts
|
|
8
8
|
import { createInterface } from "readline";
|
|
9
9
|
|
|
10
|
-
// src/assets.ts
|
|
11
|
-
import fs from "fs";
|
|
12
|
-
import path from "path";
|
|
13
|
-
var ROOT = findRoot(
|
|
14
|
-
import.meta.dirname ?? path.dirname(new URL(import.meta.url).pathname)
|
|
15
|
-
);
|
|
16
|
-
function findRoot(start) {
|
|
17
|
-
let dir = start;
|
|
18
|
-
while (dir !== path.dirname(dir)) {
|
|
19
|
-
if (fs.existsSync(path.join(dir, "package.json"))) {
|
|
20
|
-
return dir;
|
|
21
|
-
}
|
|
22
|
-
dir = path.dirname(dir);
|
|
23
|
-
}
|
|
24
|
-
return start;
|
|
25
|
-
}
|
|
26
|
-
var ASSETS_BASE = fs.existsSync(path.join(ROOT, "dist", "prompt")) ? path.join(ROOT, "dist") : path.join(ROOT, "src");
|
|
27
|
-
function assetPath(...segments) {
|
|
28
|
-
return path.join(ASSETS_BASE, ...segments);
|
|
29
|
-
}
|
|
30
|
-
function readAsset(...segments) {
|
|
31
|
-
const full = assetPath(...segments);
|
|
32
|
-
try {
|
|
33
|
-
return fs.readFileSync(full, "utf-8").trim();
|
|
34
|
-
} catch {
|
|
35
|
-
throw new Error(`Required asset missing: ${full}`);
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
function readJsonAsset(fallback, ...segments) {
|
|
39
|
-
const full = assetPath(...segments);
|
|
40
|
-
try {
|
|
41
|
-
return JSON.parse(fs.readFileSync(full, "utf-8"));
|
|
42
|
-
} catch {
|
|
43
|
-
return fallback;
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
10
|
// src/logger.ts
|
|
48
|
-
import
|
|
11
|
+
import fs from "fs";
|
|
49
12
|
var LEVELS = {
|
|
50
13
|
error: 0,
|
|
51
14
|
warn: 1,
|
|
@@ -94,11 +57,11 @@ function createLogger(module) {
|
|
|
94
57
|
}
|
|
95
58
|
|
|
96
59
|
// src/config.ts
|
|
97
|
-
import
|
|
98
|
-
import
|
|
60
|
+
import fs2 from "fs";
|
|
61
|
+
import path from "path";
|
|
99
62
|
import os from "os";
|
|
100
63
|
var log = createLogger("config");
|
|
101
|
-
var CONFIG_PATH =
|
|
64
|
+
var CONFIG_PATH = path.join(
|
|
102
65
|
os.homedir(),
|
|
103
66
|
".mindstudio-local-tunnel",
|
|
104
67
|
"config.json"
|
|
@@ -106,7 +69,7 @@ var CONFIG_PATH = path2.join(
|
|
|
106
69
|
var DEFAULT_BASE_URL = "https://api.mindstudio.ai";
|
|
107
70
|
function loadConfigFile() {
|
|
108
71
|
try {
|
|
109
|
-
const raw =
|
|
72
|
+
const raw = fs2.readFileSync(CONFIG_PATH, "utf-8");
|
|
110
73
|
log.debug("Loaded config file", { path: CONFIG_PATH });
|
|
111
74
|
return JSON.parse(raw);
|
|
112
75
|
} catch (err) {
|
|
@@ -138,6 +101,43 @@ function resolveConfig(flags) {
|
|
|
138
101
|
return { apiKey, baseUrl: baseUrl2 };
|
|
139
102
|
}
|
|
140
103
|
|
|
104
|
+
// src/assets.ts
|
|
105
|
+
import fs3 from "fs";
|
|
106
|
+
import path2 from "path";
|
|
107
|
+
var ROOT = findRoot(
|
|
108
|
+
import.meta.dirname ?? path2.dirname(new URL(import.meta.url).pathname)
|
|
109
|
+
);
|
|
110
|
+
function findRoot(start) {
|
|
111
|
+
let dir = start;
|
|
112
|
+
while (dir !== path2.dirname(dir)) {
|
|
113
|
+
if (fs3.existsSync(path2.join(dir, "package.json"))) {
|
|
114
|
+
return dir;
|
|
115
|
+
}
|
|
116
|
+
dir = path2.dirname(dir);
|
|
117
|
+
}
|
|
118
|
+
return start;
|
|
119
|
+
}
|
|
120
|
+
var ASSETS_BASE = fs3.existsSync(path2.join(ROOT, "dist", "prompt")) ? path2.join(ROOT, "dist") : path2.join(ROOT, "src");
|
|
121
|
+
function assetPath(...segments) {
|
|
122
|
+
return path2.join(ASSETS_BASE, ...segments);
|
|
123
|
+
}
|
|
124
|
+
function readAsset(...segments) {
|
|
125
|
+
const full = assetPath(...segments);
|
|
126
|
+
try {
|
|
127
|
+
return fs3.readFileSync(full, "utf-8").trim();
|
|
128
|
+
} catch {
|
|
129
|
+
throw new Error(`Required asset missing: ${full}`);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
function readJsonAsset(fallback, ...segments) {
|
|
133
|
+
const full = assetPath(...segments);
|
|
134
|
+
try {
|
|
135
|
+
return JSON.parse(fs3.readFileSync(full, "utf-8"));
|
|
136
|
+
} catch {
|
|
137
|
+
return fallback;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
141
|
// src/tools/_helpers/sidecar.ts
|
|
142
142
|
var log2 = createLogger("sidecar");
|
|
143
143
|
var baseUrl = null;
|
|
@@ -358,6 +358,10 @@ Current date/time: ${now}
|
|
|
358
358
|
{{compiled/design.md}}
|
|
359
359
|
</design>
|
|
360
360
|
|
|
361
|
+
<building_agent_interfaces>
|
|
362
|
+
{{compiled/agent-interfaces.md}}
|
|
363
|
+
</building_agent_interfaces>
|
|
364
|
+
|
|
361
365
|
<media_cdn>
|
|
362
366
|
{{compiled/media-cdn.md}}
|
|
363
367
|
</media_cdn>
|
|
@@ -1099,7 +1103,7 @@ var presentPublishPlanTool = {
|
|
|
1099
1103
|
var presentPlanTool = {
|
|
1100
1104
|
definition: {
|
|
1101
1105
|
name: "presentPlan",
|
|
1102
|
-
description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
|
|
1106
|
+
description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
|
|
1103
1107
|
inputSchema: {
|
|
1104
1108
|
type: "object",
|
|
1105
1109
|
properties: {
|
|
@@ -1284,7 +1288,7 @@ var confirmDestructiveActionTool = {
|
|
|
1284
1288
|
properties: {
|
|
1285
1289
|
message: {
|
|
1286
1290
|
type: "string",
|
|
1287
|
-
description: "Explanation of what is about to happen and why confirmation is needed
|
|
1291
|
+
description: "Explanation of what is about to happen and why confirmation is needed in natural language - avoid technical terms or mentions of variables, bash commands, or other system-level concepts.."
|
|
1288
1292
|
},
|
|
1289
1293
|
confirmLabel: {
|
|
1290
1294
|
type: "string",
|
|
@@ -2241,8 +2245,8 @@ function startStatusWatcher(config) {
|
|
|
2241
2245
|
}
|
|
2242
2246
|
inflight = true;
|
|
2243
2247
|
try {
|
|
2244
|
-
const
|
|
2245
|
-
if (!
|
|
2248
|
+
const context = getContext();
|
|
2249
|
+
if (!context) {
|
|
2246
2250
|
return;
|
|
2247
2251
|
}
|
|
2248
2252
|
const res = await fetch(url, {
|
|
@@ -2251,13 +2255,7 @@ function startStatusWatcher(config) {
|
|
|
2251
2255
|
"Content-Type": "application/json",
|
|
2252
2256
|
Authorization: `Bearer ${apiConfig.apiKey}`
|
|
2253
2257
|
},
|
|
2254
|
-
body: JSON.stringify({
|
|
2255
|
-
assistantText: ctx.assistantText.slice(-500),
|
|
2256
|
-
lastToolName: ctx.lastToolName,
|
|
2257
|
-
lastToolResult: ctx.lastToolResult?.slice(-200),
|
|
2258
|
-
onboardingState: ctx.onboardingState,
|
|
2259
|
-
userMessage: ctx.userMessage?.slice(-200)
|
|
2260
|
-
}),
|
|
2258
|
+
body: JSON.stringify({ context }),
|
|
2261
2259
|
signal
|
|
2262
2260
|
});
|
|
2263
2261
|
if (!res.ok) {
|
|
@@ -2294,7 +2292,7 @@ function cleanMessagesForApi(messages) {
|
|
|
2294
2292
|
if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
|
|
2295
2293
|
return {
|
|
2296
2294
|
...msg,
|
|
2297
|
-
content: msg.content.replace(/^@@automated::[^@]
|
|
2295
|
+
content: msg.content.replace(/^@@automated::[^@]*@@[^\n]*\n?/, "")
|
|
2298
2296
|
};
|
|
2299
2297
|
}
|
|
2300
2298
|
if (!Array.isArray(msg.content)) {
|
|
@@ -2382,12 +2380,23 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
|
|
|
2382
2380
|
let currentToolNames = "";
|
|
2383
2381
|
const statusWatcher = startStatusWatcher({
|
|
2384
2382
|
apiConfig,
|
|
2385
|
-
getContext: () =>
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2383
|
+
getContext: () => {
|
|
2384
|
+
const parts = [];
|
|
2385
|
+
if (task) {
|
|
2386
|
+
parts.push(`Task: ${task.slice(-200)}`);
|
|
2387
|
+
}
|
|
2388
|
+
const text = getPartialText(contentBlocks);
|
|
2389
|
+
if (text) {
|
|
2390
|
+
parts.push(`Assistant text: ${text.slice(-500)}`);
|
|
2391
|
+
}
|
|
2392
|
+
if (currentToolNames) {
|
|
2393
|
+
parts.push(`Tool: ${currentToolNames}`);
|
|
2394
|
+
}
|
|
2395
|
+
if (lastToolResult) {
|
|
2396
|
+
parts.push(`Tool result: ${lastToolResult.slice(-200)}`);
|
|
2397
|
+
}
|
|
2398
|
+
return parts.join("\n");
|
|
2399
|
+
},
|
|
2391
2400
|
onStatus: (label) => emit2({ type: "status", message: label }),
|
|
2392
2401
|
signal
|
|
2393
2402
|
});
|
|
@@ -3413,6 +3422,7 @@ Each interface type invokes the same backend methods. Methods don't know which i
|
|
|
3413
3422
|
- Telegram \u2014 message-handling bots
|
|
3414
3423
|
- Email \u2014 inbound email processing
|
|
3415
3424
|
- MCP \u2014 tool servers for AI assistants
|
|
3425
|
+
- Agent \u2014 conversational LLM interface with tool access to backend methods
|
|
3416
3426
|
|
|
3417
3427
|
## Backend
|
|
3418
3428
|
|
|
@@ -4476,6 +4486,7 @@ async function runTurn(params) {
|
|
|
4476
4486
|
"editsFinished"
|
|
4477
4487
|
]);
|
|
4478
4488
|
let lastCompletedTools = "";
|
|
4489
|
+
let lastCompletedInput = "";
|
|
4479
4490
|
let lastCompletedResult = "";
|
|
4480
4491
|
while (true) {
|
|
4481
4492
|
let getOrCreateAccumulator2 = function(id, name) {
|
|
@@ -4500,13 +4511,30 @@ async function runTurn(params) {
|
|
|
4500
4511
|
let currentToolNames = "";
|
|
4501
4512
|
const statusWatcher = startStatusWatcher({
|
|
4502
4513
|
apiConfig,
|
|
4503
|
-
getContext: () =>
|
|
4504
|
-
|
|
4505
|
-
|
|
4506
|
-
|
|
4507
|
-
|
|
4508
|
-
|
|
4509
|
-
|
|
4514
|
+
getContext: () => {
|
|
4515
|
+
const parts = [];
|
|
4516
|
+
if (userMessage) {
|
|
4517
|
+
parts.push(`User message: ${userMessage.slice(-200)}`);
|
|
4518
|
+
}
|
|
4519
|
+
if (onboardingState) {
|
|
4520
|
+
parts.push(`Build phase: ${onboardingState}`);
|
|
4521
|
+
}
|
|
4522
|
+
const text = subAgentText || getTextContent(contentBlocks).slice(-500);
|
|
4523
|
+
if (text) {
|
|
4524
|
+
parts.push(`Assistant text: ${text}`);
|
|
4525
|
+
}
|
|
4526
|
+
const toolName = currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools;
|
|
4527
|
+
if (toolName) {
|
|
4528
|
+
parts.push(`Tool: ${toolName}`);
|
|
4529
|
+
}
|
|
4530
|
+
if (lastCompletedInput) {
|
|
4531
|
+
parts.push(`Tool input: ${lastCompletedInput.slice(-300)}`);
|
|
4532
|
+
}
|
|
4533
|
+
if (lastCompletedResult) {
|
|
4534
|
+
parts.push(`Tool result: ${lastCompletedResult.slice(-200)}`);
|
|
4535
|
+
}
|
|
4536
|
+
return parts.join("\n");
|
|
4537
|
+
},
|
|
4510
4538
|
onStatus: (label) => onEvent({ type: "status", message: label }),
|
|
4511
4539
|
signal
|
|
4512
4540
|
});
|
|
@@ -4824,7 +4852,11 @@ async function runTurn(params) {
|
|
|
4824
4852
|
}
|
|
4825
4853
|
}
|
|
4826
4854
|
}
|
|
4827
|
-
|
|
4855
|
+
const lastNonExcluded = toolCalls.filter(
|
|
4856
|
+
(tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)
|
|
4857
|
+
);
|
|
4858
|
+
lastCompletedTools = lastNonExcluded.map((tc) => tc.name).join(", ");
|
|
4859
|
+
lastCompletedInput = JSON.stringify(lastNonExcluded.at(-1)?.input ?? {});
|
|
4828
4860
|
lastCompletedResult = results.at(-1)?.result ?? "";
|
|
4829
4861
|
for (const r of results) {
|
|
4830
4862
|
state.messages.push({
|
|
@@ -4917,11 +4949,36 @@ ${partial}` : "[INTERRUPTED] Tool execution was stopped.";
|
|
|
4917
4949
|
}
|
|
4918
4950
|
};
|
|
4919
4951
|
|
|
4952
|
+
// src/automatedActions/resolve.ts
|
|
4953
|
+
var NON_ACTION_SENTINELS = /* @__PURE__ */ new Set(["background_results"]);
|
|
4954
|
+
function resolveAction(text) {
|
|
4955
|
+
const match = text.match(/^@@automated::(\w+)@@(.*)/s);
|
|
4956
|
+
if (!match) {
|
|
4957
|
+
return null;
|
|
4958
|
+
}
|
|
4959
|
+
const triggerName = match[1];
|
|
4960
|
+
if (NON_ACTION_SENTINELS.has(triggerName)) {
|
|
4961
|
+
return null;
|
|
4962
|
+
}
|
|
4963
|
+
let params = {};
|
|
4964
|
+
const remainder = match[2];
|
|
4965
|
+
if (remainder) {
|
|
4966
|
+
try {
|
|
4967
|
+
params = JSON.parse(remainder.split("\n")[0]);
|
|
4968
|
+
} catch {
|
|
4969
|
+
}
|
|
4970
|
+
}
|
|
4971
|
+
let body = readAsset("automatedActions", `${triggerName}.md`);
|
|
4972
|
+
body = body.replace(/^---[\s\S]*?---\s*/, "");
|
|
4973
|
+
for (const [key, value] of Object.entries(params)) {
|
|
4974
|
+
body = body.replaceAll(`{{${key}}}`, String(value));
|
|
4975
|
+
}
|
|
4976
|
+
return `@@automated::${triggerName}@@
|
|
4977
|
+
${body}`;
|
|
4978
|
+
}
|
|
4979
|
+
|
|
4920
4980
|
// src/headless.ts
|
|
4921
4981
|
var log9 = createLogger("headless");
|
|
4922
|
-
function loadActionPrompt(name) {
|
|
4923
|
-
return readAsset("prompt", "actions", `${name}.md`);
|
|
4924
|
-
}
|
|
4925
4982
|
function emit(event, data, requestId) {
|
|
4926
4983
|
const payload = { event, ...data };
|
|
4927
4984
|
if (requestId) {
|
|
@@ -5223,15 +5280,11 @@ ${xmlParts}
|
|
|
5223
5280
|
);
|
|
5224
5281
|
}
|
|
5225
5282
|
let userMessage = parsed.text ?? "";
|
|
5226
|
-
const
|
|
5227
|
-
|
|
5228
|
-
|
|
5229
|
-
userMessage = loadActionPrompt("sync");
|
|
5230
|
-
} else if (parsed.runCommand === "publish") {
|
|
5231
|
-
userMessage = loadActionPrompt("publish");
|
|
5232
|
-
} else if (parsed.runCommand === "buildFromInitialSpec") {
|
|
5233
|
-
userMessage = loadActionPrompt("buildFromInitialSpec");
|
|
5283
|
+
const resolved = resolveAction(userMessage);
|
|
5284
|
+
if (resolved !== null) {
|
|
5285
|
+
userMessage = resolved;
|
|
5234
5286
|
}
|
|
5287
|
+
const isHidden = resolved !== null || !!parsed.hidden;
|
|
5235
5288
|
const onboardingState = parsed.onboardingState ?? "onboardingFinished";
|
|
5236
5289
|
const system = buildSystemPrompt(
|
|
5237
5290
|
onboardingState,
|
package/dist/index.js
CHANGED
|
@@ -826,7 +826,7 @@ var init_presentPlan = __esm({
|
|
|
826
826
|
presentPlanTool = {
|
|
827
827
|
definition: {
|
|
828
828
|
name: "presentPlan",
|
|
829
|
-
description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
|
|
829
|
+
description: "Present an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. If the user rejects with feedback, revise and present again.",
|
|
830
830
|
inputSchema: {
|
|
831
831
|
type: "object",
|
|
832
832
|
properties: {
|
|
@@ -1029,7 +1029,7 @@ var init_confirmDestructiveAction = __esm({
|
|
|
1029
1029
|
properties: {
|
|
1030
1030
|
message: {
|
|
1031
1031
|
type: "string",
|
|
1032
|
-
description: "Explanation of what is about to happen and why confirmation is needed
|
|
1032
|
+
description: "Explanation of what is about to happen and why confirmation is needed in natural language - avoid technical terms or mentions of variables, bash commands, or other system-level concepts.."
|
|
1033
1033
|
},
|
|
1034
1034
|
confirmLabel: {
|
|
1035
1035
|
type: "string",
|
|
@@ -2179,8 +2179,8 @@ function startStatusWatcher(config) {
|
|
|
2179
2179
|
}
|
|
2180
2180
|
inflight = true;
|
|
2181
2181
|
try {
|
|
2182
|
-
const
|
|
2183
|
-
if (!
|
|
2182
|
+
const context = getContext();
|
|
2183
|
+
if (!context) {
|
|
2184
2184
|
return;
|
|
2185
2185
|
}
|
|
2186
2186
|
const res = await fetch(url, {
|
|
@@ -2189,13 +2189,7 @@ function startStatusWatcher(config) {
|
|
|
2189
2189
|
"Content-Type": "application/json",
|
|
2190
2190
|
Authorization: `Bearer ${apiConfig.apiKey}`
|
|
2191
2191
|
},
|
|
2192
|
-
body: JSON.stringify({
|
|
2193
|
-
assistantText: ctx.assistantText.slice(-500),
|
|
2194
|
-
lastToolName: ctx.lastToolName,
|
|
2195
|
-
lastToolResult: ctx.lastToolResult?.slice(-200),
|
|
2196
|
-
onboardingState: ctx.onboardingState,
|
|
2197
|
-
userMessage: ctx.userMessage?.slice(-200)
|
|
2198
|
-
}),
|
|
2192
|
+
body: JSON.stringify({ context }),
|
|
2199
2193
|
signal
|
|
2200
2194
|
});
|
|
2201
2195
|
if (!res.ok) {
|
|
@@ -2237,7 +2231,7 @@ function cleanMessagesForApi(messages) {
|
|
|
2237
2231
|
if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
|
|
2238
2232
|
return {
|
|
2239
2233
|
...msg,
|
|
2240
|
-
content: msg.content.replace(/^@@automated::[^@]
|
|
2234
|
+
content: msg.content.replace(/^@@automated::[^@]*@@[^\n]*\n?/, "")
|
|
2241
2235
|
};
|
|
2242
2236
|
}
|
|
2243
2237
|
if (!Array.isArray(msg.content)) {
|
|
@@ -2329,12 +2323,23 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
|
|
|
2329
2323
|
let currentToolNames = "";
|
|
2330
2324
|
const statusWatcher = startStatusWatcher({
|
|
2331
2325
|
apiConfig,
|
|
2332
|
-
getContext: () =>
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2326
|
+
getContext: () => {
|
|
2327
|
+
const parts = [];
|
|
2328
|
+
if (task) {
|
|
2329
|
+
parts.push(`Task: ${task.slice(-200)}`);
|
|
2330
|
+
}
|
|
2331
|
+
const text = getPartialText(contentBlocks);
|
|
2332
|
+
if (text) {
|
|
2333
|
+
parts.push(`Assistant text: ${text.slice(-500)}`);
|
|
2334
|
+
}
|
|
2335
|
+
if (currentToolNames) {
|
|
2336
|
+
parts.push(`Tool: ${currentToolNames}`);
|
|
2337
|
+
}
|
|
2338
|
+
if (lastToolResult) {
|
|
2339
|
+
parts.push(`Tool result: ${lastToolResult.slice(-200)}`);
|
|
2340
|
+
}
|
|
2341
|
+
return parts.join("\n");
|
|
2342
|
+
},
|
|
2338
2343
|
onStatus: (label) => emit2({ type: "status", message: label }),
|
|
2339
2344
|
signal
|
|
2340
2345
|
});
|
|
@@ -3515,6 +3520,7 @@ Each interface type invokes the same backend methods. Methods don't know which i
|
|
|
3515
3520
|
- Telegram \u2014 message-handling bots
|
|
3516
3521
|
- Email \u2014 inbound email processing
|
|
3517
3522
|
- MCP \u2014 tool servers for AI assistants
|
|
3523
|
+
- Agent \u2014 conversational LLM interface with tool access to backend methods
|
|
3518
3524
|
|
|
3519
3525
|
## Backend
|
|
3520
3526
|
|
|
@@ -4718,6 +4724,7 @@ async function runTurn(params) {
|
|
|
4718
4724
|
"editsFinished"
|
|
4719
4725
|
]);
|
|
4720
4726
|
let lastCompletedTools = "";
|
|
4727
|
+
let lastCompletedInput = "";
|
|
4721
4728
|
let lastCompletedResult = "";
|
|
4722
4729
|
while (true) {
|
|
4723
4730
|
let getOrCreateAccumulator2 = function(id, name) {
|
|
@@ -4742,13 +4749,30 @@ async function runTurn(params) {
|
|
|
4742
4749
|
let currentToolNames = "";
|
|
4743
4750
|
const statusWatcher = startStatusWatcher({
|
|
4744
4751
|
apiConfig,
|
|
4745
|
-
getContext: () =>
|
|
4746
|
-
|
|
4747
|
-
|
|
4748
|
-
|
|
4749
|
-
|
|
4750
|
-
|
|
4751
|
-
|
|
4752
|
+
getContext: () => {
|
|
4753
|
+
const parts = [];
|
|
4754
|
+
if (userMessage) {
|
|
4755
|
+
parts.push(`User message: ${userMessage.slice(-200)}`);
|
|
4756
|
+
}
|
|
4757
|
+
if (onboardingState) {
|
|
4758
|
+
parts.push(`Build phase: ${onboardingState}`);
|
|
4759
|
+
}
|
|
4760
|
+
const text = subAgentText || getTextContent(contentBlocks).slice(-500);
|
|
4761
|
+
if (text) {
|
|
4762
|
+
parts.push(`Assistant text: ${text}`);
|
|
4763
|
+
}
|
|
4764
|
+
const toolName = currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools;
|
|
4765
|
+
if (toolName) {
|
|
4766
|
+
parts.push(`Tool: ${toolName}`);
|
|
4767
|
+
}
|
|
4768
|
+
if (lastCompletedInput) {
|
|
4769
|
+
parts.push(`Tool input: ${lastCompletedInput.slice(-300)}`);
|
|
4770
|
+
}
|
|
4771
|
+
if (lastCompletedResult) {
|
|
4772
|
+
parts.push(`Tool result: ${lastCompletedResult.slice(-200)}`);
|
|
4773
|
+
}
|
|
4774
|
+
return parts.join("\n");
|
|
4775
|
+
},
|
|
4752
4776
|
onStatus: (label) => onEvent({ type: "status", message: label }),
|
|
4753
4777
|
signal
|
|
4754
4778
|
});
|
|
@@ -5066,7 +5090,11 @@ async function runTurn(params) {
|
|
|
5066
5090
|
}
|
|
5067
5091
|
}
|
|
5068
5092
|
}
|
|
5069
|
-
|
|
5093
|
+
const lastNonExcluded = toolCalls.filter(
|
|
5094
|
+
(tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)
|
|
5095
|
+
);
|
|
5096
|
+
lastCompletedTools = lastNonExcluded.map((tc) => tc.name).join(", ");
|
|
5097
|
+
lastCompletedInput = JSON.stringify(lastNonExcluded.at(-1)?.input ?? {});
|
|
5070
5098
|
lastCompletedResult = results.at(-1)?.result ?? "";
|
|
5071
5099
|
for (const r of results) {
|
|
5072
5100
|
state.messages.push({
|
|
@@ -5295,6 +5323,10 @@ Current date/time: ${now}
|
|
|
5295
5323
|
{{compiled/design.md}}
|
|
5296
5324
|
</design>
|
|
5297
5325
|
|
|
5326
|
+
<building_agent_interfaces>
|
|
5327
|
+
{{compiled/agent-interfaces.md}}
|
|
5328
|
+
</building_agent_interfaces>
|
|
5329
|
+
|
|
5298
5330
|
<media_cdn>
|
|
5299
5331
|
{{compiled/media-cdn.md}}
|
|
5300
5332
|
</media_cdn>
|
|
@@ -5502,15 +5534,47 @@ ${partial}` : "[INTERRUPTED] Tool execution was stopped.";
|
|
|
5502
5534
|
}
|
|
5503
5535
|
});
|
|
5504
5536
|
|
|
5537
|
+
// src/automatedActions/resolve.ts
|
|
5538
|
+
function resolveAction(text) {
|
|
5539
|
+
const match = text.match(/^@@automated::(\w+)@@(.*)/s);
|
|
5540
|
+
if (!match) {
|
|
5541
|
+
return null;
|
|
5542
|
+
}
|
|
5543
|
+
const triggerName = match[1];
|
|
5544
|
+
if (NON_ACTION_SENTINELS.has(triggerName)) {
|
|
5545
|
+
return null;
|
|
5546
|
+
}
|
|
5547
|
+
let params = {};
|
|
5548
|
+
const remainder = match[2];
|
|
5549
|
+
if (remainder) {
|
|
5550
|
+
try {
|
|
5551
|
+
params = JSON.parse(remainder.split("\n")[0]);
|
|
5552
|
+
} catch {
|
|
5553
|
+
}
|
|
5554
|
+
}
|
|
5555
|
+
let body = readAsset("automatedActions", `${triggerName}.md`);
|
|
5556
|
+
body = body.replace(/^---[\s\S]*?---\s*/, "");
|
|
5557
|
+
for (const [key, value] of Object.entries(params)) {
|
|
5558
|
+
body = body.replaceAll(`{{${key}}}`, String(value));
|
|
5559
|
+
}
|
|
5560
|
+
return `@@automated::${triggerName}@@
|
|
5561
|
+
${body}`;
|
|
5562
|
+
}
|
|
5563
|
+
var NON_ACTION_SENTINELS;
|
|
5564
|
+
var init_resolve = __esm({
|
|
5565
|
+
"src/automatedActions/resolve.ts"() {
|
|
5566
|
+
"use strict";
|
|
5567
|
+
init_assets();
|
|
5568
|
+
NON_ACTION_SENTINELS = /* @__PURE__ */ new Set(["background_results"]);
|
|
5569
|
+
}
|
|
5570
|
+
});
|
|
5571
|
+
|
|
5505
5572
|
// src/headless.ts
|
|
5506
5573
|
var headless_exports = {};
|
|
5507
5574
|
__export(headless_exports, {
|
|
5508
5575
|
startHeadless: () => startHeadless
|
|
5509
5576
|
});
|
|
5510
5577
|
import { createInterface } from "readline";
|
|
5511
|
-
function loadActionPrompt(name) {
|
|
5512
|
-
return readAsset("prompt", "actions", `${name}.md`);
|
|
5513
|
-
}
|
|
5514
5578
|
function emit(event, data, requestId) {
|
|
5515
5579
|
const payload = { event, ...data };
|
|
5516
5580
|
if (requestId) {
|
|
@@ -5812,15 +5876,11 @@ ${xmlParts}
|
|
|
5812
5876
|
);
|
|
5813
5877
|
}
|
|
5814
5878
|
let userMessage = parsed.text ?? "";
|
|
5815
|
-
const
|
|
5816
|
-
|
|
5817
|
-
|
|
5818
|
-
userMessage = loadActionPrompt("sync");
|
|
5819
|
-
} else if (parsed.runCommand === "publish") {
|
|
5820
|
-
userMessage = loadActionPrompt("publish");
|
|
5821
|
-
} else if (parsed.runCommand === "buildFromInitialSpec") {
|
|
5822
|
-
userMessage = loadActionPrompt("buildFromInitialSpec");
|
|
5879
|
+
const resolved = resolveAction(userMessage);
|
|
5880
|
+
if (resolved !== null) {
|
|
5881
|
+
userMessage = resolved;
|
|
5823
5882
|
}
|
|
5883
|
+
const isHidden = resolved !== null || !!parsed.hidden;
|
|
5824
5884
|
const onboardingState = parsed.onboardingState ?? "onboardingFinished";
|
|
5825
5885
|
const system = buildSystemPrompt(
|
|
5826
5886
|
onboardingState,
|
|
@@ -5964,7 +6024,6 @@ var log9;
|
|
|
5964
6024
|
var init_headless = __esm({
|
|
5965
6025
|
"src/headless.ts"() {
|
|
5966
6026
|
"use strict";
|
|
5967
|
-
init_assets();
|
|
5968
6027
|
init_logger();
|
|
5969
6028
|
init_config();
|
|
5970
6029
|
init_prompt4();
|
|
@@ -5972,6 +6031,7 @@ var init_headless = __esm({
|
|
|
5972
6031
|
init_agent();
|
|
5973
6032
|
init_session();
|
|
5974
6033
|
init_toolRegistry();
|
|
6034
|
+
init_resolve();
|
|
5975
6035
|
log9 = createLogger("headless");
|
|
5976
6036
|
}
|
|
5977
6037
|
});
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# Building Agent Interfaces
|
|
2
|
+
|
|
3
|
+
Guidance for designing conversational AI agents and their frontends. An agent interface pairs an LLM (with per-user-scoped/authenticated access to app methods as tools, handled by platform automatically) with a chat UI. The developer authors the agent's character in MSFM (`src/interfaces/agent.md`); you compile it into a system prompt and tool descriptions (`dist/interfaces/agent/`).
|
|
4
|
+
|
|
5
|
+
## Agent Design Principles
|
|
6
|
+
|
|
7
|
+
### System prompts define character, not procedures
|
|
8
|
+
|
|
9
|
+
A good system prompt establishes who the agent is — personality, tone, judgment style, the kind of person they sound like. It doesn't enumerate every possible interaction or restate what tools already describe.
|
|
10
|
+
|
|
11
|
+
Short and opinionated beats long and comprehensive. "Sounds like a sharp, organized friend — brief by default" gives the model more to work with than a page of behavioral rules. Define constraints through character, not checklists. Let the model's judgment work.
|
|
12
|
+
|
|
13
|
+
#### System Prompt Specifics
|
|
14
|
+
Always include a note like "## Tool Usage
|
|
15
|
+
- When multiple tool calls are independent, make them all in a single turn. Searching for three different products, or fetching two reference sites: batch them instead of doing one per turn." to help the model know it can run tools in parallel
|
|
16
|
+
- The user's name and current role(s) at the time of message, if any, will be automatically appended to the end of every system prompt at runtime like:
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
## Current User
|
|
20
|
+
Name: Jane Smith
|
|
21
|
+
Roles: editor
|
|
22
|
+
```
|
|
23
|
+
- Unless the user specifies otherwise, always include a note that the agent can use markdown in responses (since the chat UI renders it) and should avoid using em dashes and emojis in its responses.
|
|
24
|
+
|
|
25
|
+
### Tool descriptions are the most important artifact
|
|
26
|
+
|
|
27
|
+
The system prompt says *who* the agent is. The tool descriptions say *what it can do*. A great tool description means the agent uses the tool correctly without explicit instruction. Do not be overly precise or micromanage. Your goal with tool descriptions is to provide context and faming- trust that the model is intelligent enough to fill in the gaps.. Each `tools/*.md` file should cover:
|
|
28
|
+
|
|
29
|
+
- **When to use** this tool (and when NOT to — e.g. "NOT for marking complete, use toggle-todo")
|
|
30
|
+
- **Parameter guidance** beyond the schema — what makes a good value, when to include optional fields, what to skip
|
|
31
|
+
- **Return value** and how to present results to the user
|
|
32
|
+
|
|
33
|
+
### Not every method should be a tool
|
|
34
|
+
|
|
35
|
+
Expose methods that serve the conversational flow. Internal helpers, admin-only methods, and batch operations often don't belong in the agent's toolset. A focused set of well-described tools performs better than many underdocumented ones.
|
|
36
|
+
|
|
37
|
+
Think about what the user would actually say in conversation. If a method only makes sense triggered by another system (cron, webhook) or through a form UI, it probably shouldn't be an agent tool.
|
|
38
|
+
|
|
39
|
+
### The MSFM spec body drives compilation
|
|
40
|
+
|
|
41
|
+
The spec (`src/interfaces/agent.md`) is the human-editable source. Write it for humans — voice, personality, capabilities, behavioral rules, edge cases. The body should read like a character brief, not a technical manual.
|
|
42
|
+
|
|
43
|
+
Model ID and config belong in the frontmatter, not the prose. The prose focuses on judgment calls: "When a user adds a task, consider whether it would benefit from a note. For vague or complex tasks, attach guidance. For simple tasks, skip it."
|
|
44
|
+
|
|
45
|
+
Use MSFM annotations for implementation-level notes that the compiler needs but the human reader doesn't — same pattern as app specs.
|
|
46
|
+
|
|
47
|
+
When defining tools for multi-user apps with access restrictions, be sure to note the roles that are allowed or disallowed from accessing the tool, as well as any other restrictions. The actual tool invocation will be rejected at runtime if the requesting user is not allowed to access the underlying method, but defining this early allows the model to gate permissions cleanly rather than vomiting an error when the user tries to do something they're not permissioned for.
|
|
48
|
+
|
|
49
|
+
### Anti-patterns
|
|
50
|
+
|
|
51
|
+
- Avoid system prompts that restate tool schemas ("You have a tool called createTodo that takes a title and optional aiNotes...")
|
|
52
|
+
- Avoid generic personalities ("You are a helpful assistant") — every agent should have a distinct voice, this is often the most fun part for the user building the agent - lean in and help them enjoy bringing their agent to life!
|
|
53
|
+
- Avoid exposing all methods without considering conversational fit
|
|
54
|
+
|
|
55
|
+
## Compiling the Agent Spec
|
|
56
|
+
|
|
57
|
+
When building the `dist/interfaces/agent/`, consider the agent spec, as well as the larger context of the app and especially any `@brand/` guidelines. The agent should feel as though cut from the same cloth as the rest of the app - it is simply the same backend application projected into a different modality. Take care to make it consistent with the user's app, and then output:
|
|
58
|
+
|
|
59
|
+
**`system.md`** — compiled from the spec body. Should feel like a character brief: who the agent is, how they talk, what they care about, key behavioral rules.
|
|
60
|
+
|
|
61
|
+
**`tools/*.md`** — one file per exposed method. Rich markdown with when-to-use, examples, edge cases, return value guidance. These are what make the agent actually work well.
|
|
62
|
+
|
|
63
|
+
**`agent.json`** — ties it together. Model config from frontmatter, paths to system prompt and tool files, optional `webInterfacePath`.
|
|
64
|
+
|
|
65
|
+
## Chat UI Design
|
|
66
|
+
|
|
67
|
+
When the agent has a web frontend (via `webInterfacePath`), the chat UI is a page within the web interface.
|
|
68
|
+
|
|
69
|
+
### Frontend SDK: `createAgentChatClient()`
|
|
70
|
+
|
|
71
|
+
The `@mindstudio-ai/interface` package provides `createAgentChatClient()` for thread management and streaming chat. All agent chat UIs should use this — don't build raw fetch/SSE handling.
|
|
72
|
+
|
|
73
|
+
**Thread management:**
|
|
74
|
+
|
|
75
|
+
```ts
|
|
76
|
+
import { createAgentChatClient } from '@mindstudio-ai/interface';
|
|
77
|
+
|
|
78
|
+
const chat = createAgentChatClient();
|
|
79
|
+
|
|
80
|
+
const thread = await chat.createThread();
|
|
81
|
+
const { threads, nextCursor } = await chat.listThreads();
|
|
82
|
+
const full = await chat.getThread(thread.id);
|
|
83
|
+
await chat.updateThread(thread.id, 'New title');
|
|
84
|
+
await chat.deleteThread(thread.id);
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Sending messages (streaming):**
|
|
88
|
+
|
|
89
|
+
`sendMessage` streams the agent's response via SSE. Use named callbacks for common events:
|
|
90
|
+
|
|
91
|
+
```ts
|
|
92
|
+
const response = chat.sendMessage(threadId, content, {
|
|
93
|
+
// Text deltas — append, don't replace
|
|
94
|
+
onText: (delta) => setText((prev) => prev + delta),
|
|
95
|
+
|
|
96
|
+
// Extended thinking (also deltas)
|
|
97
|
+
onThinking: (delta) => setThinking((prev) => prev + delta),
|
|
98
|
+
onThinkingComplete: (thinking, signature) => setThinking(''),
|
|
99
|
+
|
|
100
|
+
// Tool execution
|
|
101
|
+
onToolCallStart: (id, name) => { },
|
|
102
|
+
onToolCallResult: (id, output) => { },
|
|
103
|
+
|
|
104
|
+
// Errors
|
|
105
|
+
onError: (error) => console.error(error),
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
// Resolves when stream completes
|
|
109
|
+
const { stopReason, usage } = await response;
|
|
110
|
+
|
|
111
|
+
// Cancel mid-stream
|
|
112
|
+
response.abort();
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Attachments:**
|
|
116
|
+
|
|
117
|
+
Send images or documents alongside a message. Upload via `platform.uploadFile()` first, then pass CDN URLs as the 4th argument:
|
|
118
|
+
|
|
119
|
+
```ts
|
|
120
|
+
const url = await platform.uploadFile(file);
|
|
121
|
+
|
|
122
|
+
chat.sendMessage(threadId, "What's in this document?", {
|
|
123
|
+
onText: (delta) => setText((prev) => prev + delta),
|
|
124
|
+
}, {
|
|
125
|
+
attachments: [url],
|
|
126
|
+
});
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Images (`i.mscdn.ai`) are sent as vision input. Documents (`f.mscdn.ai`) have text extracted server-side and included in context. Attachments are preserved in thread history.
|
|
130
|
+
|
|
131
|
+
**Key points:**
|
|
132
|
+
- `onText` and `onThinking` receive deltas (append to state, don't replace)
|
|
133
|
+
- `sendMessage` returns an `AbortablePromise` — a Promise with `.abort()`. Also accepts `signal` in callbacks for `AbortController` support
|
|
134
|
+
- Tool call events (`onToolCallStart`, `onToolCallResult`) are available for showing progress indicators
|
|
135
|
+
- Thread title is auto-generated after the first exchange
|
|
136
|
+
|
|
137
|
+
### Layout
|
|
138
|
+
|
|
139
|
+
Ask `visualDesignExpert` for ideas about how to design the chat UI in a way that is appropriate and unique to the app.
|
|
140
|
+
|
|
141
|
+
User messages visually distinct from assistant messages (right-aligned, different background, or both). Keep it clean — no avatars unless they add meaning. Generous vertical spacing between messages so the conversation breathes. Use clean, beautiful animation where it is additive.
|
|
142
|
+
|
|
143
|
+
### Streaming & Markdown
|
|
144
|
+
|
|
145
|
+
Display tokens as they arrive. No loading spinners that block the whole view — show partial text immediately. A subtle cursor or animation at the streaming edge signals "still generating." The user should be reading, not waiting.
|
|
146
|
+
|
|
147
|
+
Use `streamdown` for rendering markdown from streaming text. It handles unterminated blocks gracefully (the core problem with react-markdown during mid-stream rendering), includes Shiki syntax highlighting for code blocks, and supports KaTeX math and Mermaid diagrams. Install the base package and tree-shake plugins as needed (`@streamdown/code`, `@streamdown/math`, `@streamdown/mermaid`).
|
|
148
|
+
|
|
149
|
+
Pay attention to streaming text animation — fast token delivery can look jarring, and slow delivery can look laggy. Throttling renders to ~50-100ms batches smooths things out.
|
|
150
|
+
|
|
151
|
+
It is critical to never introduce layout shift or jarring transitions when dealing with responses. Messages should cleanly and smoothly transition between thinking, streaming, and completed states. Tool use should fit beautifully within the conversation and should never cause abrupt layout shift.
|
|
152
|
+
|
|
153
|
+
### Scrolling
|
|
154
|
+
|
|
155
|
+
Use `use-stick-to-bottom` (`github.com/stackblitz-labs/use-stick-to-bottom`) for auto-scroll behavior. It handles the standard chat scroll contract: stick to bottom as new content streams in, but stop following if the user scrolls up. Don't hand-roll this — the edge cases (momentum scrolling, resize, streaming while scrolled up) are fiddly.
|
|
156
|
+
|
|
157
|
+
### Optimistic messages
|
|
158
|
+
|
|
159
|
+
When the user sends a message, add it to the conversation immediately — don't wait for the server to acknowledge. Show a thinking/typing indicator in the assistant's response area right away so the UI feels instant. The indicator should appear the moment the user hits send, not when the first token arrives.
|
|
160
|
+
|
|
161
|
+
### Tool calls
|
|
162
|
+
|
|
163
|
+
Show tool activity in the chat as a compact, inline status that appears when `onToolCallStart` fires and resolves when `onToolCallResult` arrives. Never show raw JSON, tool IDs, or internal details — just a human-readable description of what's happening.
|
|
164
|
+
|
|
165
|
+
### Input area
|
|
166
|
+
|
|
167
|
+
Fixed at the bottom. Auto-growing textarea using `react-textarea-autosize`, not a single-line input. Clear send affordance (button or Enter). Disabled while the agent is streaming, with a visible stop/cancel button. Placeholder text that reflects the agent's personality, not generic "Type a message..."
|
|
168
|
+
|
|
169
|
+
### Empty state
|
|
170
|
+
|
|
171
|
+
The first screen should invite conversation. A greeting from the agent, a few suggested prompts, or a concise description of what the agent can help with can go a long way. Always make sure they are optional though - the user needs to be able to chat directly if they want. Match the agent's voice — a casual todo assistant and a formal legal review agent should feel completely different from the first screen.
|
|
172
|
+
|
|
173
|
+
### Mobile
|
|
174
|
+
|
|
175
|
+
Chat is inherently mobile-friendly — lean into it. Pay attention to viewport sizing on mobile as the virtual keyboard changes the available height.
|
|
176
|
+
|
|
177
|
+
### Respect the brand
|
|
178
|
+
|
|
179
|
+
The chat UI uses the app's design system — colors, typography, voice from `@brand/`. Apply the same design standards as any other page in the web interface.
|
|
180
|
+
|
|
181
|
+
### Anti-patterns
|
|
182
|
+
|
|
183
|
+
- Avoid designs that look like dated messaging apps from 2015
|
|
184
|
+
- Avoid robotic empty states ("Hello! I'm your AI assistant. How can I help you today?")
|
|
@@ -76,6 +76,8 @@ auth.name;
|
|
|
76
76
|
auth.email;
|
|
77
77
|
```
|
|
78
78
|
|
|
79
|
+
For apps with an agent interface, the SDK also provides `createAgentChatClient()` for thread management and streaming chat. See the "Building Agent Interfaces" section for usage details.
|
|
80
|
+
|
|
79
81
|
The project uses `"jsx": "react-jsx"` (automatic JSX transform) — do not `import React from 'react'`. Only import the specific hooks and types you need (e.g., `import { useState, useEffect } from 'react'`).
|
|
80
82
|
|
|
81
83
|
On deploy, the platform runs `npm install && npm run build` in the web directory and hosts the output on CDN.
|
|
@@ -230,6 +232,74 @@ Expose methods as AI tools.
|
|
|
230
232
|
|
|
231
233
|
Each listed method becomes an MCP tool. Method names and descriptions from the manifest are used as tool names and descriptions.
|
|
232
234
|
|
|
235
|
+
## Agent (Conversational Interface)
|
|
236
|
+
|
|
237
|
+
A conversational interface where an LLM has access to the app's methods as tools. Unlike MCP (which exposes methods for external agents), the agent interface IS the agent — it has its own personality, system prompt, and model config, and orchestrates tool calls against the app's methods internally.
|
|
238
|
+
|
|
239
|
+
### Spec: `src/interfaces/agent.md`
|
|
240
|
+
|
|
241
|
+
The human-readable spec. Frontmatter contains structured fields; the prose body is the behavioral spec — voice, personality, capabilities, rules — written in MSFM.
|
|
242
|
+
|
|
243
|
+
```yaml
|
|
244
|
+
---
|
|
245
|
+
name: Todo Assistant
|
|
246
|
+
model: {"model": "claude-4-5-haiku", "temperature": 0.5, "maxResponseTokens": 15000}
|
|
247
|
+
description: Conversational agent that helps users manage their to-do list.
|
|
248
|
+
---
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
Frontmatter fields:
|
|
252
|
+
- `name` — agent display name
|
|
253
|
+
- `model` — JSON string with `model` (MindStudio model ID), `temperature`, `maxResponseTokens`, and optional `config` (model-specific settings like `reasoning`, `tools`, etc.). Use `askMindStudioSdk` to look up available model IDs and their config options when setting the model ID. The user's UI will have a nice visual picker to allow them to change it later, so only validate the model when you're the one setting it — otherwise assume the value is correct if it changes.
|
|
254
|
+
- `description` — one-liner for agent card/listing
|
|
255
|
+
|
|
256
|
+
The prose body contains sections like Voice & Personality, Capabilities, Behavior — whatever structure serves the agent's character. This is compiled into the system prompt and tool descriptions.
|
|
257
|
+
|
|
258
|
+
### Compiled Output: `dist/interfaces/agent/`
|
|
259
|
+
|
|
260
|
+
```
|
|
261
|
+
dist/interfaces/agent/
|
|
262
|
+
├── agent.json ← config the platform reads
|
|
263
|
+
├── system.md ← compiled system prompt
|
|
264
|
+
└── tools/
|
|
265
|
+
├── createTodo.md ← rich tool description per method
|
|
266
|
+
├── listTodos.md
|
|
267
|
+
└── ...
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Config (`agent.json`)
|
|
271
|
+
|
|
272
|
+
```json
|
|
273
|
+
{
|
|
274
|
+
"agent": {
|
|
275
|
+
"model": "claude-4-5-haiku",
|
|
276
|
+
"temperature": 0.5,
|
|
277
|
+
"maxTokens": 15000,
|
|
278
|
+
"systemPrompt": "system.md",
|
|
279
|
+
"tools": [
|
|
280
|
+
{ "method": "create-todo", "description": "tools/createTodo.md" },
|
|
281
|
+
{ "method": "list-todos", "description": "tools/listTodos.md" }
|
|
282
|
+
],
|
|
283
|
+
"webInterfacePath": "/chat"
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
| Field | Description |
|
|
289
|
+
|-------|-------------|
|
|
290
|
+
| `model` | MindStudio model ID (e.g. `claude-4-5-haiku`, `claude-4-6-sonnet`) |
|
|
291
|
+
| `temperature` | Model temperature |
|
|
292
|
+
| `maxTokens` | Max response tokens |
|
|
293
|
+
| `systemPrompt` | Relative path to the compiled system prompt markdown file |
|
|
294
|
+
| `tools` | Array of tool entries — `method` references a method `id` from the manifest, `description` is a relative path to a markdown file with rich tool docs (when to use, examples, edge cases, parameter guidance) |
|
|
295
|
+
| `webInterfacePath` | Optional. If the app has a web interface with a chat page, this path tells the IDE where to show the preview. Otherwise the agent is accessed via API. |
|
|
296
|
+
|
|
297
|
+
### Manifest Declaration
|
|
298
|
+
|
|
299
|
+
```json
|
|
300
|
+
{ "type": "agent", "path": "dist/interfaces/agent/agent.json" }
|
|
301
|
+
```
|
|
302
|
+
|
|
233
303
|
## Manifest Declaration
|
|
234
304
|
|
|
235
305
|
Each interface is declared in `mindstudio.json`:
|
|
@@ -244,7 +314,8 @@ Each interface is declared in `mindstudio.json`:
|
|
|
244
314
|
{ "type": "telegram", "path": "dist/interfaces/telegram/interface.json" },
|
|
245
315
|
{ "type": "webhook", "path": "dist/interfaces/webhook/interface.json" },
|
|
246
316
|
{ "type": "email", "path": "dist/interfaces/email/interface.json" },
|
|
247
|
-
{ "type": "mcp", "path": "dist/interfaces/mcp/interface.json" }
|
|
317
|
+
{ "type": "mcp", "path": "dist/interfaces/mcp/interface.json" },
|
|
318
|
+
{ "type": "agent", "path": "dist/interfaces/agent/agent.json" }
|
|
248
319
|
]
|
|
249
320
|
}
|
|
250
321
|
```
|
|
@@ -89,7 +89,7 @@
|
|
|
89
89
|
|
|
90
90
|
| Field | Type | Required | Description |
|
|
91
91
|
|-------|------|----------|-------------|
|
|
92
|
-
| `type` | `string` | Yes | One of: `web`, `api`, `discord`, `telegram`, `cron`, `webhook`, `email`, `mcp` |
|
|
92
|
+
| `type` | `string` | Yes | One of: `web`, `api`, `discord`, `telegram`, `cron`, `webhook`, `email`, `mcp`, `agent` |
|
|
93
93
|
| `path` | `string` | No | Path to the interface config file |
|
|
94
94
|
| `config` | `object` | No | Inline config (alternative to a file) |
|
|
95
95
|
| `enabled` | `boolean` | No | Default `true`. Set `false` to skip during build. |
|
|
@@ -22,6 +22,7 @@ my-app/
|
|
|
22
22
|
assets/ logos, icons
|
|
23
23
|
web.md web UI spec
|
|
24
24
|
api.md API conventions
|
|
25
|
+
agent.md agent personality and behavior spec
|
|
25
26
|
cron.md scheduled job descriptions
|
|
26
27
|
roadmap/ feature roadmap (one file per item, type: roadmap)
|
|
27
28
|
|
|
@@ -46,6 +47,10 @@ my-app/
|
|
|
46
47
|
webhook/interface.json webhook config
|
|
47
48
|
email/interface.json email config
|
|
48
49
|
mcp/interface.json MCP config
|
|
50
|
+
agent/ agent interface
|
|
51
|
+
agent.json agent config
|
|
52
|
+
system.md compiled system prompt
|
|
53
|
+
tools/ tool descriptions (one .md per method)
|
|
49
54
|
```
|
|
50
55
|
|
|
51
56
|
## What Goes Where
|
|
@@ -19,7 +19,7 @@ The scaffold starts with these spec files that cover the full picture of the app
|
|
|
19
19
|
- **`src/interfaces/@brand/voice.md`** — voice and terminology: tone, error messages, word choices
|
|
20
20
|
- **`src/roadmap/`** — feature roadmap. One file per feature (`type: roadmap`). See "Roadmap" below.
|
|
21
21
|
|
|
22
|
-
Start from these and extend as needed. Add interface specs for other interface types (`api.md`, `cron.md`, etc.) if the app uses them. Split `app.md` into multiple files if the domain is complex. The agent uses the entire `src/` folder as compilation context, so organize however serves clarity.
|
|
22
|
+
Start from these and extend as needed. Add interface specs for other interface types (`api.md`, `cron.md`, `agent.md`, etc.) if the app uses them. Split `app.md` into multiple files if the domain is complex. The agent uses the entire `src/` folder as compilation context, so organize however serves clarity.
|
|
23
23
|
|
|
24
24
|
Users often care about look and feel as much as (or more than) underlying data structures. Don't treat the brand and interface specs as an afterthought — for many users, the visual identity and voice are the first things they want to get right.
|
|
25
25
|
|
|
@@ -18,12 +18,15 @@
|
|
|
18
18
|
## Communication
|
|
19
19
|
The user can already see your tool calls, so most of your work is visible without narration. Focus text output on three things:
|
|
20
20
|
- **Decisions that need input.** Questions, tradeoffs, ambiguity that blocks progress.
|
|
21
|
-
- **Milestones.** What you built, what changed. Summarize in plain language rather than listing a per-file changelog.
|
|
21
|
+
- **Milestones.** What you built, what changed. Summarize in plain language rather than listing a per-file changelog. If you've just built something, help the user understand how to use it, especially if they're seeing an MVP or new feature for the first time. For complex things, offer to walk them through a demo using `runAutomatedBrowserTest`
|
|
22
22
|
- **Errors or blockers.** Something failed or the approach needs to shift.
|
|
23
23
|
|
|
24
24
|
Skip the rest: narrating what you're about to do, restating what the user asked, explaining tool calls they can already see.
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
### Automated messages
|
|
27
|
+
You will occasionally receive automated messages prefixed with `@@automated_message@@` - these are triggered by things like background agents returning their work, or by the user clicking a button in the UI (e.g., the user might click a "Build Feature" button in the product roadmap UI, and you will receive a message detailing what they want to build). You will be able to see these messages in your chat history but the user will not see them, so acknowledge them appropriately and then perform the requested work.
|
|
28
|
+
|
|
29
|
+
## Style
|
|
27
30
|
- Your messages are rendered as markdown. Use formatting (headers, bold, lists, code blocks) when it helps readability. You can also include images using `` — use this to show the user screenshots, generated images, or other visual references inline in your messages.
|
|
28
31
|
- Keep language accessible. Describe what the app *does*, not how it's implemented, unless the user demonstrates technical fluency.
|
|
29
32
|
- Always use full paths relative to the project root when mentioning files (`dist/interfaces/web/src/App.tsx`, not `App.tsx`). Paths will be rendered as clickable links for the user.
|
|
@@ -6,9 +6,10 @@ The user just arrived at a blank project with a full-screen chat. They may have
|
|
|
6
6
|
Don't list features. Frame what MindStudio does through the lens of what the user wants. A MindStudio app is a managed TypeScript project with a backend, optional database, optional auth, and one or more interfaces. The key is that it's extremely flexible — here are some examples of what people build:
|
|
7
7
|
|
|
8
8
|
- **Business tools** — dashboards, admin panels, approval workflows, data entry apps, internal tools with role-based access
|
|
9
|
-
- **AI-powered apps** — chatbots, content generators, document processors, image/video tools, AI agents that take actions (send emails, update CRMs, post to Slack)
|
|
9
|
+
- **AI-powered apps** — chatbots, content generators, document processors, image/video tools, conversational agents with tool access, AI agents that take actions (send emails, update CRMs, post to Slack)
|
|
10
10
|
- **Automations with no UI** — a set of cron jobs that scrape websites and send alerts, a webhook handler that syncs data between services, an email processor that triages inbound support requests
|
|
11
|
-
- **
|
|
11
|
+
- **Conversational AI Agents** — Full conversational AI agents with custom frontends and access to the app's methods as tools. Expose all or only a subset of app functionality, manage access to methods on a per-user basis, and build fully custom chat UIs. Use any model you want — Gemini, GPT, Anthropic Claude, or any of the hundreds of other models MindStudio supports automatically.
|
|
12
|
+
- **Bots & agent tools** — Discord slash-command bots, Telegram bots, MCP tool servers
|
|
12
13
|
- **Creative/interactive projects** — games with Three.js or p5.js, interactive visualizations, generative art, portfolio sites with dynamic backends
|
|
13
14
|
- **API services** — backend logic exposed as REST endpoints for other systems to consume
|
|
14
15
|
- **Simple static sites** — no backend needed, just a web interface with a build step
|
|
@@ -38,7 +38,7 @@ Always consult the code sanity check before writing code in initialCodegen with
|
|
|
38
38
|
|
|
39
39
|
### QA (`runAutomatedBrowserTest`)
|
|
40
40
|
|
|
41
|
-
For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full chrome browser automation — it's heavyweight. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Run a scenario first to seed test data and set user roles.
|
|
41
|
+
For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full chrome browser automation — it's heavyweight. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Run a scenario first to seed test data and set user roles. The user is able to watch QA work on their screen via a live browser preview - the cursor will move, type, etc - so you can also use this to demo functionality to the user and help them understand how to use their app.
|
|
42
42
|
|
|
43
43
|
### Background Execution
|
|
44
44
|
|
|
@@ -60,7 +60,7 @@ When you receive background results:
|
|
|
60
60
|
#### When You Are Allowed to Background
|
|
61
61
|
|
|
62
62
|
You can only background the following two tasks, unless the user specifically asks you to do work in the background:
|
|
63
|
-
- `productVision` seeding the intiial roadmap after writing the spec for the first time. This task takes a while and we can allow the user to continue building while it happens in the background
|
|
63
|
+
- `productVision` seeding the initial roadmap after writing the spec for the first time. This task takes a while and we can allow the user to continue building while it happens in the background.
|
|
64
64
|
- After writing the spec, once you have finalized the shape of the app, ask `visualDesignExpert` to create an "iphone app store" style icon for the app, then set it with `setProjectMetadata({ iconUrl: ... })`
|
|
65
65
|
|
|
66
66
|
Do not background any other tasks.
|
|
@@ -23,6 +23,8 @@ These are things we already know about and have decided to accept:
|
|
|
23
23
|
- framer-motion
|
|
24
24
|
- styled-components
|
|
25
25
|
- @tabler/icons-react
|
|
26
|
+
- streamdown
|
|
27
|
+
- react-textarea-autosize
|
|
26
28
|
- Preferences:
|
|
27
29
|
- use [wouter](https://github.com/molefrog/wouter) for React routing instead of reaching for react-router
|
|
28
30
|
|
|
@@ -7,7 +7,7 @@ There are two categories of animation and you should think of them separately:
|
|
|
7
7
|
- Design animations: think beautiful layout reveals, dramatic loading and success states for user onboarding, beautiful scroll-driven animations on a landing page. These are the place to show off - and if you're showing off you better get it right. Anything that looks dated or janky will be disappointing to the user. Done correctly, these animations are powerful and transformative - and when the design calls for it, you should take a risk and suggest something big, bold, and creative. Remember, the user can always modify or change things later. It's better to dream big and walk it back than to deliver something generic or bland.
|
|
8
8
|
|
|
9
9
|
### Patterns to Use
|
|
10
|
-
- CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) — native, off main thread, even though there is still a little lag in browser support we should always be using this when we need scroll-driven animations.
|
|
10
|
+
- CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) — native, off main thread, even though there is still a little lag in browser support we should always be using this when we need scroll-driven animations. Scroll driven animations that animate based on scrollport are very slick and look beautiful in landing pages. Think about how you can use them.
|
|
11
11
|
- Spring physics for natural-feeling motion
|
|
12
12
|
- Purposeful micro-interactions — scaling, color shifts, depth changes on hover/click
|
|
13
13
|
- Entrance reveals — content animating when it enters the view - can be powerful, but can very easily feel cheap if it is just sections of a page animating in on scroll, for example. Be very thoughtful and intentional when animating in this way.
|
|
@@ -20,7 +20,7 @@ Then, think about the layout and UI patterns - these are the core of the user's
|
|
|
20
20
|
|
|
21
21
|
Every recommendation must be immediately usable in production. Font names with CSS URLs. Color palettes as hex values. Image URLs that resolve. No placeholders, no "you could try..." The developer interprets your results, so focus on being useful rather than rigidly formatted.
|
|
22
22
|
|
|
23
|
-
When giving longer responses like full design plans, be sure to include implementation notes specific to this project for things the developer should pay extra close attention to as it builds to avoid any gotchas or oversights. The developer has a lot on their plate and we have a chance to help them out. Reference <app_interface_design_notes> as a resource for this information.
|
|
23
|
+
When giving longer responses like full design plans, be sure to include implementation notes specific to this project for things the developer should pay extra close attention to as it builds to avoid any gotchas or oversights. The developer has a lot on their plate and we have a chance to help them out. Reference <app_interface_design_notes> as a resource for this information. The developer doesn't have access to your internal notes and references, so be explicit when referring to things, don't just say "Reference 11" or something like that, as they'll have no idea what that means.
|
|
24
24
|
|
|
25
25
|
Important: Assume the developer has a terrible sense of design. Therefore, you must be direct and unambiguous, and be prescriptive about design choices - don't leave room for assumption or interpretation. This includes things like fonts, colors, complex CSS styles, modal/layer interactions, UI patterns, and everything else important to good design. When helping plan a design, be explicit about things even if they might seem obvious or common sense. The developer is highly technical and that is the best language in which to communicate precisely with them - use raw CSS snippets, pseudocode, and other technical terms liberally to be as precise and refined as possible - they will appreciate it and do better work as a result!
|
|
26
26
|
|
|
@@ -6,7 +6,12 @@ Study the patterns provided in <ui_case_studies> and actually spend time breakin
|
|
|
6
6
|
|
|
7
7
|
When describing UI patterns to the developer, be verbose and explicit. Describe every aspect - don't leave room for interpretation by the developer because it ain't gonna be pretty.
|
|
8
8
|
|
|
9
|
-
|
|
10
9
|
### Dated Patterns to Avoid
|
|
11
10
|
|
|
12
11
|
The design should look like it could be an Apple iOS/macOS app of the year winner for 2026. Avoid long pages, things that feel like blogs, things that borrow from "dated" app store apps, and the like. It should feel like an award winner from the past two years, not an award winner from a decade ago.
|
|
12
|
+
|
|
13
|
+
### Notes for Designing AI Chat Interfaces
|
|
14
|
+
|
|
15
|
+
If the app includes an AI chat interface, take care to make it beautiful and intentional. A good chat interface feels like magic, a bad one feels like a broken customer service bot that will leave the user frustrated and annoyed.
|
|
16
|
+
|
|
17
|
+
Pay close attention to text streaming when the AI replies - it should feel natural, smooth, and beautiful. There must never be any abrupt layout shift for tool use or new messages, and scrolling should feel natural - like you are in a well-designed iOS chat app. Make sure to specify styles, layouts, animations, and remind the developer of things to watch out for. Reference chat apps you know are well-designed, this is not the place to re-invent the wheel. Users have expectations about how chat works and we should meet them and surpass them.
|
package/package.json
CHANGED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
This is an automated action triggered by the user pressing "Build" in the editor after reviewing the spec.
|
|
2
|
-
|
|
3
|
-
The user has reviewed the spec and is ready to build.
|
|
4
|
-
|
|
5
|
-
Think about your approach and then get a quick sanity check from `codeSanityCheck` to make sure you aren't missing anything.
|
|
6
|
-
|
|
7
|
-
Then, build everything in one turn: methods, tables, interfaces, manifest updates, and scenarios, using the spec as the master plan.
|
|
8
|
-
|
|
9
|
-
When code generation is complete, verify your work:
|
|
10
|
-
- First, run use `runScenario` to seed test data, then use `runMethod` to confirm a method works
|
|
11
|
-
- If the app has a web frontend, check the browser logs to make sure there are no errors rendering it.
|
|
12
|
-
- Ask the `visualDesignExpert` to take a screenshot and verity that the visual design looks correct. Fix any issues it flags - we want the user's first time seeing the finished product to truly wow them.
|
|
13
|
-
- Finally, use `runAutomatedBrowserTest` to smoke-test the main UI flow. The dev database is a disposable snapshot, so don't worry about being destructive. Fix any errors before finishing.
|
|
14
|
-
|
|
15
|
-
When everything is working, use `productVision` to mark the MVP roadmap item as done, then call `setProjectOnboardingState({ state: "onboardingFinished" })`.
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
This is an automated action triggered by the user pressing "Publish" in the editor.
|
|
2
|
-
|
|
3
|
-
The user wants to deploy their app. Pushing to the `main` branch triggers a production deploy.
|
|
4
|
-
|
|
5
|
-
Review the current state of the working tree — what has changed since the last commit, what's been committed since the last push, and the overall shape of recent work. Write a user-friendly changelog with `presentPublishPlan` — summarize what changed in plain language ("added vendor approval workflow", "fixed invoice totals", "updated the dashboard layout"). Reference specific code or file paths only when it helps clarity. This is what the user will see before deploying.
|
|
6
|
-
|
|
7
|
-
If approved:
|
|
8
|
-
- Stage and commit any uncommitted changes with a clean, descriptive commit message
|
|
9
|
-
- Push to main
|
|
10
|
-
- Let the user know their app is deploying
|
|
11
|
-
|
|
12
|
-
If dismissed, acknowledge and do nothing.
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
This is an automated action triggered by the user pressing "Sync" in the editor.
|
|
2
|
-
|
|
3
|
-
The user has manually edited files since the last sync. The `refs/sync-point` git ref marks the last known-good sync state. It's created using a temporary git index that captures the full working tree (including unstaged changes) as a tree object — so it represents exactly what the files looked like at sync time, not just what was committed.
|
|
4
|
-
|
|
5
|
-
To see what the user changed, run: `git diff refs/sync-point -- src/ dist/`
|
|
6
|
-
|
|
7
|
-
This compares the sync-point tree against the current working tree. Do not add `HEAD` or any other ref — the command as written diffs directly against the working tree, which is what you want.
|
|
8
|
-
|
|
9
|
-
In the diff output: lines prefixed with `-` are what was in the file at last sync. Lines prefixed with `+` are the user's current edits. Sync should bring the other side in line with the `+` side.
|
|
10
|
-
|
|
11
|
-
Analyze the changes and write a sync plan with `presentSyncPlan` — a clear markdown summary of what changed and what you intend to update. Write it for a human: describe changes in plain language ("renamed the greeting field", "added a note about error handling"), not as a list of file paths and code diffs. Reference specific code or file paths only when it helps clarity. The user will review and approve before you make changes.
|
|
12
|
-
|
|
13
|
-
If approved:
|
|
14
|
-
- If spec files (`src/`) changed, update the corresponding code in `dist/` to match
|
|
15
|
-
- If code files (`dist/`) changed, update the corresponding spec in `src/` to match
|
|
16
|
-
- If both changed, reconcile — spec is the source of truth for intent, but respect code changes that add implementation detail
|
|
17
|
-
- When all files are synced, call `clearSyncStatus`
|
|
18
|
-
|
|
19
|
-
If dismissed, acknowledge and do nothing.
|