@mindstudio-ai/remy 0.1.119 → 0.1.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -382,6 +382,8 @@ Current date: ${now}
382
382
 
383
383
  <mindstudio_agent_sdk_docs>
384
384
  {{compiled/sdk-actions.md}}
385
+
386
+ {{compiled/task-agents.md}}
385
387
  </mindstudio_agent_sdk_docs>
386
388
 
387
389
  <mindstudio_flavored_markdown_spec_docs>
@@ -5269,16 +5271,6 @@ async function runTurn(params) {
5269
5271
  apiConfig,
5270
5272
  getContext: () => {
5271
5273
  const parts = [];
5272
- if (userMessage) {
5273
- parts.push(`User message: ${userMessage.slice(-200)}`);
5274
- }
5275
- if (onboardingState) {
5276
- parts.push(`Build phase: ${onboardingState}`);
5277
- }
5278
- const text = subAgentText || getTextContent(contentBlocks).slice(-500);
5279
- if (text) {
5280
- parts.push(`Assistant text: ${text}`);
5281
- }
5282
5274
  const toolName = currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools;
5283
5275
  if (toolName) {
5284
5276
  parts.push(`Tool: ${toolName}`);
@@ -5289,6 +5281,16 @@ async function runTurn(params) {
5289
5281
  if (lastCompletedResult) {
5290
5282
  parts.push(`Tool result: ${lastCompletedResult.slice(-200)}`);
5291
5283
  }
5284
+ const text = subAgentText || getTextContent(contentBlocks).slice(-500);
5285
+ if (text) {
5286
+ parts.push(`Assistant text: ${text}`);
5287
+ }
5288
+ if (onboardingState && onboardingState !== "onboardingFinished") {
5289
+ parts.push(`Build phase: ${onboardingState}`);
5290
+ }
5291
+ if (userMessage) {
5292
+ parts.push(`User request: ${userMessage.slice(-100)}`);
5293
+ }
5292
5294
  return parts.join("\n");
5293
5295
  },
5294
5296
  onStatus: (label) => onEvent({ type: "status", message: label }),
package/dist/index.js CHANGED
@@ -5319,16 +5319,6 @@ async function runTurn(params) {
5319
5319
  apiConfig,
5320
5320
  getContext: () => {
5321
5321
  const parts = [];
5322
- if (userMessage) {
5323
- parts.push(`User message: ${userMessage.slice(-200)}`);
5324
- }
5325
- if (onboardingState) {
5326
- parts.push(`Build phase: ${onboardingState}`);
5327
- }
5328
- const text = subAgentText || getTextContent(contentBlocks).slice(-500);
5329
- if (text) {
5330
- parts.push(`Assistant text: ${text}`);
5331
- }
5332
5322
  const toolName = currentToolNames || getToolCalls(contentBlocks).filter((tc) => !STATUS_EXCLUDED_TOOLS.has(tc.name)).at(-1)?.name || lastCompletedTools;
5333
5323
  if (toolName) {
5334
5324
  parts.push(`Tool: ${toolName}`);
@@ -5339,6 +5329,16 @@ async function runTurn(params) {
5339
5329
  if (lastCompletedResult) {
5340
5330
  parts.push(`Tool result: ${lastCompletedResult.slice(-200)}`);
5341
5331
  }
5332
+ const text = subAgentText || getTextContent(contentBlocks).slice(-500);
5333
+ if (text) {
5334
+ parts.push(`Assistant text: ${text}`);
5335
+ }
5336
+ if (onboardingState && onboardingState !== "onboardingFinished") {
5337
+ parts.push(`Build phase: ${onboardingState}`);
5338
+ }
5339
+ if (userMessage) {
5340
+ parts.push(`User request: ${userMessage.slice(-100)}`);
5341
+ }
5342
5342
  return parts.join("\n");
5343
5343
  },
5344
5344
  onStatus: (label) => onEvent({ type: "status", message: label }),
@@ -5942,6 +5942,8 @@ Current date: ${now}
5942
5942
 
5943
5943
  <mindstudio_agent_sdk_docs>
5944
5944
  {{compiled/sdk-actions.md}}
5945
+
5946
+ {{compiled/task-agents.md}}
5945
5947
  </mindstudio_agent_sdk_docs>
5946
5948
 
5947
5949
  <mindstudio_flavored_markdown_spec_docs>
@@ -1,6 +1,6 @@
1
1
  # Methods
2
2
 
3
- A method is a named async function that runs on the platform. It's the universal unit of backend logic — every interface (web, API, Discord, cron, webhook) invokes methods. Methods run in isolated sandboxes. One file per method, one named export.
3
+ A method is a named async function that runs on the platform. It's the universal unit of backend logic — every interface (web, API, Discord, cron, webhook) invokes methods. One file per method, one named export.
4
4
 
5
5
  ## Writing a Method
6
6
 
@@ -192,6 +192,37 @@ export async function createPurchaseOrder(input: {
192
192
  }
193
193
  ```
194
194
 
195
+ ## Fire-and-Forget Background Tasks
196
+
197
+ A method can return immediately while kicking off slow work (like `runTask()`) that continues in the background. Don't await the slow call — use `.then()` / `.catch()` to update the record when it completes, and return an early result to the caller. The frontend polls the record's status to track progress.
198
+
199
+ ```typescript
200
+ export async function enrichRestaurant(input: { id: string; name: string }) {
201
+ await Restaurants.update(input.id, { status: 'enriching' });
202
+
203
+ // Fire — don't await
204
+ agent.runTask<RestaurantData>({
205
+ prompt: '...',
206
+ input: { name: input.name },
207
+ tools: ['searchGoogle', 'fetchUrl', 'generateImage'],
208
+ structuredOutputExample: { /* ... */ },
209
+ model: 'claude-4-6-sonnet',
210
+ }).then(async (result) => {
211
+ if (result.parsedSuccessfully) {
212
+ await Restaurants.update(input.id, { ...result.output, status: 'complete' });
213
+ } else {
214
+ await Restaurants.update(input.id, { status: 'failed' });
215
+ }
216
+ }).catch(async () => {
217
+ await Restaurants.update(input.id, { status: 'failed' });
218
+ });
219
+
220
+ return { status: 'enriching' };
221
+ }
222
+ ```
223
+
224
+ This works because the execution environment persists between requests. The un-awaited promise continues after the method returns. DB, auth, and SDK all work normally in the background chain. For critical workflows, write a "pending" record before firing so incomplete tasks can be detected and retried.
225
+
195
226
  ## Shared Helpers
196
227
 
197
228
  Code shared between methods goes in `dist/methods/src/common/`. Helpers are not listed in the manifest — they're internal, imported by methods but not directly invocable.
@@ -94,7 +94,7 @@ const { vendor } = await api.approveVendor({ vendorId: '...' });
94
94
  - **Managed databases.** SQLite with typed schemas. Push a schema change and the platform diffs, migrates, and promotes atomically.
95
95
  - **Built-in auth.** Opt-in via manifest. Developer builds login UI, platform handles verification codes (email/SMS), cookie sessions, and role enforcement. Backend methods use `auth.requireRole('admin')` for access control.
96
96
  - **Multiple interfaces, one codebase.** Web, API, Discord, Telegram, Cron, Webhook, Email, MCP — all invoke the same methods. Methods don't know which interface called them.
97
- - **Sandboxed execution.** Methods run in isolated sandboxes with npm packages pre-installed.
97
+ - **Sandboxed execution.** Each method invocation runs in its own isolated execution context with npm packages pre-installed.
98
98
  - **Git-native deployment.** Push to default branch to deploy. Push to feature branch for preview. Rollback is a git revert.
99
99
 
100
100
  ## Minimum Viable App
@@ -157,3 +157,7 @@ MindStudio SDK allows us to build all kinds of amazing AI experiences in apps, i
157
157
  - Detailed image and video analysis
158
158
 
159
159
  Consider the ways in which AI can be incorporated into backend methods to solve problems and be creative.
160
+
161
+ ### Task Agents
162
+
163
+ For multi-step tasks where the model needs to autonomously compose actions (research + scrape + generate, enrichment pipelines, content creation), use `runTask()` instead of chaining actions manually. It runs an agent loop with the SDK actions as tools and returns structured JSON. See the task agents reference for full details.
@@ -0,0 +1,187 @@
1
+ # Task Agents (`MindStudioAgent runTask`)
2
+
3
+ A user types the name of a restaurant into your app, or uploads a photo of a storefront. The API call returns early, and in the background, a task agent searches Google, finds the official website, scrapes the address, gets the official social media accounts, and generates a stylized watercolor postcard of the exterior from images it found online. The user gets back a rich, illustrated card with the canonical name, website, address, and a custom image. A few tool calls (some in parallel), fully autonomous.
4
+
5
+ `runTask()` makes this possible. It runs a multi-step, tool-use agent loop: give it a prompt, a set of SDK actions as tools, and an example of the structured output you want. The platform runs the loop (calling the model, executing tool calls, feeding results back) until the model produces JSON matching your output example. The model decides what to do next based on intermediate results — retrying searches with different terms, working around failed tools, batching independent calls in parallel.
6
+
7
+ This is one of the most powerful pieces of the MindStudio SDK and can turn apps from amazing into truly magical. Use `askMindStudioSdk` to help construct the perfect agent for a task.
8
+
9
+ ## When to Use
10
+
11
+ This is the tool to reach for whenever a feature would be dramatically more compelling if the app could autonomously research, enrich, or create on behalf of the user. Think about the difference between "user enters a restaurant name and it gets saved" vs. "user enters a restaurant name and gets back a fully researched, illustrated card." Task agents close that gap.
12
+
13
+ Run tasks in the background — depending on complexity they can take time to complete. Return an early partial result to the user and upsert later with the final result when the agent finishes.
14
+
15
+ - **Research and enrichment:** "Given this email, find the person's LinkedIn, role, company, and a headshot" — the model searches, scrapes, extracts, and assembles structured data.
16
+ - **Content creation pipelines:** "Write SEO copy for this product in 3 languages, generate a hero image, extract keywords" — the model calls text generation, image generation, and analysis actions as needed.
17
+ - **Data processing with judgment:** "Given this restaurant name, find the canonical name, website, address, and create a stylized illustration" — the model searches, verifies, generates, and returns clean structured output.
18
+ - **Any multi-step task with branching logic:** If the model might need to retry a search with different terms, try a different approach when one fails, or make decisions based on intermediate results.
19
+
20
+ ## When NOT to Use
21
+
22
+ - **Simple linear pipelines (2-3 steps, no branching):** Just call the SDK actions directly in sequence. `runTask()` adds overhead from the model reasoning about what to do next.
23
+ - **Chat/conversation:** Use an Agent interface instead. Task agents are single-shot, no persistent conversation history.
24
+ - **One-off text generation:** Just use `generateText()` directly.
25
+
26
+ ## Usage
27
+
28
+ ```typescript
29
+ import { MindStudioAgent } from '@mindstudio-ai/agent';
30
+
31
+ const agent = new MindStudioAgent();
32
+
33
+ const result = await agent.runTask<{
34
+ name: string;
35
+ url: string;
36
+ address: string;
37
+ photoUrl: string;
38
+ }>({
39
+ prompt: `You are a restaurant research assistant. Given a restaurant name,
40
+ find its canonical name, website URL, full address, and create a stylized
41
+ watercolor illustration of the restaurant exterior.`,
42
+
43
+ input: { restaurantName: 'Tartine Bakery SF' },
44
+
45
+ tools: [
46
+ 'searchGoogle',
47
+ 'fetchUrl',
48
+ { method: 'generateImage', defaults: { imageModelOverride: { model: 'seedream-4.5' } } },
49
+ ],
50
+
51
+ structuredOutputExample: {
52
+ name: 'Tartine Bakery',
53
+ url: 'https://tartinebakery.com',
54
+ address: '600 Guerrero St, San Francisco, CA 94110',
55
+ photoUrl: 'https://cdn.mindstudio.ai/...',
56
+ },
57
+
58
+ model: 'claude-4-6-sonnet',
59
+ maxTurns: 15,
60
+ });
61
+
62
+ // Always validate before using output
63
+ if (!result.parsedSuccessfully) {
64
+ console.error('Task failed to produce structured output:', result.outputRaw);
65
+ throw new Error('Task agent failed');
66
+ }
67
+
68
+ console.log(result.output.name); // 'Tartine Bakery'
69
+ console.log(result.output.photoUrl); // URL to the generated illustration
70
+ ```
71
+
72
+ ## Always Validate Output
73
+
74
+ `runTask()` can return successfully with garbage output — fields null, data echoed back, or raw text instead of JSON. The result includes `parsedSuccessfully` to make this explicit. Always check it before using the output:
75
+
76
+ ```typescript
77
+ const result = await agent.runTask<MyType>({ ... });
78
+
79
+ if (!result.parsedSuccessfully) {
80
+ console.error('Task output was not valid JSON:', result.outputRaw);
81
+ throw new Error('Task agent failed to produce structured output');
82
+ }
83
+
84
+ // Now safe to use result.output
85
+ await Table.update(id, result.output);
86
+ ```
87
+
88
+ ## Tool Configuration
89
+
90
+ Tools are SDK action names. The model gets the full input schema for each tool so it knows what parameters to pass. Only include tools the task actually needs — the model may use extra tools unnecessarily.
91
+
92
+ Use tool defaults for model/config choices. Use the prompt for task-level instructions.
93
+
94
+ ```typescript
95
+ tools: [
96
+ // Simple — just the action name
97
+ 'searchGoogle',
98
+ 'fetchUrl',
99
+ 'scrapeUrl',
100
+
101
+ // With defaults — override specific input fields while letting the model control the rest
102
+ { method: 'generateImage', defaults: { imageModelOverride: { model: 'seedream-4.5' } } },
103
+ { method: 'analyzeImage', defaults: { visionModelOverride: { model: 'gemini-3-flash' } } },
104
+ ]
105
+ ```
106
+
107
+ When the model calls a tool, the platform deep-merges the model's arguments with the developer's defaults. The model decides what to do (prompt, query, parameters), the developer controls which model/config to use. If the model needs to search and generate an image and those are independent, it will call both tools in the same turn (parallel execution server-side).
108
+
109
+ ## Options
110
+
111
+ | Field | Required | Default | Description |
112
+ |-------|----------|---------|-------------|
113
+ | `prompt` | Yes | — | System prompt defining the agent's behavior |
114
+ | `input` | Yes | — | Structured input (passed as user message) |
115
+ | `tools` | Yes | — | SDK action names with optional defaults |
116
+ | `structuredOutputExample` | Yes | — | Object or JSON string showing expected output shape. Use realistic example values, not placeholders like `'string'` |
117
+ | `model` | Yes | — | Model ID (must support tool use) |
118
+ | `maxTurns` | No | 10 | Max loop iterations (capped at 25) |
119
+ | `onEvent` | No | — | SSE event callback for real-time streaming |
120
+
121
+ ## Models
122
+
123
+ Use `askMindStudioSdk` to choose appropriate models given the task and its complexity.
124
+
125
+ ## Return Value
126
+
127
+ ```typescript
128
+ interface RunTaskResult<T> {
129
+ output: T; // Parsed structured output matching your example
130
+ outputRaw: string; // Raw model text before JSON parse
131
+ parsedSuccessfully: boolean; // Whether output was valid JSON
132
+ turns: number; // Number of loop iterations used
133
+ usage: {
134
+ inputTokens: number;
135
+ outputTokens: number;
136
+ totalBillingCost: number;
137
+ };
138
+ toolCalls: Array<{ // Execution log for debugging
139
+ name: string;
140
+ success: boolean;
141
+ durationMs: number;
142
+ }>;
143
+ }
144
+ ```
145
+
146
+ When something goes wrong, `toolCalls` is the first thing to check. If it's empty, the model never used any tools (prompt probably isn't clear enough). If a tool failed, the model may have worked around it or produced garbage.
147
+
148
+ ## Streaming
149
+
150
+ Pass an `onEvent` callback to get real-time events:
151
+
152
+ ```typescript
153
+ const result = await agent.runTask({
154
+ // ... same options ...
155
+ onEvent: (event) => {
156
+ if (event.type === 'text') console.log('Agent:', event.text);
157
+ if (event.type === 'tool_call_start') console.log(`Calling ${event.name}...`);
158
+ if (event.type === 'tool_call_result') console.log('Result:', event.output);
159
+ },
160
+ });
161
+ ```
162
+
163
+ Event types: `text`, `thinking`, `thinking_complete`, `tool_use`, `tool_input_delta`, `tool_input_args`, `tool_call_start`, `tool_call_result`, `error`, `done`.
164
+
165
+ Without `onEvent`, the SDK uses async polling (returns silently when complete). In dev mode (via the dev tunnel), progress and results are automatically logged to console with no setup needed.
166
+
167
+ ## Error Handling
168
+
169
+ - Model produces non-JSON output: retried automatically if turns remain
170
+ - Tool execution fails: error fed back to model, it can retry or work around it
171
+ - Max turns exceeded: one final forced output attempt with tools disabled
172
+ - If output still can't be parsed: `parsedSuccessfully` will be `false`, raw text available in `outputRaw`
173
+
174
+ ```typescript
175
+ try {
176
+ const result = await agent.runTask({ ... });
177
+ if (!result.parsedSuccessfully) {
178
+ // Task completed but output wasn't valid JSON
179
+ console.error('Raw output:', result.outputRaw);
180
+ console.error('Tool calls:', result.toolCalls);
181
+ }
182
+ } catch (err) {
183
+ if (err instanceof MindStudioError) {
184
+ // err.code: 'task_execution_error' | 'poll_token_expired' | 'stream_error'
185
+ }
186
+ }
187
+ ```
@@ -28,6 +28,8 @@ Process logs are available at .logs/ in NDJSON format (one JSON object per line)
28
28
  ### MindStudio SDK
29
29
  For any work involving AI models, external actions (web scraping, email, SMS), or third-party API/OAuth connections, prefer the `@mindstudio-ai/agent` SDK. It removes the need to research API methods, configure keys and tokens, or require the user to set up developer accounts.
30
30
 
31
+ For multi-step tasks with branching logic (research, enrichment, content pipelines), use `runTask()` instead of manually chaining SDK actions. It runs an autonomous agent loop that composes actions, retries on failure, and returns structured JSON. See the task agents reference for details.
32
+
31
33
  ### Auth
32
34
  - Not every app needs auth, and even for apps that do need auth, not every screen needs auth. Think intentionally about places where auth is required. Don't make auth be the first thing a user sees - that's jarring. Only show auth at intuitive and natural moments in the user's journey - be thoughtful about how to implement auth in the UI.
33
35
  - Frontend interfaces are always untrusted. Always enforce auth in backend methods. Use frontend auth and role information as a hint to conditionally show/hide UI to make the experience pleasant and seamless for users depending on their state, but remember to always use backend methods for gating data that is conditional on auth.
@@ -10,7 +10,7 @@ MindStudio apps are full-stack TypeScript projects. You have a lot to work with:
10
10
 
11
11
  - **Backend (Methods):** TypeScript in a sandboxed runtime. Any npm package. Managed SQLite database with typed schemas and automatic migrations. Built-in app-managed auth with email/SMS verification, cookie sessions, and role enforcement. None of these are required — use what the app needs.
12
12
  - **Frontend (Web Interface):** Starts as Vite + React, but any TypeScript project with a build command works. Any framework, any library, or no framework at all.
13
- - **AI & integrations:** The `@mindstudio-ai/agent` SDK gives access to 200+ AI models (OpenAI, Anthropic, Google, Meta, Mistral, and more) and 1000+ integrations (email, SMS, Slack, HubSpot, Google Workspace, web scraping, image/video generation, media processing) with zero configuration — credentials are handled automatically. No API keys needed. This SDK is really robust and used in production by 100k+ users and their AI agents.
13
+ - **AI & integrations:** The `@mindstudio-ai/agent` SDK gives access to 200+ AI models (OpenAI, Anthropic, Google, Meta, Mistral, and more) and 1000+ integrations (email, SMS, Slack, HubSpot, Google Workspace, web scraping, image/video generation, media processing) with zero configuration — credentials are handled automatically. No API keys needed. Beyond individual actions, `runTask()` lets you spin up lightweight autonomous task agents that chain these actions together with judgment — e.g., a user types a restaurant name and the backend autonomously researches it in the background, finds the address, and generates a custom illustration. Think about where this kind of enrichment would make a feature go from functional to magical.
14
14
  - **Interfaces:** Web UI, REST API, cron jobs, webhooks, Discord bots, Telegram bots, MCP tool servers, email processors, conversational AI agents — all backed by the same methods. An app can use any combination.
15
15
 
16
16
  This is a capable, stable platform. Build with confidence; you're building production-grade apps, not fragile prototypes.
@@ -58,6 +58,10 @@ When a plan includes multiple screens/API calls, always note this item for the d
58
58
 
59
59
  - **Auth state read once instead of subscribed.** If the plan reads `auth.currentUser` or `auth.getCurrentUser()` in a `useState` initializer, at component top-level, or in a one-time check, the UI won't update after login/logout. The correct pattern is `auth.onAuthStateChanged(cb)` which fires immediately and on every auth transition. Flag if you see auth state read without a subscription.
60
60
 
61
+ - **Manual multi-step MindStudio SDK action chains that should be `runTask()`.** If a method chains AI-driven SDK actions with branching logic (search, then scrape based on results, then generate based on what was scraped), that's a `runTask()` use case. `runTask()` runs an agent loop that autonomously calls SDK actions as tools and returns structured JSON. The developer writes a prompt and an output example instead of imperative code. Flag when you see methods with complex sequential/branching SDK action chains — especially research, enrichment, or content generation pipelines. Similarly, flag opportunities where the developer might not have realized they could get better and richer data via runTask - it's a really powerful lever for working with data (e.g., user provides some fragment and agent task goes off and enriches it) that the developer might not have remembered when planning their work.
62
+
63
+ - **MindStudio SDK `runTask()` output used without validation.** `runTask()` can return successfully with garbage output (null fields, echoed input, raw text). The result includes `parsedSuccessfully` — if the plan uses `result.output` without checking `result.parsedSuccessfully` first, flag it. This is the #1 footgun with task agents.
64
+
61
65
  - **Layout shift with dynamic data or AI generated text** If the plan includes dynamically-sized data (e.g., a wizard form with questions of differing lengths) or AI generated text (where text stream length is unpredictable), make sure to flag concerns about layout stability. Everything must either be a fixed size or smoothly animate between sizes. Text can never be clipped by a container or cause layout to jump around or grow in snappy/janky ways. Make sure to remind the developer that this is important to pay attention to.
62
66
 
63
67
  ## When to stay quiet
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.119",
3
+ "version": "0.1.121",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",