@mindstudio-ai/remy 0.1.138 → 0.1.140
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/headless.js +65 -22
- package/dist/index.js +65 -22
- package/dist/prompt/compiled/methods.md +40 -6
- package/dist/prompt/static/coding.md +2 -0
- package/package.json +1 -1
package/dist/headless.js
CHANGED
|
@@ -2352,7 +2352,30 @@ var listDirTool = {
|
|
|
2352
2352
|
const capped = children.slice(0, MAX_CHILDREN);
|
|
2353
2353
|
for (const child of capped) {
|
|
2354
2354
|
if (child.isDirectory()) {
|
|
2355
|
-
|
|
2355
|
+
const [childDisplay, childFinalPath] = await collapsePath(
|
|
2356
|
+
finalPath,
|
|
2357
|
+
child.name
|
|
2358
|
+
);
|
|
2359
|
+
lines.push(` ${childDisplay}/`);
|
|
2360
|
+
try {
|
|
2361
|
+
const grandchildren = await readAndSort(childFinalPath);
|
|
2362
|
+
const gcCapped = grandchildren.slice(0, MAX_CHILDREN);
|
|
2363
|
+
for (const gc of gcCapped) {
|
|
2364
|
+
if (gc.isDirectory()) {
|
|
2365
|
+
lines.push(` ${gc.name}/`);
|
|
2366
|
+
} else {
|
|
2367
|
+
lines.push(
|
|
2368
|
+
await formatFile(childFinalPath, gc.name, " ")
|
|
2369
|
+
);
|
|
2370
|
+
}
|
|
2371
|
+
}
|
|
2372
|
+
if (grandchildren.length > MAX_CHILDREN) {
|
|
2373
|
+
lines.push(
|
|
2374
|
+
` ... and ${grandchildren.length - MAX_CHILDREN} more`
|
|
2375
|
+
);
|
|
2376
|
+
}
|
|
2377
|
+
} catch {
|
|
2378
|
+
}
|
|
2356
2379
|
} else {
|
|
2357
2380
|
lines.push(await formatFile(finalPath, child.name, " "));
|
|
2358
2381
|
}
|
|
@@ -2865,8 +2888,10 @@ async function runSubAgent(config) {
|
|
|
2865
2888
|
requestId,
|
|
2866
2889
|
history,
|
|
2867
2890
|
background,
|
|
2868
|
-
onBackgroundComplete
|
|
2891
|
+
onBackgroundComplete,
|
|
2892
|
+
captureArtifacts
|
|
2869
2893
|
} = config;
|
|
2894
|
+
const artifacts = {};
|
|
2870
2895
|
const bgAbort = background ? new AbortController() : null;
|
|
2871
2896
|
const signal = background ? bgAbort.signal : parentSignal;
|
|
2872
2897
|
const agentName = subAgentId || "sub-agent";
|
|
@@ -3038,7 +3063,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
|
|
|
3038
3063
|
if (stopReason !== "tool_use" || toolCalls.length === 0) {
|
|
3039
3064
|
statusWatcher.stop();
|
|
3040
3065
|
const text = getPartialText(contentBlocks);
|
|
3041
|
-
|
|
3066
|
+
const hasArtifacts = Object.keys(artifacts).length > 0;
|
|
3067
|
+
return {
|
|
3068
|
+
text,
|
|
3069
|
+
messages: thisInvocation(),
|
|
3070
|
+
...hasArtifacts ? { artifacts } : {}
|
|
3071
|
+
};
|
|
3042
3072
|
}
|
|
3043
3073
|
log5.info("Tools executing", {
|
|
3044
3074
|
requestId,
|
|
@@ -3149,6 +3179,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
|
|
|
3149
3179
|
if (innerMsgs) {
|
|
3150
3180
|
block.subAgentMessages = innerMsgs;
|
|
3151
3181
|
}
|
|
3182
|
+
if (captureArtifacts?.includes(block.name) && !r.isError) {
|
|
3183
|
+
try {
|
|
3184
|
+
artifacts[block.name] = JSON.parse(r.result);
|
|
3185
|
+
} catch {
|
|
3186
|
+
}
|
|
3187
|
+
}
|
|
3152
3188
|
}
|
|
3153
3189
|
messages.push({
|
|
3154
3190
|
role: "user",
|
|
@@ -3480,13 +3516,22 @@ var browserAutomationTool = {
|
|
|
3480
3516
|
}
|
|
3481
3517
|
return result2;
|
|
3482
3518
|
},
|
|
3483
|
-
toolRegistry: context.toolRegistry
|
|
3519
|
+
toolRegistry: context.toolRegistry,
|
|
3520
|
+
captureArtifacts: ["screenshotFullPage"]
|
|
3484
3521
|
});
|
|
3485
3522
|
try {
|
|
3486
3523
|
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3487
3524
|
} catch {
|
|
3488
3525
|
}
|
|
3489
3526
|
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3527
|
+
const ss = result.artifacts?.screenshotFullPage;
|
|
3528
|
+
if (ss?.url) {
|
|
3529
|
+
return JSON.stringify({
|
|
3530
|
+
text: result.text,
|
|
3531
|
+
screenshotUrl: ss.url,
|
|
3532
|
+
...ss.styleMap ? { styleMap: ss.styleMap } : {}
|
|
3533
|
+
});
|
|
3534
|
+
}
|
|
3490
3535
|
return result.text;
|
|
3491
3536
|
} finally {
|
|
3492
3537
|
release();
|
|
@@ -3534,19 +3579,18 @@ var screenshotTool = {
|
|
|
3534
3579
|
if (input.instructions && context) {
|
|
3535
3580
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
3536
3581
|
const result = await browserAutomationTool.execute({ task }, context);
|
|
3537
|
-
const
|
|
3538
|
-
|
|
3539
|
-
);
|
|
3540
|
-
if (!urlMatch) {
|
|
3541
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
|
|
3542
|
-
}
|
|
3543
|
-
const url = urlMatch[0];
|
|
3582
|
+
const resultStr = result;
|
|
3583
|
+
let url;
|
|
3544
3584
|
let styleMap;
|
|
3545
3585
|
try {
|
|
3546
|
-
const parsed = JSON.parse(
|
|
3547
|
-
|
|
3586
|
+
const parsed = JSON.parse(resultStr);
|
|
3587
|
+
url = parsed.screenshotUrl;
|
|
3588
|
+
styleMap = parsed.styleMap;
|
|
3548
3589
|
} catch {
|
|
3549
3590
|
}
|
|
3591
|
+
if (!url) {
|
|
3592
|
+
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
3593
|
+
}
|
|
3550
3594
|
const analysisPrompt = buildScreenshotAnalysisPrompt({
|
|
3551
3595
|
prompt: input.prompt,
|
|
3552
3596
|
styleMap
|
|
@@ -3870,19 +3914,18 @@ async function execute5(input, onLog, context) {
|
|
|
3870
3914
|
try {
|
|
3871
3915
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
3872
3916
|
const result = await browserAutomationTool.execute({ task }, context);
|
|
3873
|
-
const
|
|
3874
|
-
|
|
3875
|
-
);
|
|
3876
|
-
if (!urlMatch) {
|
|
3877
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
|
|
3878
|
-
}
|
|
3879
|
-
const url = urlMatch[0];
|
|
3917
|
+
const resultStr = result;
|
|
3918
|
+
let url;
|
|
3880
3919
|
let styleMap;
|
|
3881
3920
|
try {
|
|
3882
|
-
const parsed = JSON.parse(
|
|
3883
|
-
|
|
3921
|
+
const parsed = JSON.parse(resultStr);
|
|
3922
|
+
url = parsed.screenshotUrl;
|
|
3923
|
+
styleMap = parsed.styleMap;
|
|
3884
3924
|
} catch {
|
|
3885
3925
|
}
|
|
3926
|
+
if (!url) {
|
|
3927
|
+
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
3928
|
+
}
|
|
3886
3929
|
const analysisPrompt = buildScreenshotAnalysisPrompt({
|
|
3887
3930
|
prompt: input.prompt,
|
|
3888
3931
|
styleMap
|
package/dist/index.js
CHANGED
|
@@ -2660,7 +2660,30 @@ var init_listDir = __esm({
|
|
|
2660
2660
|
const capped = children.slice(0, MAX_CHILDREN);
|
|
2661
2661
|
for (const child of capped) {
|
|
2662
2662
|
if (child.isDirectory()) {
|
|
2663
|
-
|
|
2663
|
+
const [childDisplay, childFinalPath] = await collapsePath(
|
|
2664
|
+
finalPath,
|
|
2665
|
+
child.name
|
|
2666
|
+
);
|
|
2667
|
+
lines.push(` ${childDisplay}/`);
|
|
2668
|
+
try {
|
|
2669
|
+
const grandchildren = await readAndSort(childFinalPath);
|
|
2670
|
+
const gcCapped = grandchildren.slice(0, MAX_CHILDREN);
|
|
2671
|
+
for (const gc of gcCapped) {
|
|
2672
|
+
if (gc.isDirectory()) {
|
|
2673
|
+
lines.push(` ${gc.name}/`);
|
|
2674
|
+
} else {
|
|
2675
|
+
lines.push(
|
|
2676
|
+
await formatFile(childFinalPath, gc.name, " ")
|
|
2677
|
+
);
|
|
2678
|
+
}
|
|
2679
|
+
}
|
|
2680
|
+
if (grandchildren.length > MAX_CHILDREN) {
|
|
2681
|
+
lines.push(
|
|
2682
|
+
` ... and ${grandchildren.length - MAX_CHILDREN} more`
|
|
2683
|
+
);
|
|
2684
|
+
}
|
|
2685
|
+
} catch {
|
|
2686
|
+
}
|
|
2664
2687
|
} else {
|
|
2665
2688
|
lines.push(await formatFile(finalPath, child.name, " "));
|
|
2666
2689
|
}
|
|
@@ -3244,8 +3267,10 @@ async function runSubAgent(config) {
|
|
|
3244
3267
|
requestId,
|
|
3245
3268
|
history,
|
|
3246
3269
|
background,
|
|
3247
|
-
onBackgroundComplete
|
|
3270
|
+
onBackgroundComplete,
|
|
3271
|
+
captureArtifacts
|
|
3248
3272
|
} = config;
|
|
3273
|
+
const artifacts = {};
|
|
3249
3274
|
const bgAbort = background ? new AbortController() : null;
|
|
3250
3275
|
const signal = background ? bgAbort.signal : parentSignal;
|
|
3251
3276
|
const agentName = subAgentId || "sub-agent";
|
|
@@ -3417,7 +3442,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
|
|
|
3417
3442
|
if (stopReason !== "tool_use" || toolCalls.length === 0) {
|
|
3418
3443
|
statusWatcher.stop();
|
|
3419
3444
|
const text = getPartialText(contentBlocks);
|
|
3420
|
-
|
|
3445
|
+
const hasArtifacts = Object.keys(artifacts).length > 0;
|
|
3446
|
+
return {
|
|
3447
|
+
text,
|
|
3448
|
+
messages: thisInvocation(),
|
|
3449
|
+
...hasArtifacts ? { artifacts } : {}
|
|
3450
|
+
};
|
|
3421
3451
|
}
|
|
3422
3452
|
log6.info("Tools executing", {
|
|
3423
3453
|
requestId,
|
|
@@ -3528,6 +3558,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
|
|
|
3528
3558
|
if (innerMsgs) {
|
|
3529
3559
|
block.subAgentMessages = innerMsgs;
|
|
3530
3560
|
}
|
|
3561
|
+
if (captureArtifacts?.includes(block.name) && !r.isError) {
|
|
3562
|
+
try {
|
|
3563
|
+
artifacts[block.name] = JSON.parse(r.result);
|
|
3564
|
+
} catch {
|
|
3565
|
+
}
|
|
3566
|
+
}
|
|
3531
3567
|
}
|
|
3532
3568
|
messages.push({
|
|
3533
3569
|
role: "user",
|
|
@@ -3895,13 +3931,22 @@ var init_browserAutomation = __esm({
|
|
|
3895
3931
|
}
|
|
3896
3932
|
return result2;
|
|
3897
3933
|
},
|
|
3898
|
-
toolRegistry: context.toolRegistry
|
|
3934
|
+
toolRegistry: context.toolRegistry,
|
|
3935
|
+
captureArtifacts: ["screenshotFullPage"]
|
|
3899
3936
|
});
|
|
3900
3937
|
try {
|
|
3901
3938
|
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3902
3939
|
} catch {
|
|
3903
3940
|
}
|
|
3904
3941
|
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3942
|
+
const ss = result.artifacts?.screenshotFullPage;
|
|
3943
|
+
if (ss?.url) {
|
|
3944
|
+
return JSON.stringify({
|
|
3945
|
+
text: result.text,
|
|
3946
|
+
screenshotUrl: ss.url,
|
|
3947
|
+
...ss.styleMap ? { styleMap: ss.styleMap } : {}
|
|
3948
|
+
});
|
|
3949
|
+
}
|
|
3905
3950
|
return result.text;
|
|
3906
3951
|
} finally {
|
|
3907
3952
|
release();
|
|
@@ -3959,19 +4004,18 @@ var init_screenshot2 = __esm({
|
|
|
3959
4004
|
if (input.instructions && context) {
|
|
3960
4005
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
3961
4006
|
const result = await browserAutomationTool.execute({ task }, context);
|
|
3962
|
-
const
|
|
3963
|
-
|
|
3964
|
-
);
|
|
3965
|
-
if (!urlMatch) {
|
|
3966
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
|
|
3967
|
-
}
|
|
3968
|
-
const url = urlMatch[0];
|
|
4007
|
+
const resultStr = result;
|
|
4008
|
+
let url;
|
|
3969
4009
|
let styleMap;
|
|
3970
4010
|
try {
|
|
3971
|
-
const parsed = JSON.parse(
|
|
3972
|
-
|
|
4011
|
+
const parsed = JSON.parse(resultStr);
|
|
4012
|
+
url = parsed.screenshotUrl;
|
|
4013
|
+
styleMap = parsed.styleMap;
|
|
3973
4014
|
} catch {
|
|
3974
4015
|
}
|
|
4016
|
+
if (!url) {
|
|
4017
|
+
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4018
|
+
}
|
|
3975
4019
|
const analysisPrompt = buildScreenshotAnalysisPrompt({
|
|
3976
4020
|
prompt: input.prompt,
|
|
3977
4021
|
styleMap
|
|
@@ -4311,19 +4355,18 @@ async function execute5(input, onLog, context) {
|
|
|
4311
4355
|
try {
|
|
4312
4356
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
4313
4357
|
const result = await browserAutomationTool.execute({ task }, context);
|
|
4314
|
-
const
|
|
4315
|
-
|
|
4316
|
-
);
|
|
4317
|
-
if (!urlMatch) {
|
|
4318
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
|
|
4319
|
-
}
|
|
4320
|
-
const url = urlMatch[0];
|
|
4358
|
+
const resultStr = result;
|
|
4359
|
+
let url;
|
|
4321
4360
|
let styleMap;
|
|
4322
4361
|
try {
|
|
4323
|
-
const parsed = JSON.parse(
|
|
4324
|
-
|
|
4362
|
+
const parsed = JSON.parse(resultStr);
|
|
4363
|
+
url = parsed.screenshotUrl;
|
|
4364
|
+
styleMap = parsed.styleMap;
|
|
4325
4365
|
} catch {
|
|
4326
4366
|
}
|
|
4367
|
+
if (!url) {
|
|
4368
|
+
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4369
|
+
}
|
|
4327
4370
|
const analysisPrompt = buildScreenshotAnalysisPrompt({
|
|
4328
4371
|
prompt: input.prompt,
|
|
4329
4372
|
styleMap
|
|
@@ -245,20 +245,54 @@ export function getApprovalState(approvals: Approval[]) {
|
|
|
245
245
|
|
|
246
246
|
## Streaming
|
|
247
247
|
|
|
248
|
-
Methods can
|
|
248
|
+
Methods can push real-time updates to the frontend using `stream()`. This is the standard pattern for any method that takes more than a few seconds.
|
|
249
249
|
|
|
250
250
|
```typescript
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
251
|
+
import { mindstudio, stream } from '@mindstudio-ai/agent';
|
|
252
|
+
|
|
253
|
+
export async function enrichProfile(input: { name: string }) {
|
|
254
|
+
await stream('Researching...');
|
|
255
|
+
|
|
256
|
+
const { content } = await mindstudio.generateText(
|
|
257
|
+
{ message: `Find background info on ${input.name}` },
|
|
258
|
+
{ onLog: (event) => stream({ status: event.value }) },
|
|
259
|
+
);
|
|
260
|
+
|
|
261
|
+
await stream({ status: 'generating_image', progress: 0.5 });
|
|
262
|
+
|
|
263
|
+
const { imageUrl } = await mindstudio.generateImage(
|
|
264
|
+
{ prompt: `Professional portrait illustration of ${input.name}` },
|
|
265
|
+
{ onLog: (event) => stream({ status: event.value }) },
|
|
266
|
+
);
|
|
267
|
+
|
|
268
|
+
return { bio: content, imageUrl };
|
|
269
|
+
}
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
Two data types:
|
|
273
|
+
- `stream('text')` sends a text token (like LLM streaming output)
|
|
274
|
+
- `stream({ ... })` sends structured data (progress, status, intermediate results)
|
|
275
|
+
|
|
276
|
+
Every SDK action accepts an `onLog` callback that emits execution progress. Pipe it through `stream()` so the frontend sees what's happening inside each action in real time. Use `stream()` directly for your own status messages between actions.
|
|
277
|
+
|
|
278
|
+
When there's no active stream (the method was not called with `stream: true`, or it is running via CLI execution or as a background job), `stream()` is a silent no-op, so it is always safe to include unconditionally.
|
|
279
|
+
|
|
280
|
+
### Frontend
|
|
281
|
+
|
|
282
|
+
The frontend calls the method with `stream: true` and receives updates via `onToken`. The `text` value is accumulated (not a delta), so replace your display content each time.
|
|
283
|
+
|
|
284
|
+
```typescript
|
|
285
|
+
const result = await api.enrichProfile(
|
|
286
|
+
{ name: 'Alice' },
|
|
254
287
|
{
|
|
255
288
|
stream: true,
|
|
256
|
-
onToken: (text) =>
|
|
289
|
+
onToken: (text) => setResponseText(text),
|
|
257
290
|
},
|
|
258
291
|
);
|
|
292
|
+
// result is the same final output you'd get without streaming
|
|
259
293
|
```
|
|
260
294
|
|
|
261
|
-
|
|
295
|
+
Use `onStreamError` for transient error handling. The method's promise still resolves with the final return value once execution completes.
|
|
262
296
|
|
|
263
297
|
## Raw Request Context (API Interface)
|
|
264
298
|
|
|
@@ -30,6 +30,8 @@ For any work involving AI models, external actions (web scraping, email, SMS), o
|
|
|
30
30
|
|
|
31
31
|
For multi-step tasks with branching logic (research, enrichment, content pipelines), use `runTask()` instead of manually chaining SDK actions. It runs an autonomous agent loop that composes actions, retries on failure, and returns structured JSON. See the task agents reference for details.
|
|
32
32
|
|
|
33
|
+
For methods that take more than a few seconds, use `stream()` from `@mindstudio-ai/agent` to push real-time progress to the frontend. Pipe `onLog` from SDK actions through `stream()` so users see what's happening. The frontend calls the method with `stream: true` and gets updates via `onToken`. See the methods reference for the full pattern.
|
|
34
|
+
|
|
33
35
|
### Auth
|
|
34
36
|
- Not every app needs auth, and even in apps that do need auth, not every screen needs it. Think intentionally about the places where auth is required. Don't make auth the first thing a user sees; that's jarring. Only show auth at intuitive and natural moments in the user's journey, and be thoughtful about how to implement auth in the UI.
|
|
35
37
|
- Frontend interfaces are always untrusted. Always enforce auth in backend methods. Use frontend auth and role information as a hint to conditionally show/hide UI to make the experience pleasant and seamless for users depending on their state, but remember to always use backend methods for gating data that is conditional on auth.
|