@mindstudio-ai/remy 0.1.138 → 0.1.140

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -2352,7 +2352,30 @@ var listDirTool = {
2352
2352
  const capped = children.slice(0, MAX_CHILDREN);
2353
2353
  for (const child of capped) {
2354
2354
  if (child.isDirectory()) {
2355
- lines.push(` ${child.name}/`);
2355
+ const [childDisplay, childFinalPath] = await collapsePath(
2356
+ finalPath,
2357
+ child.name
2358
+ );
2359
+ lines.push(` ${childDisplay}/`);
2360
+ try {
2361
+ const grandchildren = await readAndSort(childFinalPath);
2362
+ const gcCapped = grandchildren.slice(0, MAX_CHILDREN);
2363
+ for (const gc of gcCapped) {
2364
+ if (gc.isDirectory()) {
2365
+ lines.push(` ${gc.name}/`);
2366
+ } else {
2367
+ lines.push(
2368
+ await formatFile(childFinalPath, gc.name, " ")
2369
+ );
2370
+ }
2371
+ }
2372
+ if (grandchildren.length > MAX_CHILDREN) {
2373
+ lines.push(
2374
+ ` ... and ${grandchildren.length - MAX_CHILDREN} more`
2375
+ );
2376
+ }
2377
+ } catch {
2378
+ }
2356
2379
  } else {
2357
2380
  lines.push(await formatFile(finalPath, child.name, " "));
2358
2381
  }
@@ -2865,8 +2888,10 @@ async function runSubAgent(config) {
2865
2888
  requestId,
2866
2889
  history,
2867
2890
  background,
2868
- onBackgroundComplete
2891
+ onBackgroundComplete,
2892
+ captureArtifacts
2869
2893
  } = config;
2894
+ const artifacts = {};
2870
2895
  const bgAbort = background ? new AbortController() : null;
2871
2896
  const signal = background ? bgAbort.signal : parentSignal;
2872
2897
  const agentName = subAgentId || "sub-agent";
@@ -3038,7 +3063,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
3038
3063
  if (stopReason !== "tool_use" || toolCalls.length === 0) {
3039
3064
  statusWatcher.stop();
3040
3065
  const text = getPartialText(contentBlocks);
3041
- return { text, messages: thisInvocation() };
3066
+ const hasArtifacts = Object.keys(artifacts).length > 0;
3067
+ return {
3068
+ text,
3069
+ messages: thisInvocation(),
3070
+ ...hasArtifacts ? { artifacts } : {}
3071
+ };
3042
3072
  }
3043
3073
  log5.info("Tools executing", {
3044
3074
  requestId,
@@ -3149,6 +3179,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
3149
3179
  if (innerMsgs) {
3150
3180
  block.subAgentMessages = innerMsgs;
3151
3181
  }
3182
+ if (captureArtifacts?.includes(block.name) && !r.isError) {
3183
+ try {
3184
+ artifacts[block.name] = JSON.parse(r.result);
3185
+ } catch {
3186
+ }
3187
+ }
3152
3188
  }
3153
3189
  messages.push({
3154
3190
  role: "user",
@@ -3480,13 +3516,22 @@ var browserAutomationTool = {
3480
3516
  }
3481
3517
  return result2;
3482
3518
  },
3483
- toolRegistry: context.toolRegistry
3519
+ toolRegistry: context.toolRegistry,
3520
+ captureArtifacts: ["screenshotFullPage"]
3484
3521
  });
3485
3522
  try {
3486
3523
  await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3487
3524
  } catch {
3488
3525
  }
3489
3526
  context.subAgentMessages?.set(context.toolCallId, result.messages);
3527
+ const ss = result.artifacts?.screenshotFullPage;
3528
+ if (ss?.url) {
3529
+ return JSON.stringify({
3530
+ text: result.text,
3531
+ screenshotUrl: ss.url,
3532
+ ...ss.styleMap ? { styleMap: ss.styleMap } : {}
3533
+ });
3534
+ }
3490
3535
  return result.text;
3491
3536
  } finally {
3492
3537
  release();
@@ -3534,19 +3579,18 @@ var screenshotTool = {
3534
3579
  if (input.instructions && context) {
3535
3580
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
3536
3581
  const result = await browserAutomationTool.execute({ task }, context);
3537
- const urlMatch = result.match(
3538
- /https:\/\/[^\s"')]+\.(?:png|jpg|jpeg|webp)/i
3539
- );
3540
- if (!urlMatch) {
3541
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
3542
- }
3543
- const url = urlMatch[0];
3582
+ const resultStr = result;
3583
+ let url;
3544
3584
  let styleMap;
3545
3585
  try {
3546
- const parsed = JSON.parse(result);
3547
- styleMap = parsed?.styleMap;
3586
+ const parsed = JSON.parse(resultStr);
3587
+ url = parsed.screenshotUrl;
3588
+ styleMap = parsed.styleMap;
3548
3589
  } catch {
3549
3590
  }
3591
+ if (!url) {
3592
+ return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
3593
+ }
3550
3594
  const analysisPrompt = buildScreenshotAnalysisPrompt({
3551
3595
  prompt: input.prompt,
3552
3596
  styleMap
@@ -3870,19 +3914,18 @@ async function execute5(input, onLog, context) {
3870
3914
  try {
3871
3915
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
3872
3916
  const result = await browserAutomationTool.execute({ task }, context);
3873
- const urlMatch = result.match(
3874
- /https:\/\/[^\s"')]+\.(?:png|jpg|jpeg|webp)/i
3875
- );
3876
- if (!urlMatch) {
3877
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
3878
- }
3879
- const url = urlMatch[0];
3917
+ const resultStr = result;
3918
+ let url;
3880
3919
  let styleMap;
3881
3920
  try {
3882
- const parsed = JSON.parse(result);
3883
- styleMap = parsed?.styleMap;
3921
+ const parsed = JSON.parse(resultStr);
3922
+ url = parsed.screenshotUrl;
3923
+ styleMap = parsed.styleMap;
3884
3924
  } catch {
3885
3925
  }
3926
+ if (!url) {
3927
+ return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
3928
+ }
3886
3929
  const analysisPrompt = buildScreenshotAnalysisPrompt({
3887
3930
  prompt: input.prompt,
3888
3931
  styleMap
package/dist/index.js CHANGED
@@ -2660,7 +2660,30 @@ var init_listDir = __esm({
2660
2660
  const capped = children.slice(0, MAX_CHILDREN);
2661
2661
  for (const child of capped) {
2662
2662
  if (child.isDirectory()) {
2663
- lines.push(` ${child.name}/`);
2663
+ const [childDisplay, childFinalPath] = await collapsePath(
2664
+ finalPath,
2665
+ child.name
2666
+ );
2667
+ lines.push(` ${childDisplay}/`);
2668
+ try {
2669
+ const grandchildren = await readAndSort(childFinalPath);
2670
+ const gcCapped = grandchildren.slice(0, MAX_CHILDREN);
2671
+ for (const gc of gcCapped) {
2672
+ if (gc.isDirectory()) {
2673
+ lines.push(` ${gc.name}/`);
2674
+ } else {
2675
+ lines.push(
2676
+ await formatFile(childFinalPath, gc.name, " ")
2677
+ );
2678
+ }
2679
+ }
2680
+ if (grandchildren.length > MAX_CHILDREN) {
2681
+ lines.push(
2682
+ ` ... and ${grandchildren.length - MAX_CHILDREN} more`
2683
+ );
2684
+ }
2685
+ } catch {
2686
+ }
2664
2687
  } else {
2665
2688
  lines.push(await formatFile(finalPath, child.name, " "));
2666
2689
  }
@@ -3244,8 +3267,10 @@ async function runSubAgent(config) {
3244
3267
  requestId,
3245
3268
  history,
3246
3269
  background,
3247
- onBackgroundComplete
3270
+ onBackgroundComplete,
3271
+ captureArtifacts
3248
3272
  } = config;
3273
+ const artifacts = {};
3249
3274
  const bgAbort = background ? new AbortController() : null;
3250
3275
  const signal = background ? bgAbort.signal : parentSignal;
3251
3276
  const agentName = subAgentId || "sub-agent";
@@ -3417,7 +3442,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
3417
3442
  if (stopReason !== "tool_use" || toolCalls.length === 0) {
3418
3443
  statusWatcher.stop();
3419
3444
  const text = getPartialText(contentBlocks);
3420
- return { text, messages: thisInvocation() };
3445
+ const hasArtifacts = Object.keys(artifacts).length > 0;
3446
+ return {
3447
+ text,
3448
+ messages: thisInvocation(),
3449
+ ...hasArtifacts ? { artifacts } : {}
3450
+ };
3421
3451
  }
3422
3452
  log6.info("Tools executing", {
3423
3453
  requestId,
@@ -3528,6 +3558,12 @@ ${partial}` : "[INTERRUPTED] Agent was interrupted before producing output.",
3528
3558
  if (innerMsgs) {
3529
3559
  block.subAgentMessages = innerMsgs;
3530
3560
  }
3561
+ if (captureArtifacts?.includes(block.name) && !r.isError) {
3562
+ try {
3563
+ artifacts[block.name] = JSON.parse(r.result);
3564
+ } catch {
3565
+ }
3566
+ }
3531
3567
  }
3532
3568
  messages.push({
3533
3569
  role: "user",
@@ -3895,13 +3931,22 @@ var init_browserAutomation = __esm({
3895
3931
  }
3896
3932
  return result2;
3897
3933
  },
3898
- toolRegistry: context.toolRegistry
3934
+ toolRegistry: context.toolRegistry,
3935
+ captureArtifacts: ["screenshotFullPage"]
3899
3936
  });
3900
3937
  try {
3901
3938
  await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3902
3939
  } catch {
3903
3940
  }
3904
3941
  context.subAgentMessages?.set(context.toolCallId, result.messages);
3942
+ const ss = result.artifacts?.screenshotFullPage;
3943
+ if (ss?.url) {
3944
+ return JSON.stringify({
3945
+ text: result.text,
3946
+ screenshotUrl: ss.url,
3947
+ ...ss.styleMap ? { styleMap: ss.styleMap } : {}
3948
+ });
3949
+ }
3905
3950
  return result.text;
3906
3951
  } finally {
3907
3952
  release();
@@ -3959,19 +4004,18 @@ var init_screenshot2 = __esm({
3959
4004
  if (input.instructions && context) {
3960
4005
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
3961
4006
  const result = await browserAutomationTool.execute({ task }, context);
3962
- const urlMatch = result.match(
3963
- /https:\/\/[^\s"')]+\.(?:png|jpg|jpeg|webp)/i
3964
- );
3965
- if (!urlMatch) {
3966
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
3967
- }
3968
- const url = urlMatch[0];
4007
+ const resultStr = result;
4008
+ let url;
3969
4009
  let styleMap;
3970
4010
  try {
3971
- const parsed = JSON.parse(result);
3972
- styleMap = parsed?.styleMap;
4011
+ const parsed = JSON.parse(resultStr);
4012
+ url = parsed.screenshotUrl;
4013
+ styleMap = parsed.styleMap;
3973
4014
  } catch {
3974
4015
  }
4016
+ if (!url) {
4017
+ return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4018
+ }
3975
4019
  const analysisPrompt = buildScreenshotAnalysisPrompt({
3976
4020
  prompt: input.prompt,
3977
4021
  styleMap
@@ -4311,19 +4355,18 @@ async function execute5(input, onLog, context) {
4311
4355
  try {
4312
4356
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
4313
4357
  const result = await browserAutomationTool.execute({ task }, context);
4314
- const urlMatch = result.match(
4315
- /https:\/\/[^\s"')]+\.(?:png|jpg|jpeg|webp)/i
4316
- );
4317
- if (!urlMatch) {
4318
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
4319
- }
4320
- const url = urlMatch[0];
4358
+ const resultStr = result;
4359
+ let url;
4321
4360
  let styleMap;
4322
4361
  try {
4323
- const parsed = JSON.parse(result);
4324
- styleMap = parsed?.styleMap;
4362
+ const parsed = JSON.parse(resultStr);
4363
+ url = parsed.screenshotUrl;
4364
+ styleMap = parsed.styleMap;
4325
4365
  } catch {
4326
4366
  }
4367
+ if (!url) {
4368
+ return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4369
+ }
4327
4370
  const analysisPrompt = buildScreenshotAnalysisPrompt({
4328
4371
  prompt: input.prompt,
4329
4372
  styleMap
@@ -245,20 +245,54 @@ export function getApprovalState(approvals: Approval[]) {
245
245
 
246
246
  ## Streaming
247
247
 
248
- Methods can stream token-by-token output (useful for AI-generated content):
248
+ Methods can push real-time updates to the frontend using `stream()`. This is the standard pattern for any method that takes more than a few seconds.
249
249
 
250
250
  ```typescript
251
- // Frontend
252
- const result = await api.generateReport(
253
- { month: 'march' },
251
+ import { mindstudio, stream } from '@mindstudio-ai/agent';
252
+
253
+ export async function enrichProfile(input: { name: string }) {
254
+ await stream('Researching...');
255
+
256
+ const { content } = await mindstudio.generateText(
257
+ { message: `Find background info on ${input.name}` },
258
+ { onLog: (event) => stream({ status: event.value }) },
259
+ );
260
+
261
+ await stream({ status: 'generating_image', progress: 0.5 });
262
+
263
+ const { imageUrl } = await mindstudio.generateImage(
264
+ { prompt: `Professional portrait illustration of ${input.name}` },
265
+ { onLog: (event) => stream({ status: event.value }) },
266
+ );
267
+
268
+ return { bio: content, imageUrl };
269
+ }
270
+ ```
271
+
272
+ Two data types:
273
+ - `stream('text')` sends a text token (like LLM streaming output)
274
+ - `stream({ ... })` sends structured data (progress, status, intermediate results)
275
+
276
+ Every SDK action accepts an `onLog` callback that emits execution progress. Pipe it through `stream()` so the frontend sees what's happening inside each action in real time. Use `stream()` directly for your own status messages between actions.
277
+
278
+ When there's no active stream (method not called with `stream: true`, CLI execution, background jobs), `stream()` is a silent no-op, so it is always safe to call unconditionally.
279
+
280
+ ### Frontend
281
+
282
+ The frontend calls the method with `stream: true` and receives updates via `onToken`. The `text` value is accumulated (not a delta), so replace your display content each time.
283
+
284
+ ```typescript
285
+ const result = await api.enrichProfile(
286
+ { name: 'Alice' },
254
287
  {
255
288
  stream: true,
256
- onToken: (text) => setPreview(text),
289
+ onToken: (text) => setResponseText(text),
257
290
  },
258
291
  );
292
+ // result is the same final output you'd get without streaming
259
293
  ```
260
294
 
261
- The platform handles the SSE transport. The method returns normally — streaming is managed by the SDK and platform, not by your method code.
295
+ Use `onStreamError` for transient error handling. The method's promise still resolves with the final return value once execution completes.
262
296
 
263
297
  ## Raw Request Context (API Interface)
264
298
 
@@ -30,6 +30,8 @@ For any work involving AI models, external actions (web scraping, email, SMS), o
30
30
 
31
31
  For multi-step tasks with branching logic (research, enrichment, content pipelines), use `runTask()` instead of manually chaining SDK actions. It runs an autonomous agent loop that composes actions, retries on failure, and returns structured JSON. See the task agents reference for details.
32
32
 
33
+ For methods that take more than a few seconds, use `stream()` from `@mindstudio-ai/agent` to push real-time progress to the frontend. Pipe `onLog` from SDK actions through `stream()` so users see what's happening. The frontend calls the method with `stream: true` and gets updates via `onToken`. See the methods reference for the full pattern.
34
+
33
35
  ### Auth
34
36
  - Not every app needs auth, and even for apps that do need auth, not every screen needs auth. Think intentionally about places where auth is required. Don't make auth be the first thing a user sees - that's jarring. Only show auth at intuitive and natural moments in the user's journey - be thoughtful about how to implement auth in the UI.
35
37
  - Frontend interfaces are always untrusted. Always enforce auth in backend methods. Use frontend auth and role information as a hint to conditionally show/hide UI to make the experience pleasant and seamless for users depending on their state, but remember to always use backend methods for gating data that is conditional on auth.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.138",
3
+ "version": "0.1.140",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",