donobu 5.55.0 → 5.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -134,6 +134,14 @@ export declare class DonobuFlow {
134
134
  * Note that this *bypasses* the normal state transition logic!
135
135
  */
136
136
  private onUserInterruption;
137
+ /**
138
+ * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
139
+ * standing goal yet, the text becomes the `overallObjective`; otherwise it's
140
+ * added as extra guidance. Either way it's injected into the LLM history (the
141
+ * system prompt was built at init, possibly before any objective existed) and
142
+ * recorded in the timeline. No-op for empty text.
143
+ */
144
+ private applyComposeInstruction;
137
145
  /**
138
146
  * Closes out the currently-proposed AI tool call(s) without executing them:
139
147
  * emits a `tool_call_result` for each (so the LLM message history stays
@@ -163,11 +171,16 @@ export declare class DonobuFlow {
163
171
  private applyRunModeChange;
164
172
  /**
165
173
  * Whether the flow can hand control to the AI: it needs both a GPT client and
166
- * an overall objective for the agent to pursue. Surfaced to the UI (as
167
- * `canUseAi`) so the autonomy selector can disable the AI modes when they
168
- * wouldn't work — e.g. a Playwright-imported test with no objective.
174
+ * a goal to pursue.
169
175
  */
170
176
  private canHandOffToAi;
177
+ /**
178
+ * Whether there is a standing goal for the AI to pursue (a non-empty
179
+ * `overallObjective`). The UI derives the same check from `overallObjective` to drive the
180
+ * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
181
+ * and ▶ Play needs either a goal or a typed instruction.
182
+ */
183
+ private hasGoal;
171
184
  /**
172
185
  * This method is called if there is an unhandled unexpected exception. This
173
186
  * method will mark the flow as a failure.
@@ -161,12 +161,13 @@ class DonobuFlow {
161
161
  try {
162
162
  this.controlPanel.update({
163
163
  state: this.metadata.state,
164
- availableToolNames: this.toolManager.tools.map((t) => t.name),
164
+ runMode: this.metadata.runMode,
165
+ overallObjective: this.metadata.overallObjective,
166
+ allowedTools: this.metadata.allowedTools,
165
167
  pendingToolCalls: this.metadata.state === 'WAITING_FOR_APPROVAL'
166
168
  ? [...this.proposedToolCalls]
167
169
  : undefined,
168
- runMode: this.metadata.runMode,
169
- canUseAi: this.canHandOffToAi(),
170
+ hasGptClient: this.gptClient !== null,
170
171
  });
171
172
  switch (this.metadata.state) {
172
173
  case 'UNSTARTED':
@@ -324,6 +325,11 @@ class DonobuFlow {
324
325
  // Set the next state based on user action
325
326
  switch (userAction.type) {
326
327
  case 'PAUSE':
328
+ // Pausing while an AI proposal awaits approval abandons that proposal so
329
+ // the user returns to a clean compose state rather than a stale prompt.
330
+ if (this.metadata.state === 'WAITING_FOR_APPROVAL') {
331
+ this.closeOutPendingProposals('Superseded because the user paused before approving; not executed.');
332
+ }
327
333
  this.metadata.state = 'PAUSED';
328
334
  await this.targetInspector.hideInteractionCursor();
329
335
  break;
@@ -441,9 +447,73 @@ class DonobuFlow {
441
447
  await this.applyRunModeChange(userAction.runMode, userAction.approvePending ?? false);
442
448
  break;
443
449
  }
450
+ case 'STEP': {
451
+ // ▶ Play: start supervised running toward the goal — the AI proposes
452
+ // each action and the user approves it before it runs, continuing until
453
+ // the objective is met or the user pauses. Needs a GPT client and a goal
454
+ // (the typed instruction can supply the goal).
455
+ if (!this.gptClient) {
456
+ break;
457
+ }
458
+ // The user is directing the next move, which supersedes anything still
459
+ // queued (e.g. unreplayed recorded steps of a paused DETERMINISTIC run).
460
+ this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
461
+ await this.applyComposeInstruction(userAction.instruction);
462
+ if (!this.hasGoal()) {
463
+ break;
464
+ }
465
+ this.metadata.runMode = 'SUPERVISED';
466
+ await this.targetInspector.showInteractionCursor();
467
+ this.metadata.state = 'RESUMING';
468
+ break;
469
+ }
470
+ case 'RUN': {
471
+ // ⏩ Fast-forward: run autonomously toward the goal until done/paused.
472
+ if (!this.gptClient) {
473
+ break;
474
+ }
475
+ this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
476
+ await this.applyComposeInstruction(userAction.instruction);
477
+ if (!this.hasGoal()) {
478
+ break;
479
+ }
480
+ this.metadata.runMode = 'AUTONOMOUS';
481
+ await this.targetInspector.showInteractionCursor();
482
+ this.metadata.state = 'RESUMING';
483
+ break;
484
+ }
444
485
  }
445
486
  await this.persistence.setFlowMetadata(this.metadata);
446
487
  }
488
+ /**
489
+ * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
490
+ * standing goal yet, the text becomes the `overallObjective`; otherwise it's
491
+ * added as extra guidance. Either way it's injected into the LLM history (the
492
+ * system prompt was built at init, possibly before any objective existed) and
493
+ * recorded in the timeline. No-op for empty text.
494
+ */
495
+ async applyComposeInstruction(instruction) {
496
+ const text = instruction?.trim();
497
+ if (!text) {
498
+ return;
499
+ }
500
+ const settingObjective = !this.hasGoal();
501
+ if (settingObjective) {
502
+ this.metadata.overallObjective = text;
503
+ }
504
+ this.gptMessages.push({
505
+ type: 'user',
506
+ items: [
507
+ {
508
+ type: 'text',
509
+ text: settingObjective
510
+ ? `Your overall objective: ${text}`
511
+ : `${DonobuFlow.USER_INTERRUPT_MARKER}: ${text}`,
512
+ },
513
+ ],
514
+ });
515
+ await this.recordAdHocToolCall(text, text);
516
+ }
447
517
  /**
448
518
  * Closes out the currently-proposed AI tool call(s) without executing them:
449
519
  * emits a `tool_call_result` for each (so the LLM message history stays
@@ -515,10 +585,15 @@ class DonobuFlow {
515
585
  !this.canHandOffToAi()) {
516
586
  return;
517
587
  }
588
+ // A deliberate pause should survive a mode change: update the run mode but
589
+ // keep the flow parked, so it only continues when the user hits play
590
+ // (RESUME). Other rest points (awaiting approval, waiting on the user) are
591
+ // active decision points, so a switch there takes effect immediately.
592
+ const wasPaused = this.metadata.state === 'PAUSED';
518
593
  if (runMode === this.metadata.runMode &&
519
594
  this.proposedToolCalls.length === 0) {
520
595
  // Nothing to change.
521
- this.metadata.state = 'RESUMING';
596
+ this.metadata.state = wasPaused ? 'PAUSED' : 'RESUMING';
522
597
  return;
523
598
  }
524
599
  const previousRunMode = this.metadata.runMode;
@@ -571,6 +646,14 @@ class DonobuFlow {
571
646
  : 'User handed off to Donobu.';
572
647
  await this.recordAdHocToolCall(note, note);
573
648
  }
649
+ if (wasPaused) {
650
+ // Stay paused after the mode change; the user resumes deliberately with
651
+ // play. Leave the cursor as-is — the RESUME handler shows/hides it when
652
+ // the flow actually continues.
653
+ this.metadata.state = 'PAUSED';
654
+ this.metadata.nextState = 'PAUSED';
655
+ return;
656
+ }
574
657
  // The interaction cursor belongs to the AI; show it for AI modes, hide it
575
658
  // when the human takes over.
576
659
  if (runMode === 'INSTRUCT') {
@@ -584,13 +667,19 @@ class DonobuFlow {
584
667
  }
585
668
  /**
586
669
  * Whether the flow can hand control to the AI: it needs both a GPT client and
587
- * an overall objective for the agent to pursue. Surfaced to the UI (as
588
- * `canUseAi`) so the autonomy selector can disable the AI modes when they
589
- * wouldn't work — e.g. a Playwright-imported test with no objective.
670
+ * a goal to pursue.
590
671
  */
591
672
  canHandOffToAi() {
592
- return (this.gptClient !== null &&
593
- (this.metadata.overallObjective?.trim().length ?? 0) > 0);
673
+ return this.gptClient !== null && this.hasGoal();
674
+ }
675
+ /**
676
+ * Whether there is a standing goal for the AI to pursue (a non-empty
677
+ * `overallObjective`). The UI derives the same check from `overallObjective` to drive the
678
+ * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
679
+ * and ▶ Play needs either a goal or a typed instruction.
680
+ */
681
+ hasGoal() {
682
+ return (this.metadata.overallObjective?.trim().length ?? 0) > 0;
594
683
  }
595
684
  /**
596
685
  * This method is called if there is an unhandled unexpected exception. This
@@ -897,9 +986,15 @@ Message: ${dialog.message()}`;
897
986
  switch (this.metadata.runMode) {
898
987
  case 'AUTONOMOUS':
899
988
  case 'SUPERVISED':
900
- // The LLM is driving the flow, so ask the LLM what to do next.
901
- // (In SUPERVISED mode the proposal will then wait for approval.)
902
- nextState = 'QUERYING_LLM_FOR_NEXT_ACTION';
989
+ // The LLM drives continuously toward a goal, but only if there is
990
+ // one. Without a goal, rest in the compose state until the user
991
+ // supplies it (via a ▶/⏩ action). SUPERVISED differs only in that
992
+ // each proposed action is gated for the user's approval (see the
993
+ // approval check above); it keeps proposing the next step after each
994
+ // approval until the objective is met or the user pauses.
995
+ nextState = this.hasGoal()
996
+ ? 'QUERYING_LLM_FOR_NEXT_ACTION'
997
+ : 'WAITING_ON_USER_FOR_NEXT_ACTION';
903
998
  break;
904
999
  case 'INSTRUCT':
905
1000
  // A user is driving the flow, so wait for them to tell us what to
@@ -1058,7 +1153,7 @@ Message: ${dialog.message()}`;
1058
1153
  if (!proposedToolCall) {
1059
1154
  return;
1060
1155
  }
1061
- // This proposal is now being executed, so its approval (if any) is spent.
1156
+ // This proposal is being executed, so its approval (if any) is spent.
1062
1157
  if (proposedToolCall.toolCallId) {
1063
1158
  this.approvedToolCallIds.delete(proposedToolCall.toolCallId);
1064
1159
  }
@@ -828,15 +828,14 @@ async function validateFlowParams(flowParams, gptClient, initialRunMode, toolReg
828
828
  switch (initialRunMode) {
829
829
  case 'AUTONOMOUS':
830
830
  case 'SUPERVISED':
831
- // Both modes pursue an overall objective via an AI agent, so both need an
832
- // objective and a GPT client. SUPERVISED additionally gates each
833
- // AI-proposed action on user approval at runtime.
834
- if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
835
- throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, `'initialRunMode' has a value of '${initialRunMode}'`);
836
- }
831
+ // Both modes are AI-driven, so both need a GPT client and an objective
832
+ // to pursue.
837
833
  if (!gptClient) {
838
834
  throw new InvalidParamValueException_1.InvalidParamValueException('initialRunMode', initialRunMode, `no GPT client is available`);
839
835
  }
836
+ if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
837
+ throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, 'an AI-driven flow needs an objective to pursue');
838
+ }
840
839
  break;
841
840
  case 'INSTRUCT':
842
841
  break;
@@ -984,7 +983,12 @@ async function prepareToolCallsForRerun(toolCalls, options, toolRegistry) {
984
983
  continue;
985
984
  }
986
985
  try {
987
- proposedToolCalls.push(tool.prepareForRerun(toolCall, options));
986
+ // A tool returns null to exclude itself from replay (e.g. a recorded
987
+ // user instruction, which is a live artifact, not a replayable action).
988
+ const prepared = tool.prepareForRerun(toolCall, options);
989
+ if (prepared) {
990
+ proposedToolCalls.push(prepared);
991
+ }
988
992
  }
989
993
  catch (e) {
990
994
  Logger_1.appLogger.warn(`Failed to prepare tool call for rerun: ${JSON.stringify(toolCall)}`, e);
@@ -21,24 +21,29 @@ export type UserAction = {
21
21
  type: 'SET_RUN_MODE';
22
22
  runMode: RunMode;
23
23
  approvePending?: boolean;
24
+ } | {
25
+ type: 'STEP';
26
+ instruction?: string;
27
+ } | {
28
+ type: 'RUN';
29
+ instruction?: string;
24
30
  };
25
31
  export type ControlPanelDataUpdate = {
26
32
  state: State;
33
+ runMode?: RunMode;
34
+ /** The flow's overall objective; the panel treats a non-empty value (or a
35
+ * typed instruction) as a "goal", which gates ⏩ Fast-forward and ▶ Play. */
36
+ overallObjective?: string | null;
37
+ /** The tools the flow can actually run (resolved from the ToolManager). */
38
+ allowedTools?: string[] | null;
27
39
  headline?: string;
28
- /** Names of tools loaded in the flow's ToolManager. Surfaced to the UI so
29
- * the control panel can offer only tools the flow can actually run. */
30
- availableToolNames?: string[];
31
- /** In SUPERVISED mode, the AI-proposed tool call(s) currently awaiting the
32
- * user's approval. Surfaced to the UI so the user can see what they are
33
- * approving or rejecting. Empty/undefined when nothing is pending. */
40
+ /** AI-proposed tool call(s) awaiting approval (SUPERVISED). */
34
41
  pendingToolCalls?: ProposedToolCall[];
35
- /** The flow's current run mode, so the UI can render and drive the autonomy
36
- * selector (Manual/Supervised/Autonomous). */
37
- runMode?: RunMode;
38
- /** Whether AI-driven modes (Autonomous/Supervised) are available i.e. the
39
- * flow has a GPT client. False for purely manual flows, so the UI can disable
40
- * those options on the autonomy selector. */
41
- canUseAi?: boolean;
42
+ /** Whether the flow has a GPT client (AI available at all). Not cleanly a
43
+ * FlowMetadata field — a client can come from env/default config, not just a
44
+ * named gptConfig — so it's surfaced explicitly. Drives whether the compose
45
+ * surface and the ▶/⏩ transport are offered at all. */
46
+ hasGptClient?: boolean;
42
47
  };
43
48
  export interface ControlPanel {
44
49
  /** Cheap, idempotent render update. */
@@ -1,4 +1,5 @@
1
1
  import { z } from 'zod/v4';
2
+ import type { ProposedToolCall } from '../models/ProposedToolCall';
2
3
  import type { ToolCallContext } from '../models/ToolCallContext';
3
4
  import type { ToolCallResult } from '../models/ToolCallResult';
4
5
  import { Tool } from './Tool';
@@ -20,5 +21,10 @@ export declare class AcknowledgeUserInstructionTool extends Tool<typeof Acknowle
20
21
  constructor();
21
22
  call(_context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionCoreSchema>): Promise<ToolCallResult>;
22
23
  callFromGpt(context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionGptSchema>): Promise<ToolCallResult>;
24
+ /**
25
+ * A user instruction is a live-interaction artifact, not a replayable action,
26
+ * so it is excluded from reruns and generated scripts.
27
+ */
28
+ prepareForRerun(): ProposedToolCall | null;
23
29
  }
24
30
  //# sourceMappingURL=AcknowledgeUserInstruction.d.ts.map
@@ -33,6 +33,13 @@ class AcknowledgeUserInstructionTool extends Tool_1.Tool {
33
33
  async callFromGpt(context, parameters) {
34
34
  return this.call(context, parameters);
35
35
  }
36
+ /**
37
+ * A user instruction is a live-interaction artifact, not a replayable action,
38
+ * so it is excluded from reruns and generated scripts.
39
+ */
40
+ prepareForRerun() {
41
+ return null;
42
+ }
36
43
  }
37
44
  exports.AcknowledgeUserInstructionTool = AcknowledgeUserInstructionTool;
38
45
  AcknowledgeUserInstructionTool.NAME = 'acknowledgeUserInstruction';
@@ -54,14 +54,17 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
54
54
  previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
55
55
  /**
56
56
  * Transform a completed tool call into a {@link ProposedToolCall} suitable
57
- * for deterministic replay / code generation.
57
+ * for deterministic replay / code generation, or `null` to exclude the call
58
+ * from replay entirely.
58
59
  *
59
60
  * The default implementation is a passthrough — `{ name, parameters }` —
60
61
  * which is correct for tools that have no replay-specific logic
61
62
  * (waits, assertions, markers, etc.). Tools that need to hoist
62
63
  * selector metadata out of their outcome, strip LLM-only fields, or
63
- * otherwise rewrite themselves override this method.
64
+ * otherwise rewrite themselves override this method. Tools that record
65
+ * live-interaction artifacts rather than replayable actions (e.g. a user
66
+ * instruction) return `null` so they don't reappear on reruns.
64
67
  */
65
- prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall;
68
+ prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall | null;
66
69
  }
67
70
  //# sourceMappingURL=Tool.d.ts.map
@@ -45,13 +45,16 @@ class Tool {
45
45
  async previewInteraction(_context, _parameters) { }
46
46
  /**
47
47
  * Transform a completed tool call into a {@link ProposedToolCall} suitable
48
- * for deterministic replay / code generation.
48
+ * for deterministic replay / code generation, or `null` to exclude the call
49
+ * from replay entirely.
49
50
  *
50
51
  * The default implementation is a passthrough — `{ name, parameters }` —
51
52
  * which is correct for tools that have no replay-specific logic
52
53
  * (waits, assertions, markers, etc.). Tools that need to hoist
53
54
  * selector metadata out of their outcome, strip LLM-only fields, or
54
- * otherwise rewrite themselves override this method.
55
+ * otherwise rewrite themselves override this method. Tools that record
56
+ * live-interaction artifacts rather than replayable actions (e.g. a user
57
+ * instruction) return `null` so they don't reappear on reruns.
55
58
  */
56
59
  prepareForRerun(toolCall, _options) {
57
60
  return {
@@ -134,6 +134,14 @@ export declare class DonobuFlow {
134
134
  * Note that this *bypasses* the normal state transition logic!
135
135
  */
136
136
  private onUserInterruption;
137
+ /**
138
+ * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
139
+ * standing goal yet, the text becomes the `overallObjective`; otherwise it's
140
+ * added as extra guidance. Either way it's injected into the LLM history (the
141
+ * system prompt was built at init, possibly before any objective existed) and
142
+ * recorded in the timeline. No-op for empty text.
143
+ */
144
+ private applyComposeInstruction;
137
145
  /**
138
146
  * Closes out the currently-proposed AI tool call(s) without executing them:
139
147
  * emits a `tool_call_result` for each (so the LLM message history stays
@@ -163,11 +171,16 @@ export declare class DonobuFlow {
163
171
  private applyRunModeChange;
164
172
  /**
165
173
  * Whether the flow can hand control to the AI: it needs both a GPT client and
166
- * an overall objective for the agent to pursue. Surfaced to the UI (as
167
- * `canUseAi`) so the autonomy selector can disable the AI modes when they
168
- * wouldn't work — e.g. a Playwright-imported test with no objective.
174
+ * a goal to pursue.
169
175
  */
170
176
  private canHandOffToAi;
177
+ /**
178
+ * Whether there is a standing goal for the AI to pursue (a non-empty
179
+ * `overallObjective`). The UI derives the same check from `overallObjective` to drive the
180
+ * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
181
+ * and ▶ Play needs either a goal or a typed instruction.
182
+ */
183
+ private hasGoal;
171
184
  /**
172
185
  * This method is called if there is an unhandled unexpected exception. This
173
186
  * method will mark the flow as a failure.
@@ -161,12 +161,13 @@ class DonobuFlow {
161
161
  try {
162
162
  this.controlPanel.update({
163
163
  state: this.metadata.state,
164
- availableToolNames: this.toolManager.tools.map((t) => t.name),
164
+ runMode: this.metadata.runMode,
165
+ overallObjective: this.metadata.overallObjective,
166
+ allowedTools: this.metadata.allowedTools,
165
167
  pendingToolCalls: this.metadata.state === 'WAITING_FOR_APPROVAL'
166
168
  ? [...this.proposedToolCalls]
167
169
  : undefined,
168
- runMode: this.metadata.runMode,
169
- canUseAi: this.canHandOffToAi(),
170
+ hasGptClient: this.gptClient !== null,
170
171
  });
171
172
  switch (this.metadata.state) {
172
173
  case 'UNSTARTED':
@@ -324,6 +325,11 @@ class DonobuFlow {
324
325
  // Set the next state based on user action
325
326
  switch (userAction.type) {
326
327
  case 'PAUSE':
328
+ // Pausing while an AI proposal awaits approval abandons that proposal so
329
+ // the user returns to a clean compose state rather than a stale prompt.
330
+ if (this.metadata.state === 'WAITING_FOR_APPROVAL') {
331
+ this.closeOutPendingProposals('Superseded because the user paused before approving; not executed.');
332
+ }
327
333
  this.metadata.state = 'PAUSED';
328
334
  await this.targetInspector.hideInteractionCursor();
329
335
  break;
@@ -441,9 +447,73 @@ class DonobuFlow {
441
447
  await this.applyRunModeChange(userAction.runMode, userAction.approvePending ?? false);
442
448
  break;
443
449
  }
450
+ case 'STEP': {
451
+ // ▶ Play: start supervised running toward the goal — the AI proposes
452
+ // each action and the user approves it before it runs, continuing until
453
+ // the objective is met or the user pauses. Needs a GPT client and a goal
454
+ // (the typed instruction can supply the goal).
455
+ if (!this.gptClient) {
456
+ break;
457
+ }
458
+ // The user is directing the next move, which supersedes anything still
459
+ // queued (e.g. unreplayed recorded steps of a paused DETERMINISTIC run).
460
+ this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
461
+ await this.applyComposeInstruction(userAction.instruction);
462
+ if (!this.hasGoal()) {
463
+ break;
464
+ }
465
+ this.metadata.runMode = 'SUPERVISED';
466
+ await this.targetInspector.showInteractionCursor();
467
+ this.metadata.state = 'RESUMING';
468
+ break;
469
+ }
470
+ case 'RUN': {
471
+ // ⏩ Fast-forward: run autonomously toward the goal until done/paused.
472
+ if (!this.gptClient) {
473
+ break;
474
+ }
475
+ this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
476
+ await this.applyComposeInstruction(userAction.instruction);
477
+ if (!this.hasGoal()) {
478
+ break;
479
+ }
480
+ this.metadata.runMode = 'AUTONOMOUS';
481
+ await this.targetInspector.showInteractionCursor();
482
+ this.metadata.state = 'RESUMING';
483
+ break;
484
+ }
444
485
  }
445
486
  await this.persistence.setFlowMetadata(this.metadata);
446
487
  }
488
+ /**
489
+ * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
490
+ * standing goal yet, the text becomes the `overallObjective`; otherwise it's
491
+ * added as extra guidance. Either way it's injected into the LLM history (the
492
+ * system prompt was built at init, possibly before any objective existed) and
493
+ * recorded in the timeline. No-op for empty text.
494
+ */
495
+ async applyComposeInstruction(instruction) {
496
+ const text = instruction?.trim();
497
+ if (!text) {
498
+ return;
499
+ }
500
+ const settingObjective = !this.hasGoal();
501
+ if (settingObjective) {
502
+ this.metadata.overallObjective = text;
503
+ }
504
+ this.gptMessages.push({
505
+ type: 'user',
506
+ items: [
507
+ {
508
+ type: 'text',
509
+ text: settingObjective
510
+ ? `Your overall objective: ${text}`
511
+ : `${DonobuFlow.USER_INTERRUPT_MARKER}: ${text}`,
512
+ },
513
+ ],
514
+ });
515
+ await this.recordAdHocToolCall(text, text);
516
+ }
447
517
  /**
448
518
  * Closes out the currently-proposed AI tool call(s) without executing them:
449
519
  * emits a `tool_call_result` for each (so the LLM message history stays
@@ -515,10 +585,15 @@ class DonobuFlow {
515
585
  !this.canHandOffToAi()) {
516
586
  return;
517
587
  }
588
+ // A deliberate pause should survive a mode change: update the run mode but
589
+ // keep the flow parked, so it only continues when the user hits play
590
+ // (RESUME). Other rest points (awaiting approval, waiting on the user) are
591
+ // active decision points, so a switch there takes effect immediately.
592
+ const wasPaused = this.metadata.state === 'PAUSED';
518
593
  if (runMode === this.metadata.runMode &&
519
594
  this.proposedToolCalls.length === 0) {
520
595
  // Nothing to change.
521
- this.metadata.state = 'RESUMING';
596
+ this.metadata.state = wasPaused ? 'PAUSED' : 'RESUMING';
522
597
  return;
523
598
  }
524
599
  const previousRunMode = this.metadata.runMode;
@@ -571,6 +646,14 @@ class DonobuFlow {
571
646
  : 'User handed off to Donobu.';
572
647
  await this.recordAdHocToolCall(note, note);
573
648
  }
649
+ if (wasPaused) {
650
+ // Stay paused after the mode change; the user resumes deliberately with
651
+ // play. Leave the cursor as-is — the RESUME handler shows/hides it when
652
+ // the flow actually continues.
653
+ this.metadata.state = 'PAUSED';
654
+ this.metadata.nextState = 'PAUSED';
655
+ return;
656
+ }
574
657
  // The interaction cursor belongs to the AI; show it for AI modes, hide it
575
658
  // when the human takes over.
576
659
  if (runMode === 'INSTRUCT') {
@@ -584,13 +667,19 @@ class DonobuFlow {
584
667
  }
585
668
  /**
586
669
  * Whether the flow can hand control to the AI: it needs both a GPT client and
587
- * an overall objective for the agent to pursue. Surfaced to the UI (as
588
- * `canUseAi`) so the autonomy selector can disable the AI modes when they
589
- * wouldn't work — e.g. a Playwright-imported test with no objective.
670
+ * a goal to pursue.
590
671
  */
591
672
  canHandOffToAi() {
592
- return (this.gptClient !== null &&
593
- (this.metadata.overallObjective?.trim().length ?? 0) > 0);
673
+ return this.gptClient !== null && this.hasGoal();
674
+ }
675
+ /**
676
+ * Whether there is a standing goal for the AI to pursue (a non-empty
677
+ * `overallObjective`). The UI derives the same check from `overallObjective` to drive the
678
+ * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
679
+ * and ▶ Play needs either a goal or a typed instruction.
680
+ */
681
+ hasGoal() {
682
+ return (this.metadata.overallObjective?.trim().length ?? 0) > 0;
594
683
  }
595
684
  /**
596
685
  * This method is called if there is an unhandled unexpected exception. This
@@ -897,9 +986,15 @@ Message: ${dialog.message()}`;
897
986
  switch (this.metadata.runMode) {
898
987
  case 'AUTONOMOUS':
899
988
  case 'SUPERVISED':
900
- // The LLM is driving the flow, so ask the LLM what to do next.
901
- // (In SUPERVISED mode the proposal will then wait for approval.)
902
- nextState = 'QUERYING_LLM_FOR_NEXT_ACTION';
989
+ // The LLM drives continuously toward a goal, but only if there is
990
+ // one. Without a goal, rest in the compose state until the user
991
+ // supplies it (via a ▶/⏩ action). SUPERVISED differs only in that
992
+ // each proposed action is gated for the user's approval (see the
993
+ // approval check above); it keeps proposing the next step after each
994
+ // approval until the objective is met or the user pauses.
995
+ nextState = this.hasGoal()
996
+ ? 'QUERYING_LLM_FOR_NEXT_ACTION'
997
+ : 'WAITING_ON_USER_FOR_NEXT_ACTION';
903
998
  break;
904
999
  case 'INSTRUCT':
905
1000
  // A user is driving the flow, so wait for them to tell us what to
@@ -1058,7 +1153,7 @@ Message: ${dialog.message()}`;
1058
1153
  if (!proposedToolCall) {
1059
1154
  return;
1060
1155
  }
1061
- // This proposal is now being executed, so its approval (if any) is spent.
1156
+ // This proposal is being executed, so its approval (if any) is spent.
1062
1157
  if (proposedToolCall.toolCallId) {
1063
1158
  this.approvedToolCallIds.delete(proposedToolCall.toolCallId);
1064
1159
  }
@@ -828,15 +828,14 @@ async function validateFlowParams(flowParams, gptClient, initialRunMode, toolReg
828
828
  switch (initialRunMode) {
829
829
  case 'AUTONOMOUS':
830
830
  case 'SUPERVISED':
831
- // Both modes pursue an overall objective via an AI agent, so both need an
832
- // objective and a GPT client. SUPERVISED additionally gates each
833
- // AI-proposed action on user approval at runtime.
834
- if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
835
- throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, `'initialRunMode' has a value of '${initialRunMode}'`);
836
- }
831
+ // Both modes are AI-driven, so both need a GPT client and an objective
832
+ // to pursue.
837
833
  if (!gptClient) {
838
834
  throw new InvalidParamValueException_1.InvalidParamValueException('initialRunMode', initialRunMode, `no GPT client is available`);
839
835
  }
836
+ if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
837
+ throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, 'an AI-driven flow needs an objective to pursue');
838
+ }
840
839
  break;
841
840
  case 'INSTRUCT':
842
841
  break;
@@ -984,7 +983,12 @@ async function prepareToolCallsForRerun(toolCalls, options, toolRegistry) {
984
983
  continue;
985
984
  }
986
985
  try {
987
- proposedToolCalls.push(tool.prepareForRerun(toolCall, options));
986
+ // A tool returns null to exclude itself from replay (e.g. a recorded
987
+ // user instruction, which is a live artifact, not a replayable action).
988
+ const prepared = tool.prepareForRerun(toolCall, options);
989
+ if (prepared) {
990
+ proposedToolCalls.push(prepared);
991
+ }
988
992
  }
989
993
  catch (e) {
990
994
  Logger_1.appLogger.warn(`Failed to prepare tool call for rerun: ${JSON.stringify(toolCall)}`, e);
@@ -21,24 +21,29 @@ export type UserAction = {
21
21
  type: 'SET_RUN_MODE';
22
22
  runMode: RunMode;
23
23
  approvePending?: boolean;
24
+ } | {
25
+ type: 'STEP';
26
+ instruction?: string;
27
+ } | {
28
+ type: 'RUN';
29
+ instruction?: string;
24
30
  };
25
31
  export type ControlPanelDataUpdate = {
26
32
  state: State;
33
+ runMode?: RunMode;
34
+ /** The flow's overall objective; the panel treats a non-empty value (or a
35
+ * typed instruction) as a "goal", which gates ⏩ Fast-forward and ▶ Play. */
36
+ overallObjective?: string | null;
37
+ /** The tools the flow can actually run (resolved from the ToolManager). */
38
+ allowedTools?: string[] | null;
27
39
  headline?: string;
28
- /** Names of tools loaded in the flow's ToolManager. Surfaced to the UI so
29
- * the control panel can offer only tools the flow can actually run. */
30
- availableToolNames?: string[];
31
- /** In SUPERVISED mode, the AI-proposed tool call(s) currently awaiting the
32
- * user's approval. Surfaced to the UI so the user can see what they are
33
- * approving or rejecting. Empty/undefined when nothing is pending. */
40
+ /** AI-proposed tool call(s) awaiting approval (SUPERVISED). */
34
41
  pendingToolCalls?: ProposedToolCall[];
35
- /** The flow's current run mode, so the UI can render and drive the autonomy
36
- * selector (Manual/Supervised/Autonomous). */
37
- runMode?: RunMode;
38
- /** Whether AI-driven modes (Autonomous/Supervised) are available i.e. the
39
- * flow has a GPT client. False for purely manual flows, so the UI can disable
40
- * those options on the autonomy selector. */
41
- canUseAi?: boolean;
42
+ /** Whether the flow has a GPT client (AI available at all). Not cleanly a
43
+ * FlowMetadata field — a client can come from env/default config, not just a
44
+ * named gptConfig — so it's surfaced explicitly. Drives whether the compose
45
+ * surface and the ▶/⏩ transport are offered at all. */
46
+ hasGptClient?: boolean;
42
47
  };
43
48
  export interface ControlPanel {
44
49
  /** Cheap, idempotent render update. */
@@ -1,4 +1,5 @@
1
1
  import { z } from 'zod/v4';
2
+ import type { ProposedToolCall } from '../models/ProposedToolCall';
2
3
  import type { ToolCallContext } from '../models/ToolCallContext';
3
4
  import type { ToolCallResult } from '../models/ToolCallResult';
4
5
  import { Tool } from './Tool';
@@ -20,5 +21,10 @@ export declare class AcknowledgeUserInstructionTool extends Tool<typeof Acknowle
20
21
  constructor();
21
22
  call(_context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionCoreSchema>): Promise<ToolCallResult>;
22
23
  callFromGpt(context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionGptSchema>): Promise<ToolCallResult>;
24
+ /**
25
+ * A user instruction is a live-interaction artifact, not a replayable action,
26
+ * so it is excluded from reruns and generated scripts.
27
+ */
28
+ prepareForRerun(): ProposedToolCall | null;
23
29
  }
24
30
  //# sourceMappingURL=AcknowledgeUserInstruction.d.ts.map
@@ -33,6 +33,13 @@ class AcknowledgeUserInstructionTool extends Tool_1.Tool {
33
33
  async callFromGpt(context, parameters) {
34
34
  return this.call(context, parameters);
35
35
  }
36
+ /**
37
+ * A user instruction is a live-interaction artifact, not a replayable action,
38
+ * so it is excluded from reruns and generated scripts.
39
+ */
40
+ prepareForRerun() {
41
+ return null;
42
+ }
36
43
  }
37
44
  exports.AcknowledgeUserInstructionTool = AcknowledgeUserInstructionTool;
38
45
  AcknowledgeUserInstructionTool.NAME = 'acknowledgeUserInstruction';
@@ -54,14 +54,17 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
54
54
  previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
55
55
  /**
56
56
  * Transform a completed tool call into a {@link ProposedToolCall} suitable
57
- * for deterministic replay / code generation.
57
+ * for deterministic replay / code generation, or `null` to exclude the call
58
+ * from replay entirely.
58
59
  *
59
60
  * The default implementation is a passthrough — `{ name, parameters }` —
60
61
  * which is correct for tools that have no replay-specific logic
61
62
  * (waits, assertions, markers, etc.). Tools that need to hoist
62
63
  * selector metadata out of their outcome, strip LLM-only fields, or
63
- * otherwise rewrite themselves override this method.
64
+ * otherwise rewrite themselves override this method. Tools that record
65
+ * live-interaction artifacts rather than replayable actions (e.g. a user
66
+ * instruction) return `null` so they don't reappear on reruns.
64
67
  */
65
- prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall;
68
+ prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall | null;
66
69
  }
67
70
  //# sourceMappingURL=Tool.d.ts.map
@@ -45,13 +45,16 @@ class Tool {
45
45
  async previewInteraction(_context, _parameters) { }
46
46
  /**
47
47
  * Transform a completed tool call into a {@link ProposedToolCall} suitable
48
- * for deterministic replay / code generation.
48
+ * for deterministic replay / code generation, or `null` to exclude the call
49
+ * from replay entirely.
49
50
  *
50
51
  * The default implementation is a passthrough — `{ name, parameters }` —
51
52
  * which is correct for tools that have no replay-specific logic
52
53
  * (waits, assertions, markers, etc.). Tools that need to hoist
53
54
  * selector metadata out of their outcome, strip LLM-only fields, or
54
- * otherwise rewrite themselves override this method.
55
+ * otherwise rewrite themselves override this method. Tools that record
56
+ * live-interaction artifacts rather than replayable actions (e.g. a user
57
+ * instruction) return `null` so they don't reappear on reruns.
55
58
  */
56
59
  prepareForRerun(toolCall, _options) {
57
60
  return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "donobu",
3
- "version": "5.55.0",
3
+ "version": "5.56.0",
4
4
  "description": "Create browser automations with an LLM agent and replay them as Playwright scripts.",
5
5
  "main": "dist/main.js",
6
6
  "module": "dist/esm/main.js",