donobu 5.55.0 → 5.56.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/managers/DonobuFlow.d.ts +16 -3
- package/dist/esm/managers/DonobuFlow.js +108 -13
- package/dist/esm/managers/DonobuFlowsManager.js +11 -7
- package/dist/esm/models/ControlPanel.d.ts +18 -13
- package/dist/esm/tools/AcknowledgeUserInstruction.d.ts +6 -0
- package/dist/esm/tools/AcknowledgeUserInstruction.js +7 -0
- package/dist/esm/tools/Tool.d.ts +6 -3
- package/dist/esm/tools/Tool.js +5 -2
- package/dist/managers/DonobuFlow.d.ts +16 -3
- package/dist/managers/DonobuFlow.js +108 -13
- package/dist/managers/DonobuFlowsManager.js +11 -7
- package/dist/models/ControlPanel.d.ts +18 -13
- package/dist/tools/AcknowledgeUserInstruction.d.ts +6 -0
- package/dist/tools/AcknowledgeUserInstruction.js +7 -0
- package/dist/tools/Tool.d.ts +6 -3
- package/dist/tools/Tool.js +5 -2
- package/package.json +1 -1
|
@@ -134,6 +134,14 @@ export declare class DonobuFlow {
|
|
|
134
134
|
* Note that this *bypasses* the normal state transition logic!
|
|
135
135
|
*/
|
|
136
136
|
private onUserInterruption;
|
|
137
|
+
/**
|
|
138
|
+
* Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
|
|
139
|
+
* standing goal yet, the text becomes the `overallObjective`; otherwise it's
|
|
140
|
+
* added as extra guidance. Either way it's injected into the LLM history (the
|
|
141
|
+
* system prompt was built at init, possibly before any objective existed) and
|
|
142
|
+
* recorded in the timeline. No-op for empty text.
|
|
143
|
+
*/
|
|
144
|
+
private applyComposeInstruction;
|
|
137
145
|
/**
|
|
138
146
|
* Closes out the currently-proposed AI tool call(s) without executing them:
|
|
139
147
|
* emits a `tool_call_result` for each (so the LLM message history stays
|
|
@@ -163,11 +171,16 @@ export declare class DonobuFlow {
|
|
|
163
171
|
private applyRunModeChange;
|
|
164
172
|
/**
|
|
165
173
|
* Whether the flow can hand control to the AI: it needs both a GPT client and
|
|
166
|
-
*
|
|
167
|
-
* `canUseAi`) so the autonomy selector can disable the AI modes when they
|
|
168
|
-
* wouldn't work — e.g. a Playwright-imported test with no objective.
|
|
174
|
+
* a goal to pursue.
|
|
169
175
|
*/
|
|
170
176
|
private canHandOffToAi;
|
|
177
|
+
/**
|
|
178
|
+
* Whether there is a standing goal for the AI to pursue (a non-empty
|
|
179
|
+
* `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
|
|
180
|
+
* transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
|
|
181
|
+
* and ▶ Play needs either a goal or a typed instruction.
|
|
182
|
+
*/
|
|
183
|
+
private hasGoal;
|
|
171
184
|
/**
|
|
172
185
|
* This method is called if there is an unhandled unexpected exception. This
|
|
173
186
|
* method will mark the flow as a failure.
|
|
@@ -161,12 +161,13 @@ class DonobuFlow {
|
|
|
161
161
|
try {
|
|
162
162
|
this.controlPanel.update({
|
|
163
163
|
state: this.metadata.state,
|
|
164
|
-
|
|
164
|
+
runMode: this.metadata.runMode,
|
|
165
|
+
overallObjective: this.metadata.overallObjective,
|
|
166
|
+
allowedTools: this.metadata.allowedTools,
|
|
165
167
|
pendingToolCalls: this.metadata.state === 'WAITING_FOR_APPROVAL'
|
|
166
168
|
? [...this.proposedToolCalls]
|
|
167
169
|
: undefined,
|
|
168
|
-
|
|
169
|
-
canUseAi: this.canHandOffToAi(),
|
|
170
|
+
hasGptClient: this.gptClient !== null,
|
|
170
171
|
});
|
|
171
172
|
switch (this.metadata.state) {
|
|
172
173
|
case 'UNSTARTED':
|
|
@@ -324,6 +325,11 @@ class DonobuFlow {
|
|
|
324
325
|
// Set the next state based on user action
|
|
325
326
|
switch (userAction.type) {
|
|
326
327
|
case 'PAUSE':
|
|
328
|
+
// Pausing while an AI proposal awaits approval abandons that proposal so
|
|
329
|
+
// the user returns to a clean compose state rather than a stale prompt.
|
|
330
|
+
if (this.metadata.state === 'WAITING_FOR_APPROVAL') {
|
|
331
|
+
this.closeOutPendingProposals('Superseded because the user paused before approving; not executed.');
|
|
332
|
+
}
|
|
327
333
|
this.metadata.state = 'PAUSED';
|
|
328
334
|
await this.targetInspector.hideInteractionCursor();
|
|
329
335
|
break;
|
|
@@ -441,9 +447,73 @@ class DonobuFlow {
|
|
|
441
447
|
await this.applyRunModeChange(userAction.runMode, userAction.approvePending ?? false);
|
|
442
448
|
break;
|
|
443
449
|
}
|
|
450
|
+
case 'STEP': {
|
|
451
|
+
// ▶ Play: start supervised running toward the goal — the AI proposes
|
|
452
|
+
// each action and the user approves it before it runs, continuing until
|
|
453
|
+
// the objective is met or the user pauses. Needs a GPT client and a goal
|
|
454
|
+
// (the typed instruction can supply the goal).
|
|
455
|
+
if (!this.gptClient) {
|
|
456
|
+
break;
|
|
457
|
+
}
|
|
458
|
+
// The user is directing the next move, which supersedes anything still
|
|
459
|
+
// queued (e.g. unreplayed recorded steps of a paused DETERMINISTIC run).
|
|
460
|
+
this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
|
|
461
|
+
await this.applyComposeInstruction(userAction.instruction);
|
|
462
|
+
if (!this.hasGoal()) {
|
|
463
|
+
break;
|
|
464
|
+
}
|
|
465
|
+
this.metadata.runMode = 'SUPERVISED';
|
|
466
|
+
await this.targetInspector.showInteractionCursor();
|
|
467
|
+
this.metadata.state = 'RESUMING';
|
|
468
|
+
break;
|
|
469
|
+
}
|
|
470
|
+
case 'RUN': {
|
|
471
|
+
// ⏩ Fast-forward: run autonomously toward the goal until done/paused.
|
|
472
|
+
if (!this.gptClient) {
|
|
473
|
+
break;
|
|
474
|
+
}
|
|
475
|
+
this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
|
|
476
|
+
await this.applyComposeInstruction(userAction.instruction);
|
|
477
|
+
if (!this.hasGoal()) {
|
|
478
|
+
break;
|
|
479
|
+
}
|
|
480
|
+
this.metadata.runMode = 'AUTONOMOUS';
|
|
481
|
+
await this.targetInspector.showInteractionCursor();
|
|
482
|
+
this.metadata.state = 'RESUMING';
|
|
483
|
+
break;
|
|
484
|
+
}
|
|
444
485
|
}
|
|
445
486
|
await this.persistence.setFlowMetadata(this.metadata);
|
|
446
487
|
}
|
|
488
|
+
/**
|
|
489
|
+
* Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
|
|
490
|
+
* standing goal yet, the text becomes the `overallObjective`; otherwise it's
|
|
491
|
+
* added as extra guidance. Either way it's injected into the LLM history (the
|
|
492
|
+
* system prompt was built at init, possibly before any objective existed) and
|
|
493
|
+
* recorded in the timeline. No-op for empty text.
|
|
494
|
+
*/
|
|
495
|
+
async applyComposeInstruction(instruction) {
|
|
496
|
+
const text = instruction?.trim();
|
|
497
|
+
if (!text) {
|
|
498
|
+
return;
|
|
499
|
+
}
|
|
500
|
+
const settingObjective = !this.hasGoal();
|
|
501
|
+
if (settingObjective) {
|
|
502
|
+
this.metadata.overallObjective = text;
|
|
503
|
+
}
|
|
504
|
+
this.gptMessages.push({
|
|
505
|
+
type: 'user',
|
|
506
|
+
items: [
|
|
507
|
+
{
|
|
508
|
+
type: 'text',
|
|
509
|
+
text: settingObjective
|
|
510
|
+
? `Your overall objective: ${text}`
|
|
511
|
+
: `${DonobuFlow.USER_INTERRUPT_MARKER}: ${text}`,
|
|
512
|
+
},
|
|
513
|
+
],
|
|
514
|
+
});
|
|
515
|
+
await this.recordAdHocToolCall(text, text);
|
|
516
|
+
}
|
|
447
517
|
/**
|
|
448
518
|
* Closes out the currently-proposed AI tool call(s) without executing them:
|
|
449
519
|
* emits a `tool_call_result` for each (so the LLM message history stays
|
|
@@ -515,10 +585,15 @@ class DonobuFlow {
|
|
|
515
585
|
!this.canHandOffToAi()) {
|
|
516
586
|
return;
|
|
517
587
|
}
|
|
588
|
+
// A deliberate pause should survive a mode change: update the run mode but
|
|
589
|
+
// keep the flow parked, so it only continues when the user hits play
|
|
590
|
+
// (RESUME). Other rest points (awaiting approval, waiting on the user) are
|
|
591
|
+
// active decision points, so a switch there takes effect immediately.
|
|
592
|
+
const wasPaused = this.metadata.state === 'PAUSED';
|
|
518
593
|
if (runMode === this.metadata.runMode &&
|
|
519
594
|
this.proposedToolCalls.length === 0) {
|
|
520
595
|
// Nothing to change.
|
|
521
|
-
this.metadata.state = 'RESUMING';
|
|
596
|
+
this.metadata.state = wasPaused ? 'PAUSED' : 'RESUMING';
|
|
522
597
|
return;
|
|
523
598
|
}
|
|
524
599
|
const previousRunMode = this.metadata.runMode;
|
|
@@ -571,6 +646,14 @@ class DonobuFlow {
|
|
|
571
646
|
: 'User handed off to Donobu.';
|
|
572
647
|
await this.recordAdHocToolCall(note, note);
|
|
573
648
|
}
|
|
649
|
+
if (wasPaused) {
|
|
650
|
+
// Stay paused after the mode change; the user resumes deliberately with
|
|
651
|
+
// play. Leave the cursor as-is — the RESUME handler shows/hides it when
|
|
652
|
+
// the flow actually continues.
|
|
653
|
+
this.metadata.state = 'PAUSED';
|
|
654
|
+
this.metadata.nextState = 'PAUSED';
|
|
655
|
+
return;
|
|
656
|
+
}
|
|
574
657
|
// The interaction cursor belongs to the AI; show it for AI modes, hide it
|
|
575
658
|
// when the human takes over.
|
|
576
659
|
if (runMode === 'INSTRUCT') {
|
|
@@ -584,13 +667,19 @@ class DonobuFlow {
|
|
|
584
667
|
}
|
|
585
668
|
/**
|
|
586
669
|
* Whether the flow can hand control to the AI: it needs both a GPT client and
|
|
587
|
-
*
|
|
588
|
-
* `canUseAi`) so the autonomy selector can disable the AI modes when they
|
|
589
|
-
* wouldn't work — e.g. a Playwright-imported test with no objective.
|
|
670
|
+
* a goal to pursue.
|
|
590
671
|
*/
|
|
591
672
|
canHandOffToAi() {
|
|
592
|
-
return
|
|
593
|
-
|
|
673
|
+
return this.gptClient !== null && this.hasGoal();
|
|
674
|
+
}
|
|
675
|
+
/**
|
|
676
|
+
* Whether there is a standing goal for the AI to pursue (a non-empty
|
|
677
|
+
* `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
|
|
678
|
+
* transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
|
|
679
|
+
* and ▶ Play needs either a goal or a typed instruction.
|
|
680
|
+
*/
|
|
681
|
+
hasGoal() {
|
|
682
|
+
return (this.metadata.overallObjective?.trim().length ?? 0) > 0;
|
|
594
683
|
}
|
|
595
684
|
/**
|
|
596
685
|
* This method is called if there is an unhandled unexpected exception. This
|
|
@@ -897,9 +986,15 @@ Message: ${dialog.message()}`;
|
|
|
897
986
|
switch (this.metadata.runMode) {
|
|
898
987
|
case 'AUTONOMOUS':
|
|
899
988
|
case 'SUPERVISED':
|
|
900
|
-
// The LLM
|
|
901
|
-
//
|
|
902
|
-
|
|
989
|
+
// The LLM drives continuously toward a goal — but only if there is
|
|
990
|
+
// one. Without a goal, rest in the compose state until the user
|
|
991
|
+
// supplies it (via a ▶/⏩ action). SUPERVISED differs only in that
|
|
992
|
+
// each proposed action is gated for the user's approval (see the
|
|
993
|
+
// approval check above); it keeps proposing the next step after each
|
|
994
|
+
// approval until the objective is met or the user pauses.
|
|
995
|
+
nextState = this.hasGoal()
|
|
996
|
+
? 'QUERYING_LLM_FOR_NEXT_ACTION'
|
|
997
|
+
: 'WAITING_ON_USER_FOR_NEXT_ACTION';
|
|
903
998
|
break;
|
|
904
999
|
case 'INSTRUCT':
|
|
905
1000
|
// A user is driving the flow, so wait for them to tell us what to
|
|
@@ -1058,7 +1153,7 @@ Message: ${dialog.message()}`;
|
|
|
1058
1153
|
if (!proposedToolCall) {
|
|
1059
1154
|
return;
|
|
1060
1155
|
}
|
|
1061
|
-
// This proposal is
|
|
1156
|
+
// This proposal is being executed, so its approval (if any) is spent.
|
|
1062
1157
|
if (proposedToolCall.toolCallId) {
|
|
1063
1158
|
this.approvedToolCallIds.delete(proposedToolCall.toolCallId);
|
|
1064
1159
|
}
|
|
@@ -828,15 +828,14 @@ async function validateFlowParams(flowParams, gptClient, initialRunMode, toolReg
|
|
|
828
828
|
switch (initialRunMode) {
|
|
829
829
|
case 'AUTONOMOUS':
|
|
830
830
|
case 'SUPERVISED':
|
|
831
|
-
// Both modes
|
|
832
|
-
//
|
|
833
|
-
// AI-proposed action on user approval at runtime.
|
|
834
|
-
if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
|
|
835
|
-
throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, `'initialRunMode' has a value of '${initialRunMode}'`);
|
|
836
|
-
}
|
|
831
|
+
// Both modes are AI-driven, so both need a GPT client and an objective
|
|
832
|
+
// to pursue.
|
|
837
833
|
if (!gptClient) {
|
|
838
834
|
throw new InvalidParamValueException_1.InvalidParamValueException('initialRunMode', initialRunMode, `no GPT client is available`);
|
|
839
835
|
}
|
|
836
|
+
if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
|
|
837
|
+
throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, 'an AI-driven flow needs an objective to pursue');
|
|
838
|
+
}
|
|
840
839
|
break;
|
|
841
840
|
case 'INSTRUCT':
|
|
842
841
|
break;
|
|
@@ -984,7 +983,12 @@ async function prepareToolCallsForRerun(toolCalls, options, toolRegistry) {
|
|
|
984
983
|
continue;
|
|
985
984
|
}
|
|
986
985
|
try {
|
|
987
|
-
|
|
986
|
+
// A tool returns null to exclude itself from replay (e.g. a recorded
|
|
987
|
+
// user instruction, which is a live artifact, not a replayable action).
|
|
988
|
+
const prepared = tool.prepareForRerun(toolCall, options);
|
|
989
|
+
if (prepared) {
|
|
990
|
+
proposedToolCalls.push(prepared);
|
|
991
|
+
}
|
|
988
992
|
}
|
|
989
993
|
catch (e) {
|
|
990
994
|
Logger_1.appLogger.warn(`Failed to prepare tool call for rerun: ${JSON.stringify(toolCall)}`, e);
|
|
@@ -21,24 +21,29 @@ export type UserAction = {
|
|
|
21
21
|
type: 'SET_RUN_MODE';
|
|
22
22
|
runMode: RunMode;
|
|
23
23
|
approvePending?: boolean;
|
|
24
|
+
} | {
|
|
25
|
+
type: 'STEP';
|
|
26
|
+
instruction?: string;
|
|
27
|
+
} | {
|
|
28
|
+
type: 'RUN';
|
|
29
|
+
instruction?: string;
|
|
24
30
|
};
|
|
25
31
|
export type ControlPanelDataUpdate = {
|
|
26
32
|
state: State;
|
|
33
|
+
runMode?: RunMode;
|
|
34
|
+
/** The flow's overall objective; the panel treats a non-empty value (or a
|
|
35
|
+
* typed instruction) as a "goal", which gates ⏩ Fast-forward and ▶ Play. */
|
|
36
|
+
overallObjective?: string | null;
|
|
37
|
+
/** The tools the flow can actually run (resolved from the ToolManager). */
|
|
38
|
+
allowedTools?: string[] | null;
|
|
27
39
|
headline?: string;
|
|
28
|
-
/**
|
|
29
|
-
* the control panel can offer only tools the flow can actually run. */
|
|
30
|
-
availableToolNames?: string[];
|
|
31
|
-
/** In SUPERVISED mode, the AI-proposed tool call(s) currently awaiting the
|
|
32
|
-
* user's approval. Surfaced to the UI so the user can see what they are
|
|
33
|
-
* approving or rejecting. Empty/undefined when nothing is pending. */
|
|
40
|
+
/** AI-proposed tool call(s) awaiting approval (SUPERVISED). */
|
|
34
41
|
pendingToolCalls?: ProposedToolCall[];
|
|
35
|
-
/**
|
|
36
|
-
*
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
* those options on the autonomy selector. */
|
|
41
|
-
canUseAi?: boolean;
|
|
42
|
+
/** Whether the flow has a GPT client (AI available at all). Not cleanly a
|
|
43
|
+
* FlowMetadata field — a client can come from env/default config, not just a
|
|
44
|
+
* named gptConfig — so it's surfaced explicitly. Drives whether the compose
|
|
45
|
+
* surface and the ▶/⏩ transport are offered at all. */
|
|
46
|
+
hasGptClient?: boolean;
|
|
42
47
|
};
|
|
43
48
|
export interface ControlPanel {
|
|
44
49
|
/** Cheap, idempotent render update. */
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod/v4';
|
|
2
|
+
import type { ProposedToolCall } from '../models/ProposedToolCall';
|
|
2
3
|
import type { ToolCallContext } from '../models/ToolCallContext';
|
|
3
4
|
import type { ToolCallResult } from '../models/ToolCallResult';
|
|
4
5
|
import { Tool } from './Tool';
|
|
@@ -20,5 +21,10 @@ export declare class AcknowledgeUserInstructionTool extends Tool<typeof Acknowle
|
|
|
20
21
|
constructor();
|
|
21
22
|
call(_context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionCoreSchema>): Promise<ToolCallResult>;
|
|
22
23
|
callFromGpt(context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionGptSchema>): Promise<ToolCallResult>;
|
|
24
|
+
/**
|
|
25
|
+
* A user instruction is a live-interaction artifact, not a replayable action,
|
|
26
|
+
* so it is excluded from reruns and generated scripts.
|
|
27
|
+
*/
|
|
28
|
+
prepareForRerun(): ProposedToolCall | null;
|
|
23
29
|
}
|
|
24
30
|
//# sourceMappingURL=AcknowledgeUserInstruction.d.ts.map
|
|
@@ -33,6 +33,13 @@ class AcknowledgeUserInstructionTool extends Tool_1.Tool {
|
|
|
33
33
|
async callFromGpt(context, parameters) {
|
|
34
34
|
return this.call(context, parameters);
|
|
35
35
|
}
|
|
36
|
+
/**
|
|
37
|
+
* A user instruction is a live-interaction artifact, not a replayable action,
|
|
38
|
+
* so it is excluded from reruns and generated scripts.
|
|
39
|
+
*/
|
|
40
|
+
prepareForRerun() {
|
|
41
|
+
return null;
|
|
42
|
+
}
|
|
36
43
|
}
|
|
37
44
|
exports.AcknowledgeUserInstructionTool = AcknowledgeUserInstructionTool;
|
|
38
45
|
AcknowledgeUserInstructionTool.NAME = 'acknowledgeUserInstruction';
|
package/dist/esm/tools/Tool.d.ts
CHANGED
|
@@ -54,14 +54,17 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
|
|
|
54
54
|
previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
|
|
55
55
|
/**
|
|
56
56
|
* Transform a completed tool call into a {@link ProposedToolCall} suitable
|
|
57
|
-
* for deterministic replay / code generation
|
|
57
|
+
* for deterministic replay / code generation, or `null` to exclude the call
|
|
58
|
+
* from replay entirely.
|
|
58
59
|
*
|
|
59
60
|
* The default implementation is a passthrough — `{ name, parameters }` —
|
|
60
61
|
* which is correct for tools that have no replay-specific logic
|
|
61
62
|
* (waits, assertions, markers, etc.). Tools that need to hoist
|
|
62
63
|
* selector metadata out of their outcome, strip LLM-only fields, or
|
|
63
|
-
* otherwise rewrite themselves override this method.
|
|
64
|
+
* otherwise rewrite themselves override this method. Tools that record
|
|
65
|
+
* live-interaction artifacts rather than replayable actions (e.g. a user
|
|
66
|
+
* instruction) return `null` so they don't reappear on reruns.
|
|
64
67
|
*/
|
|
65
|
-
prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall;
|
|
68
|
+
prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall | null;
|
|
66
69
|
}
|
|
67
70
|
//# sourceMappingURL=Tool.d.ts.map
|
package/dist/esm/tools/Tool.js
CHANGED
|
@@ -45,13 +45,16 @@ class Tool {
|
|
|
45
45
|
async previewInteraction(_context, _parameters) { }
|
|
46
46
|
/**
|
|
47
47
|
* Transform a completed tool call into a {@link ProposedToolCall} suitable
|
|
48
|
-
* for deterministic replay / code generation
|
|
48
|
+
* for deterministic replay / code generation, or `null` to exclude the call
|
|
49
|
+
* from replay entirely.
|
|
49
50
|
*
|
|
50
51
|
* The default implementation is a passthrough — `{ name, parameters }` —
|
|
51
52
|
* which is correct for tools that have no replay-specific logic
|
|
52
53
|
* (waits, assertions, markers, etc.). Tools that need to hoist
|
|
53
54
|
* selector metadata out of their outcome, strip LLM-only fields, or
|
|
54
|
-
* otherwise rewrite themselves override this method.
|
|
55
|
+
* otherwise rewrite themselves override this method. Tools that record
|
|
56
|
+
* live-interaction artifacts rather than replayable actions (e.g. a user
|
|
57
|
+
* instruction) return `null` so they don't reappear on reruns.
|
|
55
58
|
*/
|
|
56
59
|
prepareForRerun(toolCall, _options) {
|
|
57
60
|
return {
|
|
@@ -134,6 +134,14 @@ export declare class DonobuFlow {
|
|
|
134
134
|
* Note that this *bypasses* the normal state transition logic!
|
|
135
135
|
*/
|
|
136
136
|
private onUserInterruption;
|
|
137
|
+
/**
|
|
138
|
+
* Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
|
|
139
|
+
* standing goal yet, the text becomes the `overallObjective`; otherwise it's
|
|
140
|
+
* added as extra guidance. Either way it's injected into the LLM history (the
|
|
141
|
+
* system prompt was built at init, possibly before any objective existed) and
|
|
142
|
+
* recorded in the timeline. No-op for empty text.
|
|
143
|
+
*/
|
|
144
|
+
private applyComposeInstruction;
|
|
137
145
|
/**
|
|
138
146
|
* Closes out the currently-proposed AI tool call(s) without executing them:
|
|
139
147
|
* emits a `tool_call_result` for each (so the LLM message history stays
|
|
@@ -163,11 +171,16 @@ export declare class DonobuFlow {
|
|
|
163
171
|
private applyRunModeChange;
|
|
164
172
|
/**
|
|
165
173
|
* Whether the flow can hand control to the AI: it needs both a GPT client and
|
|
166
|
-
*
|
|
167
|
-
* `canUseAi`) so the autonomy selector can disable the AI modes when they
|
|
168
|
-
* wouldn't work — e.g. a Playwright-imported test with no objective.
|
|
174
|
+
* a goal to pursue.
|
|
169
175
|
*/
|
|
170
176
|
private canHandOffToAi;
|
|
177
|
+
/**
|
|
178
|
+
* Whether there is a standing goal for the AI to pursue (a non-empty
|
|
179
|
+
* `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
|
|
180
|
+
* transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
|
|
181
|
+
* and ▶ Play needs either a goal or a typed instruction.
|
|
182
|
+
*/
|
|
183
|
+
private hasGoal;
|
|
171
184
|
/**
|
|
172
185
|
* This method is called if there is an unhandled unexpected exception. This
|
|
173
186
|
* method will mark the flow as a failure.
|
|
@@ -161,12 +161,13 @@ class DonobuFlow {
|
|
|
161
161
|
try {
|
|
162
162
|
this.controlPanel.update({
|
|
163
163
|
state: this.metadata.state,
|
|
164
|
-
|
|
164
|
+
runMode: this.metadata.runMode,
|
|
165
|
+
overallObjective: this.metadata.overallObjective,
|
|
166
|
+
allowedTools: this.metadata.allowedTools,
|
|
165
167
|
pendingToolCalls: this.metadata.state === 'WAITING_FOR_APPROVAL'
|
|
166
168
|
? [...this.proposedToolCalls]
|
|
167
169
|
: undefined,
|
|
168
|
-
|
|
169
|
-
canUseAi: this.canHandOffToAi(),
|
|
170
|
+
hasGptClient: this.gptClient !== null,
|
|
170
171
|
});
|
|
171
172
|
switch (this.metadata.state) {
|
|
172
173
|
case 'UNSTARTED':
|
|
@@ -324,6 +325,11 @@ class DonobuFlow {
|
|
|
324
325
|
// Set the next state based on user action
|
|
325
326
|
switch (userAction.type) {
|
|
326
327
|
case 'PAUSE':
|
|
328
|
+
// Pausing while an AI proposal awaits approval abandons that proposal so
|
|
329
|
+
// the user returns to a clean compose state rather than a stale prompt.
|
|
330
|
+
if (this.metadata.state === 'WAITING_FOR_APPROVAL') {
|
|
331
|
+
this.closeOutPendingProposals('Superseded because the user paused before approving; not executed.');
|
|
332
|
+
}
|
|
327
333
|
this.metadata.state = 'PAUSED';
|
|
328
334
|
await this.targetInspector.hideInteractionCursor();
|
|
329
335
|
break;
|
|
@@ -441,9 +447,73 @@ class DonobuFlow {
|
|
|
441
447
|
await this.applyRunModeChange(userAction.runMode, userAction.approvePending ?? false);
|
|
442
448
|
break;
|
|
443
449
|
}
|
|
450
|
+
case 'STEP': {
|
|
451
|
+
// ▶ Play: start supervised running toward the goal — the AI proposes
|
|
452
|
+
// each action and the user approves it before it runs, continuing until
|
|
453
|
+
// the objective is met or the user pauses. Needs a GPT client and a goal
|
|
454
|
+
// (the typed instruction can supply the goal).
|
|
455
|
+
if (!this.gptClient) {
|
|
456
|
+
break;
|
|
457
|
+
}
|
|
458
|
+
// The user is directing the next move, which supersedes anything still
|
|
459
|
+
// queued (e.g. unreplayed recorded steps of a paused DETERMINISTIC run).
|
|
460
|
+
this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
|
|
461
|
+
await this.applyComposeInstruction(userAction.instruction);
|
|
462
|
+
if (!this.hasGoal()) {
|
|
463
|
+
break;
|
|
464
|
+
}
|
|
465
|
+
this.metadata.runMode = 'SUPERVISED';
|
|
466
|
+
await this.targetInspector.showInteractionCursor();
|
|
467
|
+
this.metadata.state = 'RESUMING';
|
|
468
|
+
break;
|
|
469
|
+
}
|
|
470
|
+
case 'RUN': {
|
|
471
|
+
// ⏩ Fast-forward: run autonomously toward the goal until done/paused.
|
|
472
|
+
if (!this.gptClient) {
|
|
473
|
+
break;
|
|
474
|
+
}
|
|
475
|
+
this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
|
|
476
|
+
await this.applyComposeInstruction(userAction.instruction);
|
|
477
|
+
if (!this.hasGoal()) {
|
|
478
|
+
break;
|
|
479
|
+
}
|
|
480
|
+
this.metadata.runMode = 'AUTONOMOUS';
|
|
481
|
+
await this.targetInspector.showInteractionCursor();
|
|
482
|
+
this.metadata.state = 'RESUMING';
|
|
483
|
+
break;
|
|
484
|
+
}
|
|
444
485
|
}
|
|
445
486
|
await this.persistence.setFlowMetadata(this.metadata);
|
|
446
487
|
}
|
|
488
|
+
/**
|
|
489
|
+
* Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
|
|
490
|
+
* standing goal yet, the text becomes the `overallObjective`; otherwise it's
|
|
491
|
+
* added as extra guidance. Either way it's injected into the LLM history (the
|
|
492
|
+
* system prompt was built at init, possibly before any objective existed) and
|
|
493
|
+
* recorded in the timeline. No-op for empty text.
|
|
494
|
+
*/
|
|
495
|
+
async applyComposeInstruction(instruction) {
|
|
496
|
+
const text = instruction?.trim();
|
|
497
|
+
if (!text) {
|
|
498
|
+
return;
|
|
499
|
+
}
|
|
500
|
+
const settingObjective = !this.hasGoal();
|
|
501
|
+
if (settingObjective) {
|
|
502
|
+
this.metadata.overallObjective = text;
|
|
503
|
+
}
|
|
504
|
+
this.gptMessages.push({
|
|
505
|
+
type: 'user',
|
|
506
|
+
items: [
|
|
507
|
+
{
|
|
508
|
+
type: 'text',
|
|
509
|
+
text: settingObjective
|
|
510
|
+
? `Your overall objective: ${text}`
|
|
511
|
+
: `${DonobuFlow.USER_INTERRUPT_MARKER}: ${text}`,
|
|
512
|
+
},
|
|
513
|
+
],
|
|
514
|
+
});
|
|
515
|
+
await this.recordAdHocToolCall(text, text);
|
|
516
|
+
}
|
|
447
517
|
/**
|
|
448
518
|
* Closes out the currently-proposed AI tool call(s) without executing them:
|
|
449
519
|
* emits a `tool_call_result` for each (so the LLM message history stays
|
|
@@ -515,10 +585,15 @@ class DonobuFlow {
|
|
|
515
585
|
!this.canHandOffToAi()) {
|
|
516
586
|
return;
|
|
517
587
|
}
|
|
588
|
+
// A deliberate pause should survive a mode change: update the run mode but
|
|
589
|
+
// keep the flow parked, so it only continues when the user hits play
|
|
590
|
+
// (RESUME). Other rest points (awaiting approval, waiting on the user) are
|
|
591
|
+
// active decision points, so a switch there takes effect immediately.
|
|
592
|
+
const wasPaused = this.metadata.state === 'PAUSED';
|
|
518
593
|
if (runMode === this.metadata.runMode &&
|
|
519
594
|
this.proposedToolCalls.length === 0) {
|
|
520
595
|
// Nothing to change.
|
|
521
|
-
this.metadata.state = 'RESUMING';
|
|
596
|
+
this.metadata.state = wasPaused ? 'PAUSED' : 'RESUMING';
|
|
522
597
|
return;
|
|
523
598
|
}
|
|
524
599
|
const previousRunMode = this.metadata.runMode;
|
|
@@ -571,6 +646,14 @@ class DonobuFlow {
|
|
|
571
646
|
: 'User handed off to Donobu.';
|
|
572
647
|
await this.recordAdHocToolCall(note, note);
|
|
573
648
|
}
|
|
649
|
+
if (wasPaused) {
|
|
650
|
+
// Stay paused after the mode change; the user resumes deliberately with
|
|
651
|
+
// play. Leave the cursor as-is — the RESUME handler shows/hides it when
|
|
652
|
+
// the flow actually continues.
|
|
653
|
+
this.metadata.state = 'PAUSED';
|
|
654
|
+
this.metadata.nextState = 'PAUSED';
|
|
655
|
+
return;
|
|
656
|
+
}
|
|
574
657
|
// The interaction cursor belongs to the AI; show it for AI modes, hide it
|
|
575
658
|
// when the human takes over.
|
|
576
659
|
if (runMode === 'INSTRUCT') {
|
|
@@ -584,13 +667,19 @@ class DonobuFlow {
|
|
|
584
667
|
}
|
|
585
668
|
/**
|
|
586
669
|
* Whether the flow can hand control to the AI: it needs both a GPT client and
|
|
587
|
-
*
|
|
588
|
-
* `canUseAi`) so the autonomy selector can disable the AI modes when they
|
|
589
|
-
* wouldn't work — e.g. a Playwright-imported test with no objective.
|
|
670
|
+
* a goal to pursue.
|
|
590
671
|
*/
|
|
591
672
|
canHandOffToAi() {
|
|
592
|
-
return
|
|
593
|
-
|
|
673
|
+
return this.gptClient !== null && this.hasGoal();
|
|
674
|
+
}
|
|
675
|
+
/**
|
|
676
|
+
* Whether there is a standing goal for the AI to pursue (a non-empty
|
|
677
|
+
* `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
|
|
678
|
+
* transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
|
|
679
|
+
* and ▶ Play needs either a goal or a typed instruction.
|
|
680
|
+
*/
|
|
681
|
+
hasGoal() {
|
|
682
|
+
return (this.metadata.overallObjective?.trim().length ?? 0) > 0;
|
|
594
683
|
}
|
|
595
684
|
/**
|
|
596
685
|
* This method is called if there is an unhandled unexpected exception. This
|
|
@@ -897,9 +986,15 @@ Message: ${dialog.message()}`;
|
|
|
897
986
|
switch (this.metadata.runMode) {
|
|
898
987
|
case 'AUTONOMOUS':
|
|
899
988
|
case 'SUPERVISED':
|
|
900
|
-
// The LLM
|
|
901
|
-
//
|
|
902
|
-
|
|
989
|
+
// The LLM drives continuously toward a goal — but only if there is
|
|
990
|
+
// one. Without a goal, rest in the compose state until the user
|
|
991
|
+
// supplies it (via a ▶/⏩ action). SUPERVISED differs only in that
|
|
992
|
+
// each proposed action is gated for the user's approval (see the
|
|
993
|
+
// approval check above); it keeps proposing the next step after each
|
|
994
|
+
// approval until the objective is met or the user pauses.
|
|
995
|
+
nextState = this.hasGoal()
|
|
996
|
+
? 'QUERYING_LLM_FOR_NEXT_ACTION'
|
|
997
|
+
: 'WAITING_ON_USER_FOR_NEXT_ACTION';
|
|
903
998
|
break;
|
|
904
999
|
case 'INSTRUCT':
|
|
905
1000
|
// A user is driving the flow, so wait for them to tell us what to
|
|
@@ -1058,7 +1153,7 @@ Message: ${dialog.message()}`;
|
|
|
1058
1153
|
if (!proposedToolCall) {
|
|
1059
1154
|
return;
|
|
1060
1155
|
}
|
|
1061
|
-
// This proposal is
|
|
1156
|
+
// This proposal is being executed, so its approval (if any) is spent.
|
|
1062
1157
|
if (proposedToolCall.toolCallId) {
|
|
1063
1158
|
this.approvedToolCallIds.delete(proposedToolCall.toolCallId);
|
|
1064
1159
|
}
|
|
@@ -828,15 +828,14 @@ async function validateFlowParams(flowParams, gptClient, initialRunMode, toolReg
|
|
|
828
828
|
switch (initialRunMode) {
|
|
829
829
|
case 'AUTONOMOUS':
|
|
830
830
|
case 'SUPERVISED':
|
|
831
|
-
// Both modes
|
|
832
|
-
//
|
|
833
|
-
// AI-proposed action on user approval at runtime.
|
|
834
|
-
if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
|
|
835
|
-
throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, `'initialRunMode' has a value of '${initialRunMode}'`);
|
|
836
|
-
}
|
|
831
|
+
// Both modes are AI-driven, so both need a GPT client and an objective
|
|
832
|
+
// to pursue.
|
|
837
833
|
if (!gptClient) {
|
|
838
834
|
throw new InvalidParamValueException_1.InvalidParamValueException('initialRunMode', initialRunMode, `no GPT client is available`);
|
|
839
835
|
}
|
|
836
|
+
if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
|
|
837
|
+
throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, 'an AI-driven flow needs an objective to pursue');
|
|
838
|
+
}
|
|
840
839
|
break;
|
|
841
840
|
case 'INSTRUCT':
|
|
842
841
|
break;
|
|
@@ -984,7 +983,12 @@ async function prepareToolCallsForRerun(toolCalls, options, toolRegistry) {
|
|
|
984
983
|
continue;
|
|
985
984
|
}
|
|
986
985
|
try {
|
|
987
|
-
|
|
986
|
+
// A tool returns null to exclude itself from replay (e.g. a recorded
|
|
987
|
+
// user instruction, which is a live artifact, not a replayable action).
|
|
988
|
+
const prepared = tool.prepareForRerun(toolCall, options);
|
|
989
|
+
if (prepared) {
|
|
990
|
+
proposedToolCalls.push(prepared);
|
|
991
|
+
}
|
|
988
992
|
}
|
|
989
993
|
catch (e) {
|
|
990
994
|
Logger_1.appLogger.warn(`Failed to prepare tool call for rerun: ${JSON.stringify(toolCall)}`, e);
|
|
@@ -21,24 +21,29 @@ export type UserAction = {
|
|
|
21
21
|
type: 'SET_RUN_MODE';
|
|
22
22
|
runMode: RunMode;
|
|
23
23
|
approvePending?: boolean;
|
|
24
|
+
} | {
|
|
25
|
+
type: 'STEP';
|
|
26
|
+
instruction?: string;
|
|
27
|
+
} | {
|
|
28
|
+
type: 'RUN';
|
|
29
|
+
instruction?: string;
|
|
24
30
|
};
|
|
25
31
|
export type ControlPanelDataUpdate = {
|
|
26
32
|
state: State;
|
|
33
|
+
runMode?: RunMode;
|
|
34
|
+
/** The flow's overall objective; the panel treats a non-empty value (or a
|
|
35
|
+
* typed instruction) as a "goal", which gates ⏩ Fast-forward and ▶ Play. */
|
|
36
|
+
overallObjective?: string | null;
|
|
37
|
+
/** The tools the flow can actually run (resolved from the ToolManager). */
|
|
38
|
+
allowedTools?: string[] | null;
|
|
27
39
|
headline?: string;
|
|
28
|
-
/**
|
|
29
|
-
* the control panel can offer only tools the flow can actually run. */
|
|
30
|
-
availableToolNames?: string[];
|
|
31
|
-
/** In SUPERVISED mode, the AI-proposed tool call(s) currently awaiting the
|
|
32
|
-
* user's approval. Surfaced to the UI so the user can see what they are
|
|
33
|
-
* approving or rejecting. Empty/undefined when nothing is pending. */
|
|
40
|
+
/** AI-proposed tool call(s) awaiting approval (SUPERVISED). */
|
|
34
41
|
pendingToolCalls?: ProposedToolCall[];
|
|
35
|
-
/**
|
|
36
|
-
*
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
* those options on the autonomy selector. */
|
|
41
|
-
canUseAi?: boolean;
|
|
42
|
+
/** Whether the flow has a GPT client (AI available at all). Not cleanly a
|
|
43
|
+
* FlowMetadata field — a client can come from env/default config, not just a
|
|
44
|
+
* named gptConfig — so it's surfaced explicitly. Drives whether the compose
|
|
45
|
+
* surface and the ▶/⏩ transport are offered at all. */
|
|
46
|
+
hasGptClient?: boolean;
|
|
42
47
|
};
|
|
43
48
|
export interface ControlPanel {
|
|
44
49
|
/** Cheap, idempotent render update. */
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod/v4';
|
|
2
|
+
import type { ProposedToolCall } from '../models/ProposedToolCall';
|
|
2
3
|
import type { ToolCallContext } from '../models/ToolCallContext';
|
|
3
4
|
import type { ToolCallResult } from '../models/ToolCallResult';
|
|
4
5
|
import { Tool } from './Tool';
|
|
@@ -20,5 +21,10 @@ export declare class AcknowledgeUserInstructionTool extends Tool<typeof Acknowle
|
|
|
20
21
|
constructor();
|
|
21
22
|
call(_context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionCoreSchema>): Promise<ToolCallResult>;
|
|
22
23
|
callFromGpt(context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionGptSchema>): Promise<ToolCallResult>;
|
|
24
|
+
/**
|
|
25
|
+
* A user instruction is a live-interaction artifact, not a replayable action,
|
|
26
|
+
* so it is excluded from reruns and generated scripts.
|
|
27
|
+
*/
|
|
28
|
+
prepareForRerun(): ProposedToolCall | null;
|
|
23
29
|
}
|
|
24
30
|
//# sourceMappingURL=AcknowledgeUserInstruction.d.ts.map
|
|
@@ -33,6 +33,13 @@ class AcknowledgeUserInstructionTool extends Tool_1.Tool {
|
|
|
33
33
|
async callFromGpt(context, parameters) {
|
|
34
34
|
return this.call(context, parameters);
|
|
35
35
|
}
|
|
36
|
+
/**
|
|
37
|
+
* A user instruction is a live-interaction artifact, not a replayable action,
|
|
38
|
+
* so it is excluded from reruns and generated scripts.
|
|
39
|
+
*/
|
|
40
|
+
prepareForRerun() {
|
|
41
|
+
return null;
|
|
42
|
+
}
|
|
36
43
|
}
|
|
37
44
|
exports.AcknowledgeUserInstructionTool = AcknowledgeUserInstructionTool;
|
|
38
45
|
AcknowledgeUserInstructionTool.NAME = 'acknowledgeUserInstruction';
|
package/dist/tools/Tool.d.ts
CHANGED
|
@@ -54,14 +54,17 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
|
|
|
54
54
|
previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
|
|
55
55
|
/**
|
|
56
56
|
* Transform a completed tool call into a {@link ProposedToolCall} suitable
|
|
57
|
-
* for deterministic replay / code generation
|
|
57
|
+
* for deterministic replay / code generation, or `null` to exclude the call
|
|
58
|
+
* from replay entirely.
|
|
58
59
|
*
|
|
59
60
|
* The default implementation is a passthrough — `{ name, parameters }` —
|
|
60
61
|
* which is correct for tools that have no replay-specific logic
|
|
61
62
|
* (waits, assertions, markers, etc.). Tools that need to hoist
|
|
62
63
|
* selector metadata out of their outcome, strip LLM-only fields, or
|
|
63
|
-
* otherwise rewrite themselves override this method.
|
|
64
|
+
* otherwise rewrite themselves override this method. Tools that record
|
|
65
|
+
* live-interaction artifacts rather than replayable actions (e.g. a user
|
|
66
|
+
* instruction) return `null` so they don't reappear on reruns.
|
|
64
67
|
*/
|
|
65
|
-
prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall;
|
|
68
|
+
prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall | null;
|
|
66
69
|
}
|
|
67
70
|
//# sourceMappingURL=Tool.d.ts.map
|
package/dist/tools/Tool.js
CHANGED
|
@@ -45,13 +45,16 @@ class Tool {
|
|
|
45
45
|
async previewInteraction(_context, _parameters) { }
|
|
46
46
|
/**
|
|
47
47
|
* Transform a completed tool call into a {@link ProposedToolCall} suitable
|
|
48
|
-
* for deterministic replay / code generation
|
|
48
|
+
* for deterministic replay / code generation, or `null` to exclude the call
|
|
49
|
+
* from replay entirely.
|
|
49
50
|
*
|
|
50
51
|
* The default implementation is a passthrough — `{ name, parameters }` —
|
|
51
52
|
* which is correct for tools that have no replay-specific logic
|
|
52
53
|
* (waits, assertions, markers, etc.). Tools that need to hoist
|
|
53
54
|
* selector metadata out of their outcome, strip LLM-only fields, or
|
|
54
|
-
* otherwise rewrite themselves override this method.
|
|
55
|
+
* otherwise rewrite themselves override this method. Tools that record
|
|
56
|
+
* live-interaction artifacts rather than replayable actions (e.g. a user
|
|
57
|
+
* instruction) return `null` so they don't reappear on reruns.
|
|
55
58
|
*/
|
|
56
59
|
prepareForRerun(toolCall, _options) {
|
|
57
60
|
return {
|