donobu 5.53.0 → 5.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/apis/FlowsApi.d.ts +95 -7
  2. package/dist/apis/FlowsApi.js +139 -11
  3. package/dist/apis/TestsApi.js +4 -3
  4. package/dist/codegen/CodeGenerator.js +4 -2
  5. package/dist/esm/apis/FlowsApi.d.ts +95 -7
  6. package/dist/esm/apis/FlowsApi.js +139 -11
  7. package/dist/esm/apis/TestsApi.js +4 -3
  8. package/dist/esm/codegen/CodeGenerator.js +4 -2
  9. package/dist/esm/lib/test/testExtension.js +34 -29
  10. package/dist/esm/managers/AdminApiController.js +4 -0
  11. package/dist/esm/managers/DonobuFlow.d.ts +98 -1
  12. package/dist/esm/managers/DonobuFlow.js +345 -21
  13. package/dist/esm/managers/DonobuFlowsManager.d.ts +14 -1
  14. package/dist/esm/managers/DonobuFlowsManager.js +20 -2
  15. package/dist/esm/models/ControlPanel.d.ts +22 -0
  16. package/dist/esm/models/CreateDonobuFlow.d.ts +1 -0
  17. package/dist/esm/models/CreateTest.d.ts +1 -0
  18. package/dist/esm/models/FlowMetadata.d.ts +6 -0
  19. package/dist/esm/models/FlowMetadata.js +3 -1
  20. package/dist/esm/models/RunMode.d.ts +1 -0
  21. package/dist/esm/models/RunMode.js +7 -1
  22. package/dist/esm/models/TestMetadata.d.ts +9 -0
  23. package/dist/esm/persistence/DonobuSqliteDb.js +3 -2
  24. package/dist/esm/tools/ReplayableInteraction.d.ts +20 -0
  25. package/dist/esm/tools/ReplayableInteraction.js +63 -0
  26. package/dist/esm/tools/SetRunModeTool.d.ts +2 -0
  27. package/dist/esm/tools/Tool.d.ts +16 -0
  28. package/dist/esm/tools/Tool.js +16 -0
  29. package/dist/esm/tools/TriggerDonobuFlowTool.d.ts +2 -0
  30. package/dist/lib/test/testExtension.js +34 -29
  31. package/dist/managers/AdminApiController.js +4 -0
  32. package/dist/managers/DonobuFlow.d.ts +98 -1
  33. package/dist/managers/DonobuFlow.js +345 -21
  34. package/dist/managers/DonobuFlowsManager.d.ts +14 -1
  35. package/dist/managers/DonobuFlowsManager.js +20 -2
  36. package/dist/models/ControlPanel.d.ts +22 -0
  37. package/dist/models/CreateDonobuFlow.d.ts +1 -0
  38. package/dist/models/CreateTest.d.ts +1 -0
  39. package/dist/models/FlowMetadata.d.ts +6 -0
  40. package/dist/models/FlowMetadata.js +3 -1
  41. package/dist/models/RunMode.d.ts +1 -0
  42. package/dist/models/RunMode.js +7 -1
  43. package/dist/models/TestMetadata.d.ts +9 -0
  44. package/dist/persistence/DonobuSqliteDb.js +3 -2
  45. package/dist/tools/ReplayableInteraction.d.ts +20 -0
  46. package/dist/tools/ReplayableInteraction.js +63 -0
  47. package/dist/tools/SetRunModeTool.d.ts +2 -0
  48. package/dist/tools/Tool.d.ts +16 -0
  49. package/dist/tools/Tool.js +16 -0
  50. package/dist/tools/TriggerDonobuFlowTool.d.ts +2 -0
  51. package/package.json +1 -1
@@ -12,6 +12,7 @@ exports.StateSchema = v4_1.z
12
12
  'INITIALIZING',
13
13
  'QUERYING_LLM_FOR_NEXT_ACTION',
14
14
  'WAITING_ON_USER_FOR_NEXT_ACTION',
15
+ 'WAITING_FOR_APPROVAL',
15
16
  'PAUSED',
16
17
  'RESUMING',
17
18
  'RUNNING_ACTION',
@@ -22,8 +23,9 @@ exports.StateSchema = v4_1.z
22
23
  - UNSTARTED: Flow created but not yet initialized.
23
24
  - INITIALIZING: Setting up browser context and initial state.
24
25
  - RUNNING_ACTION: Executing a tool call.
25
- - QUERYING_LLM_FOR_NEXT_ACTION: AI determining next action (AUTONOMOUS mode).
26
+ - QUERYING_LLM_FOR_NEXT_ACTION: AI determining next action (AUTONOMOUS/SUPERVISED mode).
26
27
  - WAITING_ON_USER_FOR_NEXT_ACTION: Waiting for user input (INSTRUCT mode).
28
+ - WAITING_FOR_APPROVAL: An AI-proposed action is waiting for the user to approve or reject it (SUPERVISED mode).
27
29
  - PAUSED: Flow execution temporarily suspended.
28
30
  - RESUMING: Transitioning from paused to active state.
29
31
  - FAILED: Flow terminated unsuccessfully.
@@ -1,6 +1,7 @@
1
1
  import { z } from 'zod/v4';
2
2
  export declare const RunModeSchema: z.ZodEnum<{
3
3
  AUTONOMOUS: "AUTONOMOUS";
4
+ SUPERVISED: "SUPERVISED";
4
5
  INSTRUCT: "INSTRUCT";
5
6
  DETERMINISTIC: "DETERMINISTIC";
6
7
  }>;
@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.RunModeSchema = void 0;
4
4
  const v4_1 = require("zod/v4");
5
5
  exports.RunModeSchema = v4_1.z
6
- .enum(['AUTONOMOUS', 'INSTRUCT', 'DETERMINISTIC'])
6
+ .enum(['AUTONOMOUS', 'SUPERVISED', 'INSTRUCT', 'DETERMINISTIC'])
7
7
  .describe(`The execution mode that determines how a flow operates and makes decisions:
8
8
 
9
9
  - AUTONOMOUS: The flow is driven by an AI agent (GPT) that autonomously decides what actions to
@@ -11,6 +11,12 @@ exports.RunModeSchema = v4_1.z
11
11
  determines the next appropriate action, and executes it without human intervention.
12
12
  This mode requires a valid GPT client configuration.
13
13
 
14
+ - SUPERVISED: The flow is driven by an AI agent (GPT) that proposes the next action, but a human
15
+ supervises each proposal: every AI-proposed action waits for the user to approve it,
16
+ or reject it with optional feedback, before it executes. Rejecting discards the
17
+ proposal and lets the AI try again with the feedback in mind. Like AUTONOMOUS, this
18
+ mode pursues an overall objective and requires a valid GPT client configuration.
19
+
14
20
  - INSTRUCT: The flow waits for explicit user instructions for each action. The user manually
15
21
  directs what tools to call and when, making this mode suitable for guided walkthroughs
16
22
  or when human oversight is required for each step. No GPT client is required.
@@ -141,6 +141,7 @@ export declare const TestMetadataSchema: z.ZodObject<{
141
141
  suiteId: z.ZodNullable<z.ZodString>;
142
142
  nextRunMode: z.ZodEnum<{
143
143
  AUTONOMOUS: "AUTONOMOUS";
144
+ SUPERVISED: "SUPERVISED";
144
145
  INSTRUCT: "INSTRUCT";
145
146
  DETERMINISTIC: "DETERMINISTIC";
146
147
  }>;
@@ -324,6 +325,7 @@ export declare const TestListItemSchema: z.ZodObject<{
324
325
  suiteId: z.ZodNullable<z.ZodString>;
325
326
  nextRunMode: z.ZodEnum<{
326
327
  AUTONOMOUS: "AUTONOMOUS";
328
+ SUPERVISED: "SUPERVISED";
327
329
  INSTRUCT: "INSTRUCT";
328
330
  DETERMINISTIC: "DETERMINISTIC";
329
331
  }>;
@@ -486,6 +488,7 @@ export declare const TestListItemSchema: z.ZodObject<{
486
488
  defaultMessageDuration: z.ZodNullable<z.ZodNumber>;
487
489
  runMode: z.ZodEnum<{
488
490
  AUTONOMOUS: "AUTONOMOUS";
491
+ SUPERVISED: "SUPERVISED";
489
492
  INSTRUCT: "INSTRUCT";
490
493
  DETERMINISTIC: "DETERMINISTIC";
491
494
  }>;
@@ -500,6 +503,7 @@ export declare const TestListItemSchema: z.ZodObject<{
500
503
  INITIALIZING: "INITIALIZING";
501
504
  QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
502
505
  WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
506
+ WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
503
507
  PAUSED: "PAUSED";
504
508
  RESUMING: "RESUMING";
505
509
  RUNNING_ACTION: "RUNNING_ACTION";
@@ -511,6 +515,7 @@ export declare const TestListItemSchema: z.ZodObject<{
511
515
  INITIALIZING: "INITIALIZING";
512
516
  QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
513
517
  WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
518
+ WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
514
519
  PAUSED: "PAUSED";
515
520
  RESUMING: "RESUMING";
516
521
  RUNNING_ACTION: "RUNNING_ACTION";
@@ -676,6 +681,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
676
681
  suiteId: z.ZodNullable<z.ZodString>;
677
682
  nextRunMode: z.ZodEnum<{
678
683
  AUTONOMOUS: "AUTONOMOUS";
684
+ SUPERVISED: "SUPERVISED";
679
685
  INSTRUCT: "INSTRUCT";
680
686
  DETERMINISTIC: "DETERMINISTIC";
681
687
  }>;
@@ -838,6 +844,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
838
844
  defaultMessageDuration: z.ZodNullable<z.ZodNumber>;
839
845
  runMode: z.ZodEnum<{
840
846
  AUTONOMOUS: "AUTONOMOUS";
847
+ SUPERVISED: "SUPERVISED";
841
848
  INSTRUCT: "INSTRUCT";
842
849
  DETERMINISTIC: "DETERMINISTIC";
843
850
  }>;
@@ -852,6 +859,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
852
859
  INITIALIZING: "INITIALIZING";
853
860
  QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
854
861
  WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
862
+ WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
855
863
  PAUSED: "PAUSED";
856
864
  RESUMING: "RESUMING";
857
865
  RUNNING_ACTION: "RUNNING_ACTION";
@@ -863,6 +871,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
863
871
  INITIALIZING: "INITIALIZING";
864
872
  QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
865
873
  WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
874
+ WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
866
875
  PAUSED: "PAUSED";
867
876
  RESUMING: "RESUMING";
868
877
  RUNNING_ACTION: "RUNNING_ACTION";
@@ -471,7 +471,8 @@ CREATE INDEX IF NOT EXISTS idx_ai_queries_flow_id_started_at ON ai_queries(flow_
471
471
  // value (likely null) if no autonomous flow exists in the group.
472
472
  const newestAutonomous = [...group.flows]
473
473
  .reverse()
474
- .find((f) => f.metadata.runMode === 'AUTONOMOUS')?.metadata;
474
+ .find((f) => f.metadata.runMode === 'AUTONOMOUS' ||
475
+ f.metadata.runMode === 'SUPERVISED')?.metadata;
475
476
  const maxToolCalls = newestAutonomous?.maxToolCalls ?? newest.maxToolCalls;
476
477
  const testName = (0, displayName_1.getDisplayName)(newest, 'Untitled Test');
477
478
  const testMetadata = {
@@ -649,7 +650,7 @@ CREATE INDEX IF NOT EXISTS idx_ai_queries_flow_id_started_at ON ai_queries(flow_
649
650
  // value (likely null) if no autonomous flow exists in the group.
650
651
  const newestAutonomous = [...flows]
651
652
  .reverse()
652
- .find((f) => f.runMode === 'AUTONOMOUS');
653
+ .find((f) => f.runMode === 'AUTONOMOUS' || f.runMode === 'SUPERVISED');
653
654
  const maxToolCalls = newestAutonomous?.maxToolCalls ?? newestFlow.maxToolCalls;
654
655
  const testName = (0, displayName_1.getDisplayName)(newestFlow, 'Untitled Test');
655
656
  const testMetadata = {
@@ -84,8 +84,28 @@ export declare abstract class ReplayableInteraction<CoreSchema extends z.ZodObje
84
84
  readonly coreSchema: CoreSchema;
85
85
  static readonly MAX_SELECTOR_FAILOVERS = 3;
86
86
  static readonly MAX_LOCATOR_MATCH_COUNT = 3;
87
+ /**
88
+ * How long the preview cursor takes to glide to (and dwell on) the proposed
89
+ * target, in milliseconds. {@link InteractionVisualizer.pointAt} animates the
90
+ * move over half this duration.
91
+ */
92
+ static readonly PREVIEW_CURSOR_DURATION_MILLIS = 900;
87
93
  constructor(name: string, description: string, coreSchema: CoreSchema, inputSchema: NonGptSchema, inputSchemaForGpt: GptSchema, requiresGpt?: boolean);
88
94
  call(context: ToolCallContext, parameters: z.infer<NonGptSchema>): Promise<ToolCallResult>;
95
+ /**
96
+ * SUPERVISED-mode preview: resolve the element this interaction *would*
97
+ * target (from either an annotation- or selector-based proposal) and move the
98
+ * on-screen cursor to it, without performing the interaction. Best-effort —
99
+ * an unresolvable element simply leaves the cursor where it is.
100
+ */
101
+ previewInteraction(context: ToolCallContext, parameters: Record<string, unknown>): Promise<void>;
102
+ /**
103
+ * Resolve a proposed interaction's parameters to the {@link Locator} it would
104
+ * act on, mirroring the resolution in {@link call} (selector-based) and
105
+ * {@link callFromGpt} (annotation-based) but returning only the locator and
106
+ * never invoking anything. Returns `null` if the element can't be found.
107
+ */
108
+ private resolvePreviewLocator;
89
109
  callFromGpt(context: ToolCallContext, parameters: z.infer<GptSchema>): Promise<ToolCallResult>;
90
110
  /**
91
111
  * **Concrete subclasses implement the actual user action here.**
@@ -96,6 +96,63 @@ class ReplayableInteraction extends Tool_1.Tool {
96
96
  }
97
97
  return this.callCore(context, parameters, locators, parameters.selector);
98
98
  }
99
+ /**
100
+ * SUPERVISED-mode preview: resolve the element this interaction *would*
101
+ * target (from either an annotation- or selector-based proposal) and move the
102
+ * on-screen cursor to it, without performing the interaction. Best-effort —
103
+ * an unresolvable element simply leaves the cursor where it is.
104
+ */
105
+ async previewInteraction(context, parameters) {
106
+ const page = (0, TargetUtils_1.webPage)(context);
107
+ const locator = await this.resolvePreviewLocator(context, page, parameters);
108
+ if (!locator) {
109
+ return;
110
+ }
111
+ // Point at the same visible target (or its label) the real interaction
112
+ // would, so the preview matches what approval will touch.
113
+ const pointTarget = await ReplayableInteraction.getLocatorOrItsLabel(locator.first());
114
+ // Only reveal the cursor now that we have a real target to point at, so a
115
+ // non-interactive proposal never pops a stationary cursor.
116
+ await context.targetInspector.showInteractionCursor();
117
+ await context.interactionVisualizer.pointAt(page, pointTarget.first(), undefined, ReplayableInteraction.PREVIEW_CURSOR_DURATION_MILLIS);
118
+ }
119
+ /**
120
+ * Resolve a proposed interaction's parameters to the {@link Locator} it would
121
+ * act on, mirroring the resolution in {@link call} (selector-based) and
122
+ * {@link callFromGpt} (annotation-based) but returning only the locator and
123
+ * never invoking anything. Returns `null` if the element can't be found.
124
+ */
125
+ async resolvePreviewLocator(context, page, parameters) {
126
+ // Selector-based (deterministic) proposal.
127
+ if (parameters.selector) {
128
+ const parsed = ElementSelector_1.ElementSelectorSchema.safeParse(parameters.selector);
129
+ if (!parsed.success) {
130
+ return null;
131
+ }
132
+ const locators = await ReplayableInteraction.getLocatorsOrderedByMatchCount(page, parsed.data, ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT, ReplayableInteraction.MAX_SELECTOR_FAILOVERS);
133
+ return locators[0]?.locator ?? null;
134
+ }
135
+ // Annotation-based (LLM-driven) proposal.
136
+ if (typeof parameters.annotation === 'string' ||
137
+ typeof parameters.annotation === 'number') {
138
+ const elementSelector = `[${context.targetInspector.interactableElementAttribute}="${parameters.annotation}"]`;
139
+ for (const frame of page.frames()) {
140
+ if (frame.isDetached()) {
141
+ continue;
142
+ }
143
+ try {
144
+ const candidate = frame.locator(elementSelector);
145
+ if ((await candidate.count()) > 0) {
146
+ return candidate;
147
+ }
148
+ }
149
+ catch {
150
+ // Detached or cross-origin frame — skip it.
151
+ }
152
+ }
153
+ }
154
+ return null;
155
+ }
99
156
  async callFromGpt(context, parameters) {
100
157
  const page = (0, TargetUtils_1.webPage)(context);
101
158
  const elementSelector = `[${context.targetInspector.interactableElementAttribute}="${parameters.annotation}"]`;
@@ -655,4 +712,10 @@ class ReplayableInteraction extends Tool_1.Tool {
655
712
  exports.ReplayableInteraction = ReplayableInteraction;
656
713
  ReplayableInteraction.MAX_SELECTOR_FAILOVERS = 3;
657
714
  ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT = 3;
715
+ /**
716
+ * How long the preview cursor takes to glide to (and dwell on) the proposed
717
+ * target, in milliseconds. {@link InteractionVisualizer.pointAt} animates the
718
+ * move over half this duration.
719
+ */
720
+ ReplayableInteraction.PREVIEW_CURSOR_DURATION_MILLIS = 900;
658
721
  //# sourceMappingURL=ReplayableInteraction.js.map
@@ -5,6 +5,7 @@ import { Tool } from './Tool';
5
5
  export declare const SetRunModeCoreSchema: z.ZodObject<{
6
6
  runMode: z.ZodEnum<{
7
7
  AUTONOMOUS: "AUTONOMOUS";
8
+ SUPERVISED: "SUPERVISED";
8
9
  INSTRUCT: "INSTRUCT";
9
10
  DETERMINISTIC: "DETERMINISTIC";
10
11
  }>;
@@ -12,6 +13,7 @@ export declare const SetRunModeCoreSchema: z.ZodObject<{
12
13
  export declare const SetRunModeGptSchema: z.ZodObject<{
13
14
  runMode: z.ZodEnum<{
14
15
  AUTONOMOUS: "AUTONOMOUS";
16
+ SUPERVISED: "SUPERVISED";
15
17
  INSTRUCT: "INSTRUCT";
16
18
  DETERMINISTIC: "DETERMINISTIC";
17
19
  }>;
@@ -36,6 +36,22 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
36
36
  * Invoke the tool as made from a GPT with the given context and parameters.
37
37
  */
38
38
  abstract callFromGpt(context: ToolCallContext, parameters: z.infer<CallFromGptSchema>): Promise<ToolCallResult>;
39
+ /**
40
+ * Move the on-screen cursor to where this tool *would* interact, WITHOUT
41
+ * performing the action. Used by SUPERVISED mode to show the user what an
42
+ * AI-proposed action would touch while it awaits their approval.
43
+ *
44
+ * The default is a no-op — only tools with a visible interaction target
45
+ * (e.g. {@link ReplayableInteraction}-derived click/type/hover tools)
46
+ * override it. Implementations must be best-effort and side-effect-free with
47
+ * respect to the page: they may move the visual cursor but must never click,
48
+ * type, navigate, or otherwise mutate page state.
49
+ *
50
+ * @param context - The active {@link ToolCallContext}.
51
+ * @param parameters - The proposed tool call's raw parameters (as proposed by
52
+ * the LLM or a user); implementations validate/resolve these themselves.
53
+ */
54
+ previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
39
55
  /**
40
56
  * Transform a completed tool call into a {@link ProposedToolCall} suitable
41
57
  * for deterministic replay / code generation.
@@ -27,6 +27,22 @@ class Tool {
27
27
  this.controlPanelMessage = controlPanelMessage;
28
28
  this.supportedTargets = supportedTargets;
29
29
  }
30
+ /**
31
+ * Move the on-screen cursor to where this tool *would* interact, WITHOUT
32
+ * performing the action. Used by SUPERVISED mode to show the user what an
33
+ * AI-proposed action would touch while it awaits their approval.
34
+ *
35
+ * The default is a no-op — only tools with a visible interaction target
36
+ * (e.g. {@link ReplayableInteraction}-derived click/type/hover tools)
37
+ * override it. Implementations must be best-effort and side-effect-free with
38
+ * respect to the page: they may move the visual cursor but must never click,
39
+ * type, navigate, or otherwise mutate page state.
40
+ *
41
+ * @param context - The active {@link ToolCallContext}.
42
+ * @param parameters - The proposed tool call's raw parameters (as proposed by
43
+ * the LLM or a user); implementations validate/resolve these themselves.
44
+ */
45
+ async previewInteraction(_context, _parameters) { }
30
46
  /**
31
47
  * Transform a completed tool call into a {@link ProposedToolCall} suitable
32
48
  * for deterministic replay / code generation.
@@ -138,6 +138,7 @@ export declare const TriggerDonobuFlowCoreSchema: z.ZodObject<{
138
138
  gptConfigNameOverride: z.ZodOptional<z.ZodNullable<z.ZodString>>;
139
139
  initialRunMode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
140
140
  AUTONOMOUS: "AUTONOMOUS";
141
+ SUPERVISED: "SUPERVISED";
141
142
  INSTRUCT: "INSTRUCT";
142
143
  DETERMINISTIC: "DETERMINISTIC";
143
144
  }>>>;
@@ -286,6 +287,7 @@ export declare const TriggerDonobuFlowGptSchema: z.ZodObject<{
286
287
  gptConfigNameOverride: z.ZodOptional<z.ZodNullable<z.ZodString>>;
287
288
  initialRunMode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
288
289
  AUTONOMOUS: "AUTONOMOUS";
290
+ SUPERVISED: "SUPERVISED";
289
291
  INSTRUCT: "INSTRUCT";
290
292
  DETERMINISTIC: "DETERMINISTIC";
291
293
  }>>>;
@@ -908,14 +908,14 @@ async function persistFlowJson(persistence, flowId, fileId, value) {
908
908
  * See `fetchBaselineScreenshot` / `gatherTestFailureEvidence` in
909
909
  * triageTestFailure.ts.
910
910
  *
911
- * Runs for any meaningful end state; skipped only for `skipped` tests (no real
912
- * page state), when triage is disabled, or for V1 (legacy self-heal) tests.
913
- * Best-effort and fails open.
911
+ * Runs for any meaningful end state, including V1 (objective-annotated) tests;
912
+ * skipped only for `skipped` tests (no real page state) or when triage is
913
+ * disabled. Triage reads this screenshot as the current run's failure shot and
914
+ * as the baseline for a later failing run. Best-effort and fails open.
914
915
  */
915
916
  async function captureAndPersistFinalState(page, testInfo) {
916
917
  if (testInfo.status === 'skipped' ||
917
- process.env.DONOBU_TRIAGE_DISABLED === '1' ||
918
- isV1Test(testInfo)) {
918
+ process.env.DONOBU_TRIAGE_DISABLED === '1') {
919
919
  return;
920
920
  }
921
921
  const flowId = page._dnb?.donobuFlowMetadata?.id;
@@ -1015,35 +1015,40 @@ async function finalizeTest(page, testInfo, logBuffer, videoOption) {
1015
1015
  // future failing run reads a successful run's copy as its baseline.
1016
1016
  await captureAndPersistFinalState(page, testInfo);
1017
1017
  if (testInfo.status === 'failed') {
1018
- if (isV1Test(testInfo)) {
1019
- if (isV1SelfHealingEnabled(testInfo) &&
1020
- !MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) {
1021
- if (!sharedState.gptClient) {
1022
- Logger_1.appLogger.warn('Will not self-heal due to no GPT client being set up.');
1023
- }
1024
- else {
1025
- try {
1026
- await (0, selfHealing_1.selfHeal)(sharedState.gptClient, testInfo, page);
1027
- }
1028
- catch (error) {
1029
- Logger_1.appLogger.error('Error when attempting to self-heal', error);
1030
- }
1031
- }
1018
+ // Gather failure-triage evidence for every failed test, regardless of its
1019
+ // V1 (objective-annotated) classification or self-heal setting. Triage is a
1020
+ // standalone diagnostic: it writes the failure evidence that populates the
1021
+ // triage run directory, feeds the reports, and supplies the treatment plans
1022
+ // auto-heal consumes. Legacy V1 self-heal runs separately, below.
1023
+ try {
1024
+ const evidenceResult = await (0, triageTestFailure_1.gatherTestFailureEvidence)(testInfo, page);
1025
+ if (evidenceResult?.filePath) {
1026
+ Logger_1.appLogger.info(`Persisted Donobu triage evidence for "${testInfo.title}" to ${evidenceResult.filePath}.`);
1027
+ }
1028
+ else if (evidenceResult?.evidence) {
1029
+ Logger_1.appLogger.info(`Captured Donobu triage evidence for "${testInfo.title}" (schema v${evidenceResult.evidence.schemaVersion}).`);
1032
1030
  }
1033
1031
  }
1034
- else {
1035
- try {
1036
- const evidenceResult = await (0, triageTestFailure_1.gatherTestFailureEvidence)(testInfo, page);
1037
- if (evidenceResult?.filePath) {
1038
- Logger_1.appLogger.info(`Persisted Donobu triage evidence for "${testInfo.title}" to ${evidenceResult.filePath}.`);
1032
+ catch (error) {
1033
+ Logger_1.appLogger.error(`Failed to gather test failure evidence for "${testInfo.title}".`, error);
1034
+ }
1035
+ // Legacy V1 self-heal: only for objective-annotated tests that opt in via
1036
+ // SELF_HEAL_TESTS_ENABLED, and never during an auto-heal rerun (which owns
1037
+ // its own remediation path).
1038
+ if (isV1Test(testInfo) &&
1039
+ isV1SelfHealingEnabled(testInfo) &&
1040
+ !MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) {
1041
+ if (!sharedState.gptClient) {
1042
+ Logger_1.appLogger.warn('Will not self-heal due to no GPT client being set up.');
1043
+ }
1044
+ else {
1045
+ try {
1046
+ await (0, selfHealing_1.selfHeal)(sharedState.gptClient, testInfo, page);
1039
1047
  }
1040
- else if (evidenceResult?.evidence) {
1041
- Logger_1.appLogger.info(`Captured Donobu triage evidence for "${testInfo.title}" (schema v${evidenceResult.evidence.schemaVersion}).`);
1048
+ catch (error) {
1049
+ Logger_1.appLogger.error('Error when attempting to self-heal', error);
1042
1050
  }
1043
1051
  }
1044
- catch (error) {
1045
- Logger_1.appLogger.error(`Failed to gather test failure evidence for "${testInfo.title}".`, error);
1046
- }
1047
1052
  }
1048
1053
  }
1049
1054
  else if (testInfo.status === 'passed' &&
@@ -287,6 +287,10 @@ class AdminApiController {
287
287
  app.post('/api/flows/:flowId/cancel', this.asyncHandler(apis.flowsApi.cancelFlow.bind(apis.flowsApi)));
288
288
  app.post('/api/flows/:flowId/pause', this.asyncHandler(apis.flowsApi.pauseFlow.bind(apis.flowsApi)));
289
289
  app.post('/api/flows/:flowId/resume', this.asyncHandler(apis.flowsApi.resumeFlow.bind(apis.flowsApi)));
290
+ app.post('/api/flows/:flowId/approve', this.asyncHandler(apis.flowsApi.approveProposal.bind(apis.flowsApi)));
291
+ app.post('/api/flows/:flowId/reject', this.asyncHandler(apis.flowsApi.rejectProposal.bind(apis.flowsApi)));
292
+ app.get('/api/flows/:flowId/pending-tool-calls', this.asyncHandler(apis.flowsApi.getPendingToolCalls.bind(apis.flowsApi)));
293
+ app.post('/api/flows/:flowId/run-mode', this.asyncHandler(apis.flowsApi.setRunMode.bind(apis.flowsApi)));
290
294
  app.post('/api/flows/:flowId/tool-calls', this.asyncHandler(apis.flowsToolCallsApi.postToolCalls.bind(apis.flowsToolCallsApi)));
291
295
  }
292
296
  /**
@@ -1,7 +1,7 @@
1
1
  import type { z } from 'zod/v4';
2
2
  import type { GptClient } from '../clients/GptClient';
3
3
  import type { AiQuery } from '../models/AiQuery';
4
- import type { ControlPanel } from '../models/ControlPanel';
4
+ import type { ControlPanel, UserAction } from '../models/ControlPanel';
5
5
  import type { FlowMetadata } from '../models/FlowMetadata';
6
6
  import type { GptMessage, StructuredOutputMessage, TextItem } from '../models/GptMessage';
7
7
  import type { SystemMessage } from '../models/GptMessage';
@@ -40,8 +40,23 @@ export declare class DonobuFlow {
40
40
  readonly controlPanel: ControlPanel;
41
41
  private static readonly MAIN_MESSAGE_ELEMENT_LIST_MARKER;
42
42
  static readonly USER_INTERRUPT_MARKER = "[User interruption while flow was paused, this MUST be acknowledged]";
43
+ static readonly REJECTION_MARKER = "[The user rejected your previously proposed action(s). Do NOT repeat them. Propose a different next action, taking the following feedback into account]";
43
44
  inProgressToolCall: ToolCall | null;
44
45
  readonly aiQueries: AiQuery[];
46
+ /**
47
+ * In SUPERVISED mode, the set of `toolCallId`s the user has explicitly
48
+ * approved. A proposed tool call only executes once its id is in this set;
49
+ * AI-proposed calls whose id is absent park the flow in
50
+ * `WAITING_FOR_APPROVAL`. Ids are removed as their calls run, so the set only
51
+ * ever holds currently-pending approvals.
52
+ */
53
+ private readonly approvedToolCallIds;
54
+ /**
55
+ * User actions submitted out-of-band (e.g. via REST endpoints rather than the
56
+ * desktop control panel). Drained by the run loop alongside the control
57
+ * panel, so both surfaces drive the flow through the same code path.
58
+ */
59
+ private readonly userActionInbox;
45
60
  constructor(flowsManager: DonobuFlowsManager, envData: Record<string, string>, persistence: FlowsPersistence, gptClient: GptClient | null, toolManager: ToolManager, interactionVisualizer: InteractionVisualizer, proposedToolCalls: ProposedToolCall[], invokedToolCalls: ToolCall[], gptMessages: GptMessage[], targetInspector: TargetInspector, metadata: FlowMetadata, controlPanel: ControlPanel);
46
61
  /**
47
62
  * Drives the entire Donobu flow state-machine until it reaches a
@@ -78,6 +93,25 @@ export declare class DonobuFlow {
78
93
  * explicit result.
79
94
  */
80
95
  run(): Promise<FlowMetadata['result']>;
96
+ /**
97
+ * The single entry point for external user imperatives. Every cooperative
98
+ * control interrupt — pause, resume, end, approve, reject, run-mode change —
99
+ * arrives here as a {@link UserAction}, whether it came from a REST endpoint
100
+ * (web frontend / SDK) or the desktop control panel. The action is queued and
101
+ * drained by the run loop ({@link popUserAction}) and handled uniformly by
102
+ * {@link onUserInterruption}, so all transports drive the flow identically.
103
+ *
104
+ * (The forceful `cancelFlow` and the queue-injecting `proposeToolCall` on
105
+ * {@link DonobuFlowsManager} intentionally do NOT use this path — see their
106
+ * docs.)
107
+ */
108
+ submitUserAction(action: UserAction): void;
109
+ /**
110
+ * Returns and clears the next pending user action, preferring out-of-band
111
+ * actions (REST) over the control panel. Both sources feed the same
112
+ * intervention path so the desktop and web surfaces behave identically.
113
+ */
114
+ private popUserAction;
81
115
  /**
82
116
  * Delegates to the inspector to attempt recovery after the target is
83
117
  * closed. If recovery fails, the flow is marked as failed.
@@ -100,6 +134,40 @@ export declare class DonobuFlow {
100
134
  * Note that this *bypasses* the normal state transition logic!
101
135
  */
102
136
  private onUserInterruption;
137
+ /**
138
+ * Closes out the currently-proposed AI tool call(s) without executing them:
139
+ * emits a `tool_call_result` for each (so the LLM message history stays
140
+ * well-formed — every tool call needs a matching result) and clears the
141
+ * proposal queue and any pending approvals. Shared by REJECT and manual
142
+ * takeover.
143
+ */
144
+ private closeOutPendingProposals;
145
+ /**
146
+ * Records a synthetic {@link AcknowledgeUserInstructionTool} tool call so a
147
+ * user-driven event (rejection, mode change) shows up in the flow timeline.
148
+ * Mirrors how RESUME records a user instruction.
149
+ */
150
+ private recordAdHocToolCall;
151
+ /**
152
+ * Moves the flow along the autonomy axis at runtime — the primitive behind
153
+ * "start asking me" (→ SUPERVISED), "go fully autonomous" (→ AUTONOMOUS),
154
+ * and "I'll take over" (→ INSTRUCT). After adjusting `runMode` and the
155
+ * pending proposal as appropriate, it routes through RESUMING so the next
156
+ * {@link transitionState} recomputes the correct state under the new mode.
157
+ *
158
+ * @param runMode - The target live mode. DETERMINISTIC is not a live mode and
159
+ * is ignored. AI modes (AUTONOMOUS/SUPERVISED) require a GPT client.
160
+ * @param approvePending - When switching to AUTONOMOUS with an AI proposal
161
+ * awaiting approval, approve and run it as part of the switch.
162
+ */
163
+ private applyRunModeChange;
164
+ /**
165
+ * Whether the flow can hand control to the AI: it needs both a GPT client and
166
+ * an overall objective for the agent to pursue. Surfaced to the UI (as
167
+ * `canUseAi`) so the autonomy selector can disable the AI modes when they
168
+ * wouldn't work — e.g. a Playwright-imported test with no objective.
169
+ */
170
+ private canHandOffToAi;
103
171
  /**
104
172
  * This method is called if there is an unhandled unexpected exception. This
105
173
  * method will mark the flow as a failure.
@@ -171,9 +239,38 @@ export declare class DonobuFlow {
171
239
  * initializes the GPT message history.
172
240
  */
173
241
  private onInitializing;
242
+ /**
243
+ * Assembles the {@link ToolCallContext} handed to a tool. Shared by actual
244
+ * execution ({@link onRunningAction}) and the SUPERVISED-mode cursor preview
245
+ * ({@link previewProposedInteraction}) so both see an identical environment.
246
+ */
247
+ private buildToolCallContext;
248
+ /**
249
+ * SUPERVISED mode: move the on-screen cursor to where the head proposed
250
+ * action *would* interact, so the user can see the target while deciding
251
+ * whether to approve it. This never executes the action — it only previews
252
+ * the interaction point. Best-effort: tools without a visible target (and any
253
+ * resolution failure) are simply skipped.
254
+ */
255
+ private previewProposedInteraction;
174
256
  private onRunningAction;
175
257
  private onQueryingLlmForNextAction;
176
258
  private onWaitingForUserForNextAction;
259
+ /**
260
+ * SUPERVISED mode: an AI-proposed action is parked awaiting the user's
261
+ * decision. We idle here until an APPROVE/REJECT (or other intervention)
262
+ * arrives via the control panel or a REST endpoint, which the run loop picks
263
+ * up as a {@link UserInterruptException}. Mirrors
264
+ * {@link onWaitingForUserForNextAction}.
265
+ *
266
+ * Unlike {@link onPaused}, we must NOT pin `nextState` here: the proposal
267
+ * still sits in `proposedToolCalls`, so the approval gate in
268
+ * {@link transitionState} re-parks us each poll on its own. Pinning it would
269
+ * also leave a stale `nextState` that survives an APPROVE interrupt (which
270
+ * sets `state` directly), causing the next transition to skip querying the
271
+ * LLM and park forever with an empty proposal queue.
272
+ */
273
+ private onWaitingForApproval;
177
274
  private onPaused;
178
275
  private onResuming;
179
276
  private onFailed;