donobu 5.35.0 → 5.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -310,6 +310,18 @@ function extractTests(jsonData) {
310
310
  // Ignore parse failures
311
311
  }
312
312
  }
313
+ // Parse AI invocation wrappers from donobu-ai-invocations attachment
314
+ let aiInvocations = [];
315
+ const aiInvAtt = attachments.find((a) => a.name === 'donobu-ai-invocations');
316
+ if (aiInvAtt?.body) {
317
+ try {
318
+ const decoded = Buffer.from(aiInvAtt.body, 'base64').toString('utf8');
319
+ aiInvocations = JSON.parse(decoded);
320
+ }
321
+ catch {
322
+ // Ignore parse failures
323
+ }
324
+ }
313
325
  return {
314
326
  index: i,
315
327
  status: r.status,
@@ -334,6 +346,7 @@ function extractTests(jsonData) {
334
346
  steps: parseStderrSteps(r.stderr ?? []),
335
347
  stepScreenshots,
336
348
  nativeSteps,
349
+ aiInvocations,
337
350
  };
338
351
  });
339
352
  // Extract flow ID from the test-flow-metadata.json attachment
@@ -613,6 +626,107 @@ function renderNativeStep(ns, childrenHtml) {
613
626
  html += `</details>`;
614
627
  return html;
615
628
  }
629
/**
 * Badge text shown for each AI invocation kind, keyed by the record's `kind`.
 *
 * Built on a null prototype because the table is indexed with a value that
 * comes from a parsed report attachment: with a plain object literal, keys
 * such as `constructor` or `toString` would resolve through
 * `Object.prototype` and yield a function instead of `undefined`.
 */
const AI_KIND_LABELS = Object.assign(Object.create(null), {
    act: 'page.ai',
    assert: 'page.ai.assert',
    locate: 'page.ai.locate',
});
634
/**
 * Render a single structured assertion step back as the Playwright source
 * line that effectively executes, e.g.
 * `expect(page.getByRole('heading', { name: 'Create an account' }).first()).toBeVisible()`.
 * Used to surface in the report what a cached `page.ai.assert` actually
 * checked. The result is display text only; it is never evaluated.
 *
 * @param step Assertion step. Fields read: `locator` (`null` for page-level
 *   assertions, otherwise `'role'`, `'label'`, or anything else for text),
 *   `role`, `value`, `valueIsRegex`, `assertion`, `attributeValue`.
 * @returns One line of Playwright-like source; CR/LF inside quoted values are
 *   escaped so the line never spans multiple rows.
 */
function formatAssertionStep(step) {
    // Steps are parsed from a JSON attachment, so a field may be missing or
    // not a string. Coerce instead of letting `.replace` throw and abort the
    // whole report.
    const quote = (s) => {
        const raw = s === undefined || s === null ? '' : String(s);
        const escaped = raw
            .replace(/\\/g, '\\\\')
            .replace(/'/g, "\\'")
            .replace(/\r/g, '\\r')
            .replace(/\n/g, '\\n');
        return `'${escaped}'`;
    };
    const matcher = step.valueIsRegex ? `/${step.value}/` : quote(step.value);
    // Page-level assertions (no element locator)
    if (step.locator === null) {
        return `expect(page).${step.assertion}(${matcher})`;
    }
    let locatorExpr;
    if (step.locator === 'role' && step.role) {
        locatorExpr = `page.getByRole(${quote(step.role)}, { name: ${matcher} })`;
    }
    else if (step.locator === 'label') {
        locatorExpr = `page.getByLabel(${matcher})`;
    }
    else {
        locatorExpr = `page.getByText(${matcher})`;
    }
    locatorExpr += '.first()';
    const attrValue = step.attributeValue ?? '';
    switch (step.assertion) {
        case 'toBeVisible':
        case 'toBeEnabled':
        case 'toBeDisabled':
        case 'toBeChecked':
            return `expect(${locatorExpr}).${step.assertion}()`;
        case 'toBeHidden':
            // Executor uses `not.toBeVisible()` for `toBeHidden`; mirror that here.
            return `expect(${locatorExpr}).not.toBeVisible()`;
        case 'toHaveValue':
        case 'toContainText':
            return `expect(${locatorExpr}).${step.assertion}(${quote(attrValue)})`;
        case 'toHaveAttribute':
            // `value` doubles as the attribute name for this assertion.
            return `expect(${locatorExpr}).toHaveAttribute(${quote(step.value)}, ${quote(attrValue)})`;
        default:
            return `expect(${locatorExpr}).${step.assertion}(${matcher})`;
    }
}
677
/**
 * Render one AI invocation wrapper (`page.ai`, `page.ai.assert`,
 * `page.ai.locate`) as report HTML.
 *
 * With no error, no children and no assertion steps the result is a plain
 * `<div>` row; otherwise it is an expanded `<details>` element containing, in
 * order, the error message, the formatted assertion steps and the children.
 *
 * @param inv Invocation record. Fields read: `passed`, `kind`, `cacheHit`,
 *   `description`, `error.message`, `assertSteps`.
 * @param childrenHtml Pre-rendered HTML for nested nodes; inserted verbatim,
 *   so the caller is responsible for its escaping. Empty string for none.
 * @returns The HTML fragment for this invocation.
 */
function renderAiInvocation(inv, childrenHtml) {
    const statusIcon = inv.passed
        ? '<span class="step-status-ok">&#10003;</span>'
        : '<span class="step-status-fail">&#10007;</span>';
    // `inv` is parsed from a report attachment. Only a known kind may reach the
    // class attribute unescaped; anything else gets a fixed `unknown` suffix
    // and its raw (escaped) text as the label, rather than passing `undefined`
    // to `esc`.
    const isKnownKind = typeof inv.kind === 'string' &&
        Object.prototype.hasOwnProperty.call(AI_KIND_LABELS, inv.kind);
    const kindClass = isKnownKind ? inv.kind : 'unknown';
    const kindLabel = isKnownKind
        ? AI_KIND_LABELS[inv.kind]
        : String(inv.kind ?? 'unknown');
    const kindBadge = `<span class="ai-invocation-badge ai-invocation-badge--${kindClass}">${esc(kindLabel)}</span>`;
    const cachedBadge = inv.cacheHit
        ? '<span class="ai-cached-badge">cached</span>'
        : '';
    const hasError = !inv.passed && !!inv.error?.message;
    // Anything that is not an array is treated as "no steps" so `.map` below
    // cannot throw on malformed data.
    const assertSteps = Array.isArray(inv.assertSteps) ? inv.assertSteps : [];
    const hasAssertSteps = assertSteps.length > 0;
    const hasBody = hasError || !!childrenHtml || hasAssertSteps;
    const title = esc(String(inv.description ?? ''));
    const renderHeader = (tag) => {
        let header = `<${tag} class="filmstrip-header">`;
        header += statusIcon;
        header += `<span class="ai-invocation-title">${title}</span>`;
        header += kindBadge;
        header += cachedBadge;
        if (tag === 'summary') {
            // Only the collapsible variant gets a disclosure chevron.
            header +=
                '<span class="native-step-chevron" aria-hidden="true">&#9656;</span>';
        }
        header += `</${tag}>`;
        return header;
    };
    if (!hasBody) {
        // Leaf row — no children, no error, no assertion steps.
        return `<div class="filmstrip-step ai-invocation">${renderHeader('div')}</div>`;
    }
    // Every path into this branch implies "open": an error implies a failed
    // invocation, and children / assertion steps open passing wrappers too.
    // The element is therefore always rendered expanded.
    const passClass = inv.passed
        ? 'ai-invocation--passed'
        : 'ai-invocation--failed';
    let html = `<details class="filmstrip-step ai-invocation ${passClass}" open>`;
    html += renderHeader('summary');
    if (hasError) {
        html += `<pre class="native-step-error">${ansiToHtml(inv.error.message)}</pre>`;
    }
    if (hasAssertSteps) {
        const lines = assertSteps
            .map((s) => esc(formatAssertionStep(s)))
            .join('\n');
        html += `<pre class="ai-assert-steps">${lines}</pre>`;
    }
    if (childrenHtml) {
        html += childrenHtml;
    }
    html += `</details>`;
    return html;
}
616
730
  const AUDIT_CHECK_DEFS = [
617
731
  {
618
732
  key: 'pageLoad',
@@ -852,14 +966,15 @@ function renderFilmstripStep(ss, outputDir) {
852
966
  html += `</div>`;
853
967
  return html;
854
968
  }
855
- function renderSteps(steps, stepScreenshots, nativeSteps, outputDir) {
969
+ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputDir) {
856
970
  const meaningful = steps.filter((s) => s.type === 'action' || s.type === 'result');
857
971
  const hasScreenshots = stepScreenshots.length > 0;
858
972
  const hasNative = nativeSteps.length > 0;
859
- if (!meaningful.length && !hasScreenshots && !hasNative) {
973
+ const hasAi = aiInvocations.length > 0;
974
+ if (!meaningful.length && !hasScreenshots && !hasNative && !hasAi) {
860
975
  return '';
861
976
  }
862
- if (hasScreenshots || hasNative) {
977
+ if (hasScreenshots || hasNative || hasAi) {
863
978
  const buildNativeTree = (nss) => nss.map((ns) => ({
864
979
  kind: 'native',
865
980
  ns,
@@ -868,32 +983,53 @@ function renderSteps(steps, stepScreenshots, nativeSteps, outputDir) {
868
983
  children: buildNativeTree(ns.children),
869
984
  }));
870
985
  const roots = buildNativeTree(nativeSteps);
871
- // Place each Donobu screenshot under the deepest native step whose
872
- // [start, end] window contains it. Falls back to top level if none.
873
- const placeDonobu = (nodes, d) => {
986
+ // Place a node into the deepest container whose [t, tEnd] window
987
+ // contains its [tStart, tEnd]. Returns true on placement. Both native
988
+ // steps and AI invocations are eligible parents.
989
+ const placeNode = (nodes, leaf, tStart, tEnd) => {
874
990
  for (const n of nodes) {
875
- if (n.kind !== 'native') {
991
+ if (n.kind !== 'native' && n.kind !== 'ai') {
876
992
  continue;
877
993
  }
878
- if (d.ss.startedAt >= n.t && d.ss.completedAt <= n.tEnd) {
879
- if (!placeDonobu(n.children, d)) {
880
- n.children.push(d);
994
+ if (tStart >= n.t && tEnd <= n.tEnd) {
995
+ if (!placeNode(n.children, leaf, tStart, tEnd)) {
996
+ n.children.push(leaf);
881
997
  }
882
998
  return true;
883
999
  }
884
1000
  }
885
1001
  return false;
886
1002
  };
1003
+ // AI invocations placed first, longer-window first so an outer cached
1004
+ // `page.ai` is in place before its inner `page.ai.assert` lands.
1005
+ const sortedInvocations = [...aiInvocations].sort((a, b) => b.endedAt - b.startedAt - (a.endedAt - a.startedAt));
1006
+ for (const inv of sortedInvocations) {
1007
+ const node = {
1008
+ kind: 'ai',
1009
+ inv,
1010
+ t: inv.startedAt,
1011
+ tEnd: inv.endedAt,
1012
+ children: [],
1013
+ };
1014
+ if (!placeNode(roots, node, inv.startedAt, inv.endedAt)) {
1015
+ roots.push(node);
1016
+ }
1017
+ }
887
1018
  for (const ss of stepScreenshots) {
888
- const d = { kind: 'donobu', ss, t: ss.startedAt };
889
- if (!placeDonobu(roots, d)) {
1019
+ const d = {
1020
+ kind: 'donobu',
1021
+ ss,
1022
+ t: ss.startedAt,
1023
+ tEnd: ss.completedAt,
1024
+ };
1025
+ if (!placeNode(roots, d, ss.startedAt, ss.completedAt)) {
890
1026
  roots.push(d);
891
1027
  }
892
1028
  }
893
1029
  const sortTree = (nodes) => {
894
1030
  nodes.sort((a, b) => a.t - b.t);
895
1031
  for (const n of nodes) {
896
- if (n.kind === 'native') {
1032
+ if (n.kind === 'native' || n.kind === 'ai') {
897
1033
  sortTree(n.children);
898
1034
  }
899
1035
  }
@@ -903,7 +1039,7 @@ function renderSteps(steps, stepScreenshots, nativeSteps, outputDir) {
903
1039
  let c = 0;
904
1040
  for (const n of nodes) {
905
1041
  c += 1;
906
- if (n.kind === 'native') {
1042
+ if (n.kind === 'native' || n.kind === 'ai') {
907
1043
  c += countNodes(n.children);
908
1044
  }
909
1045
  }
@@ -913,6 +1049,12 @@ function renderSteps(steps, stepScreenshots, nativeSteps, outputDir) {
913
1049
  if (node.kind === 'donobu') {
914
1050
  return renderFilmstripStep(node.ss, outputDir);
915
1051
  }
1052
+ if (node.kind === 'ai') {
1053
+ const childrenHtml = node.children.length > 0
1054
+ ? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
1055
+ : '';
1056
+ return renderAiInvocation(node.inv, childrenHtml);
1057
+ }
916
1058
  const childrenHtml = node.children.length > 0
917
1059
  ? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
918
1060
  : '';
@@ -1227,7 +1369,7 @@ function renderResultTimeline(results, outputDir) {
1227
1369
  html += `<div class="timeline-errors">${renderErrors(r.errors)}</div>`;
1228
1370
  }
1229
1371
  html += renderAttachments(r.attachments, outputDir, r.stepScreenshots);
1230
- html += renderSteps(r.steps, r.stepScreenshots, r.nativeSteps, outputDir);
1372
+ html += renderSteps(r.steps, r.stepScreenshots, r.nativeSteps, r.aiInvocations, outputDir);
1231
1373
  html += '</div></div>';
1232
1374
  }
1233
1375
  html += '</div>';
@@ -1385,7 +1527,7 @@ function renderHtml(report, triage, outputDir) {
1385
1527
  }
1386
1528
  // 6. Steps — detailed forensics
1387
1529
  if (!hasMultipleResults && lastResult) {
1388
- detailsHtml += renderSteps(lastResult.steps, lastResult.stepScreenshots, lastResult.nativeSteps, outputDir);
1530
+ detailsHtml += renderSteps(lastResult.steps, lastResult.stepScreenshots, lastResult.nativeSteps, lastResult.aiInvocations, outputDir);
1389
1531
  }
1390
1532
  // 7. Triage details — remediation steps (expandable)
1391
1533
  if (test.plan) {
@@ -1659,6 +1801,18 @@ details.native-step[open]>summary .native-step-chevron{transform:rotate(90deg)}
1659
1801
  .native-step-snippet{font-size:11px;font-family:var(--mono);margin:4px 0 2px 22px;overflow:hidden}
1660
1802
  .native-step-children{display:flex;flex-direction:column;margin:4px 0 0 10px;border-left:1px solid var(--border-subtle);padding-left:8px}
1661
1803
  .native-step-children>.filmstrip-step{padding-left:8px}
1804
+
1805
+ /* AI invocation wrappers — page.ai / page.ai.assert / page.ai.locate */
1806
+ details.ai-invocation>summary{list-style:none;cursor:pointer}
1807
+ details.ai-invocation>summary::-webkit-details-marker{display:none}
1808
+ .ai-invocation-title{font-size:12px;font-weight:500;color:var(--text);font-family:var(--mono);flex:1;min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
1809
+ .ai-invocation-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;font-family:var(--mono)}
1810
+ .ai-invocation-badge--act{background:rgba(168,85,247,.12);color:#c084fc}
1811
+ .ai-invocation-badge--assert{background:rgba(236,72,153,.12);color:#f472b6}
1812
+ .ai-invocation-badge--locate{background:rgba(59,130,246,.12);color:#60a5fa}
1813
+ .ai-cached-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;background:rgba(245,158,11,.12);color:#fbbf24}
1814
+ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)}
1815
+ .ai-assert-steps{font-size:11px;font-family:var(--mono);background:var(--bg);border:1px solid var(--border-subtle);border-radius:var(--radius);padding:8px 12px;margin:6px 0 2px 22px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;overflow-x:auto;max-height:240px;overflow-y:auto}
1662
1816
  .snippet-line{display:flex;padding:1px 8px;white-space:pre}
1663
1817
  .snippet-line--target{background:rgba(239,68,68,.10)}
1664
1818
  .snippet-linenum{color:var(--text-dim);min-width:40px;user-select:none}
@@ -148,52 +148,75 @@ class PageAi {
148
148
  return new PageAi(donobu, gptClient, new cache_1.InMemoryPageAiCache());
149
149
  }
150
150
  async ai(page, instruction, options) {
151
- const descriptor = this.buildDescriptor(page, instruction, options);
152
- // Keep the per-page metadata in sync with the env vars needed for this invocation so cached
153
- // replays can resolve interpolations via runTool.
154
- page._dnb.donobuFlowMetadata.envVars = descriptor.envVarNames;
155
- const cachedEntry = descriptor.useCache
156
- ? await this.cache.get(descriptor.key)
157
- : null;
158
- if (cachedEntry) {
159
- page._dnb.donobuFlowMetadata.runMode = 'DETERMINISTIC';
160
- page._dnb.envVals = descriptor.envVals;
161
- try {
162
- await cachedEntry.run({ page });
151
+ const startedAt = Date.now();
152
+ let cacheHit = false;
153
+ let thrownError = undefined;
154
+ try {
155
+ const descriptor = this.buildDescriptor(page, instruction, options);
156
+ // Keep the per-page metadata in sync with the env vars needed for this invocation so cached
157
+ // replays can resolve interpolations via runTool.
158
+ page._dnb.donobuFlowMetadata.envVars = descriptor.envVarNames;
159
+ const cachedEntry = descriptor.useCache
160
+ ? await this.cache.get(descriptor.key)
161
+ : null;
162
+ cacheHit = !!cachedEntry;
163
+ if (cachedEntry) {
164
+ page._dnb.donobuFlowMetadata.runMode = 'DETERMINISTIC';
165
+ page._dnb.envVals = descriptor.envVals;
166
+ try {
167
+ await cachedEntry.run({ page });
168
+ }
169
+ finally {
170
+ page._dnb.envVals = undefined;
171
+ }
172
+ return this.synthesizeResultFromMetadata(page, instruction, descriptor, options);
163
173
  }
164
- finally {
165
- page._dnb.envVals = undefined;
174
+ else {
175
+ const runResult = await this.runner.run({
176
+ page,
177
+ instruction,
178
+ schema: descriptor.schema,
179
+ jsonSchema: descriptor.jsonSchema,
180
+ allowedTools: descriptor.allowedTools,
181
+ maxToolCalls: descriptor.maxToolCalls,
182
+ envVarNames: descriptor.envVarNames,
183
+ envVals: descriptor.envVals,
184
+ runMode: 'AUTONOMOUS',
185
+ gptClient: options?.gptClient,
186
+ });
187
+ if (descriptor.useCache) {
188
+ const preparedToolCalls = await (0, DonobuFlowsManager_1.prepareToolCallsForRerun)(
189
+ // Only retain successfully run tool calls, otherwise when a cache file
190
+ // with some bad calls in it runs in the future, the test will blow up
191
+ // when the first bad tool call is read.
192
+ runResult.donobuFlow.invokedToolCalls.filter((tc) => {
193
+ return tc.outcome.isSuccessful;
194
+ }), {
195
+ areElementIdsVolatile: options?.volatileElementIds,
196
+ disableSelectorFailover: options?.noSelectorFailover,
197
+ }, this.donobu.toolRegistry);
198
+ const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
199
+ await this.cache.put(cacheEntry);
200
+ }
201
+ return runResult.parsedResult;
166
202
  }
167
- return this.synthesizeResultFromMetadata(page, instruction, descriptor, options);
168
203
  }
169
- else {
170
- const runResult = await this.runner.run({
171
- page,
172
- instruction,
173
- schema: descriptor.schema,
174
- jsonSchema: descriptor.jsonSchema,
175
- allowedTools: descriptor.allowedTools,
176
- maxToolCalls: descriptor.maxToolCalls,
177
- envVarNames: descriptor.envVarNames,
178
- envVals: descriptor.envVals,
179
- runMode: 'AUTONOMOUS',
180
- gptClient: options?.gptClient,
204
+ catch (e) {
205
+ thrownError = e;
206
+ throw e;
207
+ }
208
+ finally {
209
+ page._dnb.aiInvocations.push({
210
+ kind: 'act',
211
+ description: instruction,
212
+ startedAt,
213
+ endedAt: Date.now(),
214
+ cacheHit,
215
+ passed: thrownError === undefined,
216
+ error: thrownError !== undefined
217
+ ? { message: thrownError?.message }
218
+ : undefined,
181
219
  });
182
- if (descriptor.useCache) {
183
- const preparedToolCalls = await (0, DonobuFlowsManager_1.prepareToolCallsForRerun)(
184
- // Only retain successfully run tool calls, otherwise when a cache file
185
- // with some bad calls in it runs in the future, the test will blow up
186
- // when the first bad tool call is read.
187
- runResult.donobuFlow.invokedToolCalls.filter((tc) => {
188
- return tc.outcome.isSuccessful;
189
- }), {
190
- areElementIdsVolatile: options?.volatileElementIds,
191
- disableSelectorFailover: options?.noSelectorFailover,
192
- }, this.donobu.toolRegistry);
193
- const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
194
- await this.cache.put(cacheEntry);
195
- }
196
- return runResult.parsedResult;
197
220
  }
198
221
  }
199
222
  /**
@@ -13,6 +13,7 @@ import type { FlowsPersistence } from '../../persistence/flows/FlowsPersistence'
13
13
  import type { TestsPersistence } from '../../persistence/tests/TestsPersistence';
14
14
  import type { CookieAnalyses } from '../../tools/CreateBrowserCookieReportTool';
15
15
  import type { AccessibilityResults } from '../../tools/RunAccessibilityTestTool';
16
+ import type { PlaywrightAssertionStep } from '../ai/cache/assertCache';
16
17
  import type { PageAiCache } from '../ai/cache/cache';
17
18
  import type { LocateOptions } from '../ai/locate/locateTypes';
18
19
  import type { PageAi, PageAiNoSchemaOptions, PageAiOptions, PageAiSchemaOptions } from '../ai/PageAi';
@@ -466,6 +467,38 @@ export interface DonobuExtendedPage extends Page {
466
467
  envVals?: Record<string, string | undefined>;
467
468
  /** Sessions recorded by {@link tbd} for post-test code generation. */
468
469
  tbdSessions: TbdSession[];
470
+ /**
471
+ * Wrapping records for every `page.ai`, `page.ai.assert`, and
472
+ * `page.ai.locate` invocation in this test. The HTML reporter renders
473
+ * each as a parent node containing whichever Donobu tool calls and
474
+ * native Playwright steps fell inside its time window, with a
475
+ * `[cached]` badge driven by the per-record `cacheHit` flag.
476
+ *
477
+ * Recording happens for ALL calls (cache hit or miss) so the wrapper
478
+ * is visible regardless. Nested AI calls (e.g. a cached `page.ai`
479
+ * whose runSource calls `page.ai.assert(...)`) become nested wrappers
480
+ * — each carries its own cache state.
481
+ */
482
+ aiInvocations: AiInvocationRecord[];
469
483
  };
470
484
  }
485
/**
 * One recorded AI call made through the page: `kind` distinguishes
 * `page.ai` (`act`), `page.ai.assert` (`assert`) and `page.ai.locate`
 * (`locate`).
 */
export interface AiInvocationRecord {
    /** Which AI entry point produced this record. */
    kind: 'act' | 'assert' | 'locate';
    /** Text shown as the row title; for `act` records this is the instruction. */
    description: string;
    /** `Date.now()` timestamp taken when the call began. */
    startedAt: number;
    /** `Date.now()` timestamp taken when the call finished (success or failure). */
    endedAt: number;
    /** Whether a cached entry was found and replayed for this call. */
    cacheHit: boolean;
    /** `true` when the call completed without throwing. */
    passed: boolean;
    /** Set only when the call threw; carries the thrown error's message if any. */
    error?: { message?: string };
    /**
     * For cached `page.ai.assert` invocations: the structured Playwright
     * assertion steps that were replayed. The reporter formats these back
     * into source-code lines so the report shows exactly what was checked
     * (e.g. `expect(page.getByRole('heading', { name: '…' })).toBeVisible()`).
     * Undefined for live assert runs, `act`, and `locate` records.
     */
    assertSteps?: PlaywrightAssertionStep[];
}
471
504
  //# sourceMappingURL=DonobuExtendedPage.d.ts.map