@donggui/core 1.5.14 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import { AiAssertElement } from "../ai-model/assert.mjs";
1
2
  import { ScreenshotItem } from "../screenshot-item.mjs";
2
3
  import service from "../service/index.mjs";
3
4
  import { ExecutionDump, GroupedActionDump } from "../types.mjs";
@@ -7,7 +8,7 @@ import { ReportGenerator } from "../report-generator.mjs";
7
8
  import { getVersion, processCacheConfig, reportHTMLContent } from "../utils.mjs";
8
9
  import { ScriptPlayer, buildDetailedLocateParam, parseYamlScript } from "../yaml/index.mjs";
9
10
  import { existsSync } from "node:fs";
10
- import { resolve } from "node:path";
11
+ import { resolve as external_node_path_resolve } from "node:path";
11
12
  import { MIDSCENE_REPLANNING_CYCLE_LIMIT, ModelConfigManager, globalConfigManager, globalModelConfigManager } from "@midscene/shared/env";
12
13
  import { getDebug } from "@midscene/shared/logger";
13
14
  import { assert, ifInBrowser, uuid } from "@midscene/shared/utils";
@@ -162,6 +163,7 @@ class Agent {
162
163
  }
163
164
  async callActionInActionSpace(type, opt) {
164
165
  debug('callActionInActionSpace', type, ',', opt);
166
+ const beforeScreenshot = await this.interface.screenshotBase64();
165
167
  const actionPlan = {
166
168
  type: type,
167
169
  param: opt || {},
@@ -175,6 +177,15 @@ class Agent {
175
177
  const defaultIntentModelConfig = this.modelConfigManager.getModelConfig('default');
176
178
  const modelConfigForPlanning = this.modelConfigManager.getModelConfig('planning');
177
179
  const { output } = await this.taskExecutor.runPlans(title, plans, modelConfigForPlanning, defaultIntentModelConfig);
180
+ const afterScreenshot = await this.interface.screenshotBase64();
181
+ this.actionScreenshotHistory.push({
182
+ beforeScreenshot,
183
+ afterScreenshot,
184
+ actionType: type,
185
+ actionParam: opt,
186
+ timestamp: Date.now()
187
+ });
188
+ if (this.actionScreenshotHistory.length > this.maxScreenshotHistoryLength) this.actionScreenshotHistory.shift();
178
189
  return output;
179
190
  }
180
191
  async aiTap(locatePrompt, opt) {
@@ -290,6 +301,8 @@ class Agent {
290
301
  const abortSignal = opt?.abortSignal;
291
302
  if (abortSignal?.aborted) throw new Error(`aiAct aborted: ${abortSignal.reason || 'signal already aborted'}`);
292
303
  const runAiAct = async ()=>{
304
+ this.aiActStartScreenshot = await this.interface.screenshotBase64();
305
+ this.actionScreenshotHistory = [];
293
306
  const modelConfigForPlanning = this.modelConfigManager.getModelConfig('planning');
294
307
  const defaultIntentModelConfig = this.modelConfigManager.getModelConfig('default');
295
308
  const deepThink = opt?.deepThink === 'unset' ? void 0 : opt?.deepThink;
@@ -426,19 +439,169 @@ class Agent {
426
439
  }
427
440
  async aiAssert(assertion, msg, opt) {
428
441
  const modelConfig = this.modelConfigManager.getModelConfig('insight');
429
- const serviceOpt = {
430
- domIncluded: opt?.domIncluded ?? defaultServiceExtractOption.domIncluded,
431
- screenshotIncluded: opt?.screenshotIncluded ?? defaultServiceExtractOption.screenshotIncluded
432
- };
433
- const { textPrompt, multimodalPrompt } = parsePrompt(assertion);
442
+ const { textPrompt } = parsePrompt(assertion);
434
443
  const assertionText = 'string' == typeof assertion ? assertion : assertion.prompt;
444
+ let beforeScreenshot;
445
+ let afterScreenshot;
446
+ const mode = opt?.screenshotMode || 'auto';
447
+ switch(mode){
448
+ case 'manual':
449
+ beforeScreenshot = opt?.beforeScreenshot;
450
+ afterScreenshot = opt?.afterScreenshot;
451
+ break;
452
+ case 'flow':
453
+ beforeScreenshot = this.aiActStartScreenshot || this.actionScreenshotHistory[0]?.beforeScreenshot;
454
+ afterScreenshot = await this.interface.screenshotBase64();
455
+ break;
456
+ case 'currentOnly':
457
+ beforeScreenshot = void 0;
458
+ afterScreenshot = await this.interface.screenshotBase64();
459
+ break;
460
+ case 'lastAction':
461
+ if (this.actionScreenshotHistory.length > 0) {
462
+ const lastContext = this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
463
+ beforeScreenshot = lastContext.beforeScreenshot;
464
+ afterScreenshot = await this.interface.screenshotBase64();
465
+ } else {
466
+ beforeScreenshot = void 0;
467
+ afterScreenshot = await this.interface.screenshotBase64();
468
+ }
469
+ break;
470
+ case 'diff':
471
+ afterScreenshot = await this.interface.screenshotBase64();
472
+ break;
473
+ case 'video':
474
+ beforeScreenshot = void 0;
475
+ afterScreenshot = await this.interface.screenshotBase64();
476
+ break;
477
+ default:
478
+ if (opt?.beforeScreenshot && opt?.afterScreenshot) {
479
+ beforeScreenshot = opt.beforeScreenshot;
480
+ afterScreenshot = opt.afterScreenshot;
481
+ } else if (this.actionScreenshotHistory.length > 0) {
482
+ const lastContext = this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
483
+ beforeScreenshot = lastContext.beforeScreenshot;
484
+ afterScreenshot = await this.interface.screenshotBase64();
485
+ } else {
486
+ beforeScreenshot = void 0;
487
+ afterScreenshot = await this.interface.screenshotBase64();
488
+ }
489
+ break;
490
+ }
491
+ const executeAssertion = async ()=>{
492
+ if ('diff' === mode && opt?.referenceImages && opt.referenceImages.length > 0) {
493
+ const { AiAssertDiff } = await import("../ai-model/assert.mjs");
494
+ const diffResult = await AiAssertDiff({
495
+ currentScreenshot: afterScreenshot,
496
+ referenceImages: opt.referenceImages,
497
+ assertion: textPrompt,
498
+ businessContext: opt.businessContext,
499
+ diffThreshold: opt.diffThreshold,
500
+ ignoreRegions: opt.ignoreRegions,
501
+ ignoreDynamicContent: opt.ignoreDynamicContent,
502
+ strictMode: opt.strictMode,
503
+ modelConfig
504
+ });
505
+ return {
506
+ pass: diffResult.pass,
507
+ thought: diffResult.thought,
508
+ reason: diffResult.reason,
509
+ systemCheckResults: void 0,
510
+ diffDetails: diffResult.diffDetails,
511
+ videoDetails: void 0
512
+ };
513
+ }
514
+ if ('video' === mode && opt?.currentVideo) {
515
+ const { AiAssertVideo } = await import("../ai-model/assert.mjs");
516
+ const MAX_DURATION = 5;
517
+ const DEFAULT_FPS = 30;
518
+ const MAX_FRAMES = MAX_DURATION * DEFAULT_FPS;
519
+ let videoFrames = [];
520
+ if (opt.currentVideo.frames && opt.currentVideo.frames.length > 0) videoFrames = opt.currentVideo.frames.slice(0, MAX_FRAMES);
521
+ else if ('url' === opt.currentVideo.format && opt.currentVideo.url) throw new Error('Video URL format is not yet supported. Please provide video frames directly.');
522
+ else throw new Error('currentVideo.frames is required for video assertion mode');
523
+ if (0 === videoFrames.length) throw new Error('No video frames provided for video assertion');
524
+ const videoResult = await AiAssertVideo({
525
+ currentVideoFrames: videoFrames,
526
+ assertion: textPrompt,
527
+ businessContext: opt.businessContext,
528
+ videoOptions: opt.videoOptions,
529
+ modelConfig
530
+ });
531
+ return {
532
+ pass: videoResult.pass,
533
+ thought: videoResult.thought,
534
+ reason: videoResult.reason,
535
+ systemCheckResults: void 0,
536
+ diffDetails: void 0,
537
+ videoDetails: videoResult.videoDetails
538
+ };
539
+ }
540
+ const assertResult = await AiAssertElement({
541
+ beforeScreenshot,
542
+ afterScreenshot,
543
+ assertion: textPrompt,
544
+ businessContext: opt?.businessContext,
545
+ enableSystemCheck: opt?.enableSystemCheck ?? true,
546
+ customSystemCheckRules: opt?.customSystemCheckRules,
547
+ modelConfig
548
+ });
549
+ return {
550
+ pass: assertResult.pass,
551
+ thought: assertResult.thought,
552
+ reason: assertResult.reason,
553
+ systemCheckResults: assertResult.systemCheckResults,
554
+ diffDetails: void 0
555
+ };
556
+ };
557
+ const runWithRetry = async ()=>{
558
+ const retryOptions = opt?.retryOptions;
559
+ let lastError;
560
+ let attempts = 0;
561
+ const maxAttempts = retryOptions?.maxRetries ? retryOptions.maxRetries + 1 : 1;
562
+ while(attempts < maxAttempts)try {
563
+ attempts++;
564
+ const result = await executeAssertion();
565
+ return {
566
+ pass: result.pass,
567
+ thought: result.thought,
568
+ reason: result.reason,
569
+ systemCheckResults: result.systemCheckResults,
570
+ diffDetails: result.diffDetails
571
+ };
572
+ } catch (error) {
573
+ lastError = error instanceof Error ? error : new Error(String(error));
574
+ if (attempts < maxAttempts && retryOptions?.retryInterval) await new Promise((resolve)=>setTimeout(resolve, retryOptions.retryInterval));
575
+ }
576
+ throw lastError || new Error('Assertion failed after retries');
577
+ };
435
578
  try {
436
- const { output, thought } = await this.taskExecutor.createTypeQueryExecution('Assert', textPrompt, modelConfig, serviceOpt, multimodalPrompt);
437
- const pass = Boolean(output);
438
- const message = pass ? void 0 : `Assertion failed: ${msg || assertionText}\nReason: ${thought || '(no_reason)'}`;
579
+ const result = await runWithRetry();
580
+ const { pass, thought, reason, systemCheckResults, diffDetails } = result;
581
+ if (opt?.saveSnapshot) {
582
+ const snapshot = {
583
+ beforeScreenshot,
584
+ afterScreenshot,
585
+ timestamp: Date.now(),
586
+ assertion: assertionText
587
+ };
588
+ if (opt?.snapshotPath) {
589
+ const fs = await import("node:fs");
590
+ const path = await import("node:path");
591
+ const snapshotDir = path.dirname(opt.snapshotPath);
592
+ if (!fs.existsSync(snapshotDir)) fs.mkdirSync(snapshotDir, {
593
+ recursive: true
594
+ });
595
+ fs.writeFileSync(opt.snapshotPath, JSON.stringify(snapshot, null, 2));
596
+ }
597
+ }
598
+ const message = pass ? void 0 : `Assertion failed: ${msg || assertionText}\nReason: ${reason || thought || '(no_reason)'}`;
439
599
  if (opt?.keepRawResponse) return {
440
600
  pass,
441
601
  thought,
602
+ reason,
603
+ systemCheckResults,
604
+ diffDetails,
442
605
  message
443
606
  };
444
607
  if (!pass) throw new Error(message);
@@ -453,6 +616,7 @@ class Agent {
453
616
  if (opt?.keepRawResponse) return {
454
617
  pass: false,
455
618
  thought,
619
+ reason,
456
620
  message
457
621
  };
458
622
  throw new Error(message, {
@@ -462,6 +626,16 @@ class Agent {
462
626
  throw error;
463
627
  }
464
628
  }
629
+ clearActionScreenshotHistory() {
630
+ this.actionScreenshotHistory = [];
631
+ this.aiActStartScreenshot = null;
632
+ }
633
+ getLastActionScreenshotContext() {
634
+ return this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
635
+ }
636
+ getFlowStartScreenshot() {
637
+ return this.aiActStartScreenshot;
638
+ }
465
639
  async aiWaitFor(assertion, opt) {
466
640
  const modelConfig = this.modelConfigManager.getModelConfig('insight');
467
641
  await this.taskExecutor.waitFor(assertion, {
@@ -610,7 +784,7 @@ class Agent {
610
784
  normalizeFilePaths(files) {
611
785
  if (ifInBrowser) throw new Error('File chooser is not supported in browser environment');
612
786
  return files.map((file)=>{
613
- const absolutePath = resolve(file);
787
+ const absolutePath = external_node_path_resolve(file);
614
788
  if (!existsSync(absolutePath)) throw new Error(`File not found: ${file}`);
615
789
  return absolutePath;
616
790
  });
@@ -644,6 +818,9 @@ class Agent {
644
818
  _define_property(this, "executionDumpIndexByRunner", new WeakMap());
645
819
  _define_property(this, "fullActionSpace", void 0);
646
820
  _define_property(this, "reportGenerator", void 0);
821
+ _define_property(this, "actionScreenshotHistory", []);
822
+ _define_property(this, "maxScreenshotHistoryLength", 5);
823
+ _define_property(this, "aiActStartScreenshot", null);
647
824
  this.interface = interfaceInstance;
648
825
  const envReplanningCycleLimit = globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_REPLANNING_CYCLE_LIMIT);
649
826
  this.opts = Object.assign({