@donggui/core 1.5.14 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,7 @@ __webpack_require__.d(__webpack_exports__, {
36
36
  Agent: ()=>Agent,
37
37
  createAgent: ()=>createAgent
38
38
  });
39
+ const assert_js_namespaceObject = require("../ai-model/assert.js");
39
40
  const external_screenshot_item_js_namespaceObject = require("../screenshot-item.js");
40
41
  const index_js_namespaceObject = require("../service/index.js");
41
42
  var index_js_default = /*#__PURE__*/ __webpack_require__.n(index_js_namespaceObject);
@@ -202,6 +203,7 @@ class Agent {
202
203
  }
203
204
  async callActionInActionSpace(type, opt) {
204
205
  debug('callActionInActionSpace', type, ',', opt);
206
+ const beforeScreenshot = await this.interface.screenshotBase64();
205
207
  const actionPlan = {
206
208
  type: type,
207
209
  param: opt || {},
@@ -215,6 +217,15 @@ class Agent {
215
217
  const defaultIntentModelConfig = this.modelConfigManager.getModelConfig('default');
216
218
  const modelConfigForPlanning = this.modelConfigManager.getModelConfig('planning');
217
219
  const { output } = await this.taskExecutor.runPlans(title, plans, modelConfigForPlanning, defaultIntentModelConfig);
220
+ const afterScreenshot = await this.interface.screenshotBase64();
221
+ this.actionScreenshotHistory.push({
222
+ beforeScreenshot,
223
+ afterScreenshot,
224
+ actionType: type,
225
+ actionParam: opt,
226
+ timestamp: Date.now()
227
+ });
228
+ if (this.actionScreenshotHistory.length > this.maxScreenshotHistoryLength) this.actionScreenshotHistory.shift();
218
229
  return output;
219
230
  }
220
231
  async aiTap(locatePrompt, opt) {
@@ -330,6 +341,8 @@ class Agent {
330
341
  const abortSignal = opt?.abortSignal;
331
342
  if (abortSignal?.aborted) throw new Error(`aiAct aborted: ${abortSignal.reason || 'signal already aborted'}`);
332
343
  const runAiAct = async ()=>{
344
+ this.aiActStartScreenshot = await this.interface.screenshotBase64();
345
+ this.actionScreenshotHistory = [];
333
346
  const modelConfigForPlanning = this.modelConfigManager.getModelConfig('planning');
334
347
  const defaultIntentModelConfig = this.modelConfigManager.getModelConfig('default');
335
348
  const deepThink = opt?.deepThink === 'unset' ? void 0 : opt?.deepThink;
@@ -466,19 +479,169 @@ class Agent {
466
479
  }
467
480
  async aiAssert(assertion, msg, opt) {
468
481
  const modelConfig = this.modelConfigManager.getModelConfig('insight');
469
- const serviceOpt = {
470
- domIncluded: opt?.domIncluded ?? defaultServiceExtractOption.domIncluded,
471
- screenshotIncluded: opt?.screenshotIncluded ?? defaultServiceExtractOption.screenshotIncluded
472
- };
473
- const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
482
+ const { textPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
474
483
  const assertionText = 'string' == typeof assertion ? assertion : assertion.prompt;
484
+ let beforeScreenshot;
485
+ let afterScreenshot;
486
+ const mode = opt?.screenshotMode || 'auto';
487
+ switch(mode){
488
+ case 'manual':
489
+ beforeScreenshot = opt?.beforeScreenshot;
490
+ afterScreenshot = opt?.afterScreenshot;
491
+ break;
492
+ case 'flow':
493
+ beforeScreenshot = this.aiActStartScreenshot || this.actionScreenshotHistory[0]?.beforeScreenshot;
494
+ afterScreenshot = await this.interface.screenshotBase64();
495
+ break;
496
+ case 'currentOnly':
497
+ beforeScreenshot = void 0;
498
+ afterScreenshot = await this.interface.screenshotBase64();
499
+ break;
500
+ case 'lastAction':
501
+ if (this.actionScreenshotHistory.length > 0) {
502
+ const lastContext = this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
503
+ beforeScreenshot = lastContext.beforeScreenshot;
504
+ afterScreenshot = await this.interface.screenshotBase64();
505
+ } else {
506
+ beforeScreenshot = void 0;
507
+ afterScreenshot = await this.interface.screenshotBase64();
508
+ }
509
+ break;
510
+ case 'diff':
511
+ afterScreenshot = await this.interface.screenshotBase64();
512
+ break;
513
+ case 'video':
514
+ beforeScreenshot = void 0;
515
+ afterScreenshot = await this.interface.screenshotBase64();
516
+ break;
517
+ default:
518
+ if (opt?.beforeScreenshot && opt?.afterScreenshot) {
519
+ beforeScreenshot = opt.beforeScreenshot;
520
+ afterScreenshot = opt.afterScreenshot;
521
+ } else if (this.actionScreenshotHistory.length > 0) {
522
+ const lastContext = this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
523
+ beforeScreenshot = lastContext.beforeScreenshot;
524
+ afterScreenshot = await this.interface.screenshotBase64();
525
+ } else {
526
+ beforeScreenshot = void 0;
527
+ afterScreenshot = await this.interface.screenshotBase64();
528
+ }
529
+ break;
530
+ }
531
+ const executeAssertion = async ()=>{
532
+ if ('diff' === mode && opt?.referenceImages && opt.referenceImages.length > 0) {
533
+ const { AiAssertDiff } = await import("../ai-model/assert.js");
534
+ const diffResult = await AiAssertDiff({
535
+ currentScreenshot: afterScreenshot,
536
+ referenceImages: opt.referenceImages,
537
+ assertion: textPrompt,
538
+ businessContext: opt.businessContext,
539
+ diffThreshold: opt.diffThreshold,
540
+ ignoreRegions: opt.ignoreRegions,
541
+ ignoreDynamicContent: opt.ignoreDynamicContent,
542
+ strictMode: opt.strictMode,
543
+ modelConfig
544
+ });
545
+ return {
546
+ pass: diffResult.pass,
547
+ thought: diffResult.thought,
548
+ reason: diffResult.reason,
549
+ systemCheckResults: void 0,
550
+ diffDetails: diffResult.diffDetails,
551
+ videoDetails: void 0
552
+ };
553
+ }
554
+ if ('video' === mode && opt?.currentVideo) {
555
+ const { AiAssertVideo } = await import("../ai-model/assert.js");
556
+ const MAX_DURATION = 5;
557
+ const DEFAULT_FPS = 30;
558
+ const MAX_FRAMES = MAX_DURATION * DEFAULT_FPS;
559
+ let videoFrames = [];
560
+ if (opt.currentVideo.frames && opt.currentVideo.frames.length > 0) videoFrames = opt.currentVideo.frames.slice(0, MAX_FRAMES);
561
+ else if ('url' === opt.currentVideo.format && opt.currentVideo.url) throw new Error('Video URL format is not yet supported. Please provide video frames directly.');
562
+ else throw new Error('currentVideo.frames is required for video assertion mode');
563
+ if (0 === videoFrames.length) throw new Error('No video frames provided for video assertion');
564
+ const videoResult = await AiAssertVideo({
565
+ currentVideoFrames: videoFrames,
566
+ assertion: textPrompt,
567
+ businessContext: opt.businessContext,
568
+ videoOptions: opt.videoOptions,
569
+ modelConfig
570
+ });
571
+ return {
572
+ pass: videoResult.pass,
573
+ thought: videoResult.thought,
574
+ reason: videoResult.reason,
575
+ systemCheckResults: void 0,
576
+ diffDetails: void 0,
577
+ videoDetails: videoResult.videoDetails
578
+ };
579
+ }
580
+ const assertResult = await (0, assert_js_namespaceObject.AiAssertElement)({
581
+ beforeScreenshot,
582
+ afterScreenshot,
583
+ assertion: textPrompt,
584
+ businessContext: opt?.businessContext,
585
+ enableSystemCheck: opt?.enableSystemCheck ?? true,
586
+ customSystemCheckRules: opt?.customSystemCheckRules,
587
+ modelConfig
588
+ });
589
+ return {
590
+ pass: assertResult.pass,
591
+ thought: assertResult.thought,
592
+ reason: assertResult.reason,
593
+ systemCheckResults: assertResult.systemCheckResults,
594
+ diffDetails: void 0
595
+ };
596
+ };
597
+ const runWithRetry = async ()=>{
598
+ const retryOptions = opt?.retryOptions;
599
+ let lastError;
600
+ let attempts = 0;
601
+ const maxAttempts = retryOptions?.maxRetries ? retryOptions.maxRetries + 1 : 1;
602
+ while(attempts < maxAttempts)try {
603
+ attempts++;
604
+ const result = await executeAssertion();
605
+ return {
606
+ pass: result.pass,
607
+ thought: result.thought,
608
+ reason: result.reason,
609
+ systemCheckResults: result.systemCheckResults,
610
+ diffDetails: result.diffDetails
611
+ };
612
+ } catch (error) {
613
+ lastError = error instanceof Error ? error : new Error(String(error));
614
+ if (attempts < maxAttempts && retryOptions?.retryInterval) await new Promise((resolve)=>setTimeout(resolve, retryOptions.retryInterval));
615
+ }
616
+ throw lastError || new Error('Assertion failed after retries');
617
+ };
475
618
  try {
476
- const { output, thought } = await this.taskExecutor.createTypeQueryExecution('Assert', textPrompt, modelConfig, serviceOpt, multimodalPrompt);
477
- const pass = Boolean(output);
478
- const message = pass ? void 0 : `Assertion failed: ${msg || assertionText}\nReason: ${thought || '(no_reason)'}`;
619
+ const result = await runWithRetry();
620
+ const { pass, thought, reason, systemCheckResults, diffDetails } = result;
621
+ if (opt?.saveSnapshot) {
622
+ const snapshot = {
623
+ beforeScreenshot,
624
+ afterScreenshot,
625
+ timestamp: Date.now(),
626
+ assertion: assertionText
627
+ };
628
+ if (opt?.snapshotPath) {
629
+ const fs = await import("node:fs");
630
+ const path = await import("node:path");
631
+ const snapshotDir = path.dirname(opt.snapshotPath);
632
+ if (!fs.existsSync(snapshotDir)) fs.mkdirSync(snapshotDir, {
633
+ recursive: true
634
+ });
635
+ fs.writeFileSync(opt.snapshotPath, JSON.stringify(snapshot, null, 2));
636
+ }
637
+ }
638
+ const message = pass ? void 0 : `Assertion failed: ${msg || assertionText}\nReason: ${reason || thought || '(no_reason)'}`;
479
639
  if (opt?.keepRawResponse) return {
480
640
  pass,
481
641
  thought,
642
+ reason,
643
+ systemCheckResults,
644
+ diffDetails,
482
645
  message
483
646
  };
484
647
  if (!pass) throw new Error(message);
@@ -493,6 +656,7 @@ class Agent {
493
656
  if (opt?.keepRawResponse) return {
494
657
  pass: false,
495
658
  thought,
659
+ reason,
496
660
  message
497
661
  };
498
662
  throw new Error(message, {
@@ -502,6 +666,16 @@ class Agent {
502
666
  throw error;
503
667
  }
504
668
  }
669
+ clearActionScreenshotHistory() {
670
+ this.actionScreenshotHistory = [];
671
+ this.aiActStartScreenshot = null;
672
+ }
673
+ getLastActionScreenshotContext() {
674
+ return this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
675
+ }
676
+ getFlowStartScreenshot() {
677
+ return this.aiActStartScreenshot;
678
+ }
505
679
  async aiWaitFor(assertion, opt) {
506
680
  const modelConfig = this.modelConfigManager.getModelConfig('insight');
507
681
  await this.taskExecutor.waitFor(assertion, {
@@ -684,6 +858,9 @@ class Agent {
684
858
  _define_property(this, "executionDumpIndexByRunner", new WeakMap());
685
859
  _define_property(this, "fullActionSpace", void 0);
686
860
  _define_property(this, "reportGenerator", void 0);
861
+ _define_property(this, "actionScreenshotHistory", []);
862
+ _define_property(this, "maxScreenshotHistoryLength", 5);
863
+ _define_property(this, "aiActStartScreenshot", null);
687
864
  this.interface = interfaceInstance;
688
865
  const envReplanningCycleLimit = env_namespaceObject.globalConfigManager.getEnvConfigValueAsNumber(env_namespaceObject.MIDSCENE_REPLANNING_CYCLE_LIMIT);
689
866
  this.opts = Object.assign({