npm - @donggui/core - Versions diffs - 1.5.14 → 1.6.0 - Mend

@donggui/core 1.5.14 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/dist/es/agent/agent.mjs +187 -10
package/dist/es/agent/agent.mjs.map +1 -1
package/dist/es/agent/utils.mjs +1 -1
package/dist/es/ai-model/assert.mjs +415 -0
package/dist/es/ai-model/assert.mjs.map +1 -0
package/dist/es/ai-model/index.mjs +2 -1
package/dist/es/service/index.mjs +4 -6
package/dist/es/service/index.mjs.map +1 -1
package/dist/es/types.mjs.map +1 -1
package/dist/es/utils.mjs +2 -2
package/dist/lib/agent/agent.js +185 -8
package/dist/lib/agent/agent.js.map +1 -1
package/dist/lib/agent/utils.js +1 -1
package/dist/lib/ai-model/assert.js +455 -0
package/dist/lib/ai-model/assert.js.map +1 -0
package/dist/lib/ai-model/index.js +18 -11
package/dist/lib/service/index.js +4 -6
package/dist/lib/service/index.js.map +1 -1
package/dist/lib/types.js +3 -3
package/dist/lib/types.js.map +1 -1
package/dist/lib/utils.js +2 -2
package/dist/types/agent/agent.d.ts +38 -2
package/dist/types/ai-model/assert.d.ts +66 -0
package/dist/types/ai-model/index.d.ts +2 -0
package/dist/types/types.d.ts +156 -0
package/package.json +25 -44

package/dist/lib/agent/agent.js CHANGED

@@ -36,6 +36,7 @@ __webpack_require__.d(__webpack_exports__, {
     Agent: ()=>Agent,
     createAgent: ()=>createAgent
 });
+const assert_js_namespaceObject = require("../ai-model/assert.js");
 const external_screenshot_item_js_namespaceObject = require("../screenshot-item.js");
 const index_js_namespaceObject = require("../service/index.js");
 var index_js_default = /*#__PURE__*/ __webpack_require__.n(index_js_namespaceObject);
@@ -202,6 +203,7 @@ class Agent {
     }
     async callActionInActionSpace(type, opt) {
         debug('callActionInActionSpace', type, ',', opt);
+        const beforeScreenshot = await this.interface.screenshotBase64();
         const actionPlan = {
             type: type,
             param: opt || {},
@@ -215,6 +217,15 @@ class Agent {
         const defaultIntentModelConfig = this.modelConfigManager.getModelConfig('default');
         const modelConfigForPlanning = this.modelConfigManager.getModelConfig('planning');
         const { output } = await this.taskExecutor.runPlans(title, plans, modelConfigForPlanning, defaultIntentModelConfig);
+        const afterScreenshot = await this.interface.screenshotBase64();
+        this.actionScreenshotHistory.push({
+            beforeScreenshot,
+            afterScreenshot,
+            actionType: type,
+            actionParam: opt,
+            timestamp: Date.now()
+        });
+        if (this.actionScreenshotHistory.length > this.maxScreenshotHistoryLength) this.actionScreenshotHistory.shift();
         return output;
     }
     async aiTap(locatePrompt, opt) {
@@ -330,6 +341,8 @@ class Agent {
         const abortSignal = opt?.abortSignal;
         if (abortSignal?.aborted) throw new Error(`aiAct aborted: ${abortSignal.reason || 'signal already aborted'}`);
         const runAiAct = async ()=>{
+            this.aiActStartScreenshot = await this.interface.screenshotBase64();
+            this.actionScreenshotHistory = [];
             const modelConfigForPlanning = this.modelConfigManager.getModelConfig('planning');
             const defaultIntentModelConfig = this.modelConfigManager.getModelConfig('default');
             const deepThink = opt?.deepThink === 'unset' ? void 0 : opt?.deepThink;
@@ -466,19 +479,169 @@ class Agent {
     }
     async aiAssert(assertion, msg, opt) {
         const modelConfig = this.modelConfigManager.getModelConfig('insight');
-        const serviceOpt = {
-            domIncluded: opt?.domIncluded ?? defaultServiceExtractOption.domIncluded,
-            screenshotIncluded: opt?.screenshotIncluded ?? defaultServiceExtractOption.screenshotIncluded
-        };
-        const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
+        const { textPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
         const assertionText = 'string' == typeof assertion ? assertion : assertion.prompt;
+        let beforeScreenshot;
+        let afterScreenshot;
+        const mode = opt?.screenshotMode || 'auto';
+        switch(mode){
+            case 'manual':
+                beforeScreenshot = opt?.beforeScreenshot;
+                afterScreenshot = opt?.afterScreenshot;
+                break;
+            case 'flow':
+                beforeScreenshot = this.aiActStartScreenshot || this.actionScreenshotHistory[0]?.beforeScreenshot;
+                afterScreenshot = await this.interface.screenshotBase64();
+                break;
+            case 'currentOnly':
+                beforeScreenshot = void 0;
+                afterScreenshot = await this.interface.screenshotBase64();
+                break;
+            case 'lastAction':
+                if (this.actionScreenshotHistory.length > 0) {
+                    const lastContext = this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
+                    beforeScreenshot = lastContext.beforeScreenshot;
+                    afterScreenshot = await this.interface.screenshotBase64();
+                } else {
+                    beforeScreenshot = void 0;
+                    afterScreenshot = await this.interface.screenshotBase64();
+                }
+                break;
+            case 'diff':
+                afterScreenshot = await this.interface.screenshotBase64();
+                break;
+            case 'video':
+                beforeScreenshot = void 0;
+                afterScreenshot = await this.interface.screenshotBase64();
+                break;
+            default:
+                if (opt?.beforeScreenshot && opt?.afterScreenshot) {
+                    beforeScreenshot = opt.beforeScreenshot;
+                    afterScreenshot = opt.afterScreenshot;
+                } else if (this.actionScreenshotHistory.length > 0) {
+                    const lastContext = this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
+                    beforeScreenshot = lastContext.beforeScreenshot;
+                    afterScreenshot = await this.interface.screenshotBase64();
+                } else {
+                    beforeScreenshot = void 0;
+                    afterScreenshot = await this.interface.screenshotBase64();
+                }
+                break;
+        }
+        const executeAssertion = async ()=>{
+            if ('diff' === mode && opt?.referenceImages && opt.referenceImages.length > 0) {
+                const { AiAssertDiff } = await import("../ai-model/assert.js");
+                const diffResult = await AiAssertDiff({
+                    currentScreenshot: afterScreenshot,
+                    referenceImages: opt.referenceImages,
+                    assertion: textPrompt,
+                    businessContext: opt.businessContext,
+                    diffThreshold: opt.diffThreshold,
+                    ignoreRegions: opt.ignoreRegions,
+                    ignoreDynamicContent: opt.ignoreDynamicContent,
+                    strictMode: opt.strictMode,
+                    modelConfig
+                });
+                return {
+                    pass: diffResult.pass,
+                    thought: diffResult.thought,
+                    reason: diffResult.reason,
+                    systemCheckResults: void 0,
+                    diffDetails: diffResult.diffDetails,
+                    videoDetails: void 0
+                };
+            }
+            if ('video' === mode && opt?.currentVideo) {
+                const { AiAssertVideo } = await import("../ai-model/assert.js");
+                const MAX_DURATION = 5;
+                const DEFAULT_FPS = 30;
+                const MAX_FRAMES = MAX_DURATION * DEFAULT_FPS;
+                let videoFrames = [];
+                if (opt.currentVideo.frames && opt.currentVideo.frames.length > 0) videoFrames = opt.currentVideo.frames.slice(0, MAX_FRAMES);
+                else if ('url' === opt.currentVideo.format && opt.currentVideo.url) throw new Error('Video URL format is not yet supported. Please provide video frames directly.');
+                else throw new Error('currentVideo.frames is required for video assertion mode');
+                if (0 === videoFrames.length) throw new Error('No video frames provided for video assertion');
+                const videoResult = await AiAssertVideo({
+                    currentVideoFrames: videoFrames,
+                    assertion: textPrompt,
+                    businessContext: opt.businessContext,
+                    videoOptions: opt.videoOptions,
+                    modelConfig
+                });
+                return {
+                    pass: videoResult.pass,
+                    thought: videoResult.thought,
+                    reason: videoResult.reason,
+                    systemCheckResults: void 0,
+                    diffDetails: void 0,
+                    videoDetails: videoResult.videoDetails
+                };
+            }
+            const assertResult = await (0, assert_js_namespaceObject.AiAssertElement)({
+                beforeScreenshot,
+                afterScreenshot,
+                assertion: textPrompt,
+                businessContext: opt?.businessContext,
+                enableSystemCheck: opt?.enableSystemCheck ?? true,
+                customSystemCheckRules: opt?.customSystemCheckRules,
+                modelConfig
+            });
+            return {
+                pass: assertResult.pass,
+                thought: assertResult.thought,
+                reason: assertResult.reason,
+                systemCheckResults: assertResult.systemCheckResults,
+                diffDetails: void 0
+            };
+        };
+        const runWithRetry = async ()=>{
+            const retryOptions = opt?.retryOptions;
+            let lastError;
+            let attempts = 0;
+            const maxAttempts = retryOptions?.maxRetries ? retryOptions.maxRetries + 1 : 1;
+            while(attempts < maxAttempts)try {
+                attempts++;
+                const result = await executeAssertion();
+                return {
+                    pass: result.pass,
+                    thought: result.thought,
+                    reason: result.reason,
+                    systemCheckResults: result.systemCheckResults,
+                    diffDetails: result.diffDetails
+                };
+            } catch (error) {
+                lastError = error instanceof Error ? error : new Error(String(error));
+                if (attempts < maxAttempts && retryOptions?.retryInterval) await new Promise((resolve)=>setTimeout(resolve, retryOptions.retryInterval));
+            }
+            throw lastError || new Error('Assertion failed after retries');
+        };
         try {
-            const { output, thought } = await this.taskExecutor.createTypeQueryExecution('Assert', textPrompt, modelConfig, serviceOpt, multimodalPrompt);
-            const pass = Boolean(output);
-            const message = pass ? void 0 : `Assertion failed: ${msg || assertionText}\nReason: ${thought || '(no_reason)'}`;
+            const result = await runWithRetry();
+            const { pass, thought, reason, systemCheckResults, diffDetails } = result;
+            if (opt?.saveSnapshot) {
+                const snapshot = {
+                    beforeScreenshot,
+                    afterScreenshot,
+                    timestamp: Date.now(),
+                    assertion: assertionText
+                };
+                if (opt?.snapshotPath) {
+                    const fs = await import("node:fs");
+                    const path = await import("node:path");
+                    const snapshotDir = path.dirname(opt.snapshotPath);
+                    if (!fs.existsSync(snapshotDir)) fs.mkdirSync(snapshotDir, {
+                        recursive: true
+                    });
+                    fs.writeFileSync(opt.snapshotPath, JSON.stringify(snapshot, null, 2));
+                }
+            }
+            const message = pass ? void 0 : `Assertion failed: ${msg || assertionText}\nReason: ${reason || thought || '(no_reason)'}`;
             if (opt?.keepRawResponse) return {
                 pass,
                 thought,
+                reason,
+                systemCheckResults,
+                diffDetails,
                 message
             };
             if (!pass) throw new Error(message);
@@ -493,6 +656,7 @@ class Agent {
                 if (opt?.keepRawResponse) return {
                     pass: false,
                     thought,
+                    reason,
                     message
                 };
                 throw new Error(message, {
@@ -502,6 +666,16 @@ class Agent {
             throw error;
         }
     }
+    clearActionScreenshotHistory() {
+        this.actionScreenshotHistory = [];
+        this.aiActStartScreenshot = null;
+    }
+    getLastActionScreenshotContext() {
+        return this.actionScreenshotHistory[this.actionScreenshotHistory.length - 1];
+    }
+    getFlowStartScreenshot() {
+        return this.aiActStartScreenshot;
+    }
     async aiWaitFor(assertion, opt) {
         const modelConfig = this.modelConfigManager.getModelConfig('insight');
         await this.taskExecutor.waitFor(assertion, {
@@ -684,6 +858,9 @@ class Agent {
         _define_property(this, "executionDumpIndexByRunner", new WeakMap());
         _define_property(this, "fullActionSpace", void 0);
         _define_property(this, "reportGenerator", void 0);
+        _define_property(this, "actionScreenshotHistory", []);
+        _define_property(this, "maxScreenshotHistoryLength", 5);
+        _define_property(this, "aiActStartScreenshot", null);
         this.interface = interfaceInstance;
         const envReplanningCycleLimit = env_namespaceObject.globalConfigManager.getEnvConfigValueAsNumber(env_namespaceObject.MIDSCENE_REPLANNING_CYCLE_LIMIT);
         this.opts = Object.assign({