npm - @midscene/core - Versions diffs - 0.25.4-beta-20250811115904.0 → 0.25.4-beta-20250812025613.0 - Mend

@midscene/core 0.25.4-beta-20250811115904.0 → 0.25.4-beta-20250812025613.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (15)

package/dist/es/ai-model.js +1 -1
package/dist/es/{chunk-NY6RQSGJ.js → chunk-UC5NNLPY.js} +17 -17
package/dist/es/{chunk-NY6RQSGJ.js.map → chunk-UC5NNLPY.js.map} +1 -1
package/dist/es/{chunk-SR67R2OE.js → chunk-YNPMUA35.js} +3 -3
package/dist/es/index.js +2 -2
package/dist/es/utils.js +1 -1
package/dist/lib/ai-model.js +2 -2
package/dist/lib/{chunk-NY6RQSGJ.js → chunk-UC5NNLPY.js} +17 -17
package/dist/lib/{chunk-NY6RQSGJ.js.map → chunk-UC5NNLPY.js.map} +1 -1
package/dist/lib/{chunk-SR67R2OE.js → chunk-YNPMUA35.js} +3 -3
package/dist/lib/index.js +12 -12
package/dist/lib/utils.js +2 -2
package/package.json +3 -3
/package/dist/es/{chunk-SR67R2OE.js.map → chunk-YNPMUA35.js.map} +0 -0
/package/dist/lib/{chunk-SR67R2OE.js.map → chunk-YNPMUA35.js.map} +0 -0

package/dist/es/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   getVersion
-} from "./chunk-SR67R2OE.js";
+} from "./chunk-YNPMUA35.js";
 import {
   AiAssert,
   AiExtractElementInfo,
@@ -11,7 +11,7 @@ import {
   describeUserPage,
   expandSearchArea,
   plan
-} from "./chunk-NY6RQSGJ.js";
+} from "./chunk-UC5NNLPY.js";
 // src/ai-model/action-executor.ts
 import {

package/dist/es/utils.js CHANGED Viewed

@@ -12,7 +12,7 @@ import {
   uploadTestInfoToServer,
   writeDumpReport,
   writeLogFile
-} from "./chunk-SR67R2OE.js";
+} from "./chunk-YNPMUA35.js";
 export {
   getTmpDir,
   getTmpFile,

package/dist/lib/ai-model.js CHANGED Viewed

@@ -19,7 +19,7 @@
-var _chunkNY6RQSGJjs = require('./chunk-NY6RQSGJ.js');
+var _chunkUC5NNLPYjs = require('./chunk-UC5NNLPY.js');
@@ -41,4 +41,4 @@ var _chunkNY6RQSGJjs = require('./chunk-NY6RQSGJ.js');
-exports.AIActionType = _chunkNY6RQSGJjs.AIActionType; exports.AiAssert = _chunkNY6RQSGJjs.AiAssert; exports.AiExtractElementInfo = _chunkNY6RQSGJjs.AiExtractElementInfo; exports.AiLocateElement = _chunkNY6RQSGJjs.AiLocateElement; exports.AiLocateSection = _chunkNY6RQSGJjs.AiLocateSection; exports.adaptBboxToRect = _chunkNY6RQSGJjs.adaptBboxToRect; exports.callAi = _chunkNY6RQSGJjs.call; exports.callAiFn = _chunkNY6RQSGJjs.callAiFn; exports.callAiFnWithStringResponse = _chunkNY6RQSGJjs.callAiFnWithStringResponse; exports.callToGetJSONObject = _chunkNY6RQSGJjs.callToGetJSONObject; exports.describeUserPage = _chunkNY6RQSGJjs.describeUserPage; exports.elementByPositionWithElementInfo = _chunkNY6RQSGJjs.elementByPositionWithElementInfo; exports.generatePlaywrightTest = _chunkNY6RQSGJjs.generatePlaywrightTest; exports.generatePlaywrightTestStream = _chunkNY6RQSGJjs.generatePlaywrightTestStream; exports.generateYamlTest = _chunkNY6RQSGJjs.generateYamlTest; exports.generateYamlTestStream = _chunkNY6RQSGJjs.generateYamlTestStream; exports.plan = _chunkNY6RQSGJjs.plan; exports.resizeImageForUiTars = _chunkNY6RQSGJjs.resizeImageForUiTars; exports.systemPromptToLocateElement = _chunkNY6RQSGJjs.systemPromptToLocateElement; exports.vlmPlanning = _chunkNY6RQSGJjs.vlmPlanning;
+exports.AIActionType = _chunkUC5NNLPYjs.AIActionType; exports.AiAssert = _chunkUC5NNLPYjs.AiAssert; exports.AiExtractElementInfo = _chunkUC5NNLPYjs.AiExtractElementInfo; exports.AiLocateElement = _chunkUC5NNLPYjs.AiLocateElement; exports.AiLocateSection = _chunkUC5NNLPYjs.AiLocateSection; exports.adaptBboxToRect = _chunkUC5NNLPYjs.adaptBboxToRect; exports.callAi = _chunkUC5NNLPYjs.call; exports.callAiFn = _chunkUC5NNLPYjs.callAiFn; exports.callAiFnWithStringResponse = _chunkUC5NNLPYjs.callAiFnWithStringResponse; exports.callToGetJSONObject = _chunkUC5NNLPYjs.callToGetJSONObject; exports.describeUserPage = _chunkUC5NNLPYjs.describeUserPage; exports.elementByPositionWithElementInfo = _chunkUC5NNLPYjs.elementByPositionWithElementInfo; exports.generatePlaywrightTest = _chunkUC5NNLPYjs.generatePlaywrightTest; exports.generatePlaywrightTestStream = _chunkUC5NNLPYjs.generatePlaywrightTestStream; exports.generateYamlTest = _chunkUC5NNLPYjs.generateYamlTest; exports.generateYamlTestStream = _chunkUC5NNLPYjs.generateYamlTestStream; exports.plan = _chunkUC5NNLPYjs.plan; exports.resizeImageForUiTars = _chunkUC5NNLPYjs.resizeImageForUiTars; exports.systemPromptToLocateElement = _chunkUC5NNLPYjs.systemPromptToLocateElement; exports.vlmPlanning = _chunkUC5NNLPYjs.vlmPlanning;

package/dist/lib/{chunk-NY6RQSGJ.js → chunk-UC5NNLPY.js} RENAMED Viewed

@@ -760,16 +760,15 @@ You are a versatile professional in software UI automation. Your outstanding con
 ## Workflow
 1. Receive the screenshot, element description of screenshot(if any), user's instruction and previous logs.
-2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (${actionNameList}). The "About the action" section below will give you more details.
-3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
-4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
-5. Consider whether the user's instruction will be accomplished after all the actions
- - If yes, set \`taskWillBeAccomplished\` to true
- - If no, don't plan more actions by closing the array. Get ready to reevaluate the task. Some talent people like you will handle this. Give him a clear description of what have been done and what to do next. Put your new plan in the \`furtherPlan\` field. The "How to compose the \`taskWillBeAccomplished\` and \`furtherPlan\` fields" section will give you more details.
+2. Decompose the user's task into a sequence of feasible actions, and place it in the \`actions\` field. There are different types of actions (${actionNameList}). The "About the action" section below will give you more details.
+3. Consider whether the user's instruction will be accomplished after the actions you composed.
+- If the instruction is accomplished, set \`more_actions_needed_by_instruction\` to false.
+- If more actions are needed, set \`more_actions_needed_by_instruction\` to true. Get ready to hand over to the next talent people like you. Carefully log what have been done in the \`log\` field, he or she will continue the task according to your logs.
+4. If the task is not feasible on this page, set \`error\` field to the reason.
 ## Constraints
-- All the actions you composed MUST be based on the page context information you get.
+- All the actions you composed MUST be feasible, which means all the action fields can be filled with the page context information you get. If not, don't plan this action.
 - Trust the "What have been done" field about the task (if any), don't repeat actions in it.
 - Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`\`\`.
 - If the screenshot and the instruction are totally irrelevant, set reason in the \`error\` field.
@@ -807,15 +806,20 @@ The JSON format is as follows:
 ### Example: Decompose a task
-When the instruction is 'Click the language switch button, wait 1s, click "English"', and not log is provided
+When you received the following information:
+* Instruction: 'Click the language switch button, wait 1s, click "English"'
+* Logs: null
+* Page Context (screenshot and description) shows: There is a language switch button, and the "English" option is not shown in the screenshot now.
 By viewing the page screenshot and description, you should consider this and output the JSON:
-* The main steps should be: tap the switch button, sleep, and tap the 'English' option
-* The language switch button is shown in the screenshot, but it's not marked with a rectangle. So we have to use the page description to find the element. By carefully checking the context information (coordinates, attributes, content, etc.), you can find the element.
+* The user intent is: tap the switch button, sleep, and tap the 'English' option
+* The language switch button is shown in the screenshot, and can be located by the page description or the id marked with a rectangle. So we can plan a Tap action to do this.
+* Plan a Sleep action to wait for 1 second to ensure the language options are displayed.
 * The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So don't plan any action to do this.
 * Log what these action do: Click the language switch button to open the language options. Wait for 1 second.
-* The task cannot be accomplished (because we cannot see the "English" option now), so the \`more_actions_needed_by_instruction\` field is true.
+* The task cannot be accomplished (because the last tapping action is not finished yet), so the \`more_actions_needed_by_instruction\` field is true. The \`error\` field is null.
 {
   "actions":[
@@ -845,7 +849,7 @@ Wrong output:
       "thought": "Click the language switch button to open the language options.",
       "param": null,
       "locate": {
-        { "id": "c81c4e9a33" }, // WRONG: prompt is missing
+        { "id": "c81c4e9a33" }, // WRONG: prompt is missing, this is not a valid LocateParam
       }
     },
     {
@@ -858,10 +862,6 @@ Wrong output:
   "more_actions_needed_by_instruction": false, // WRONG: should be true
   "log": "Click the language switch button to open the language options",
 }
-Reason:
-* The \`prompt\` is missing in the first 'Locate' action
-* Since the option button is not shown in the screenshot, there are still more actions to be done, so the \`more_actions_needed_by_instruction\` field should be true
 `;
 async function systemPromptToTaskPlanning({
   actionSpace,
@@ -2911,4 +2911,4 @@ async function resizeImageForUiTars(imageBase64, size) {
 exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.call = call2; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFnWithStringResponse = callAiFnWithStringResponse; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.generateYamlTest = generateYamlTest; exports.generateYamlTestStream = generateYamlTestStream; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generatePlaywrightTestStream = generatePlaywrightTestStream; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
-//# sourceMappingURL=chunk-NY6RQSGJ.js.map
+//# sourceMappingURL=chunk-UC5NNLPY.js.map