@midscene/core 1.8.10 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +40 -50
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/task-builder.mjs +39 -19
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +24 -22
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +11 -14
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/connectivity.mjs +7 -3
- package/dist/es/ai-model/connectivity.mjs.map +1 -1
- package/dist/es/ai-model/errors.mjs +9 -0
- package/dist/es/ai-model/errors.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +3 -4
- package/dist/es/ai-model/inspect.mjs +132 -144
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +46 -28
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/{auto-glm → models/auto-glm}/actions.mjs +22 -44
- package/dist/es/ai-model/models/auto-glm/actions.mjs.map +1 -0
- package/dist/es/ai-model/models/auto-glm/adapter.mjs +45 -0
- package/dist/es/ai-model/models/auto-glm/adapter.mjs.map +1 -0
- package/dist/es/ai-model/models/auto-glm/locate.mjs +112 -0
- package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -0
- package/dist/es/ai-model/models/auto-glm/parser.mjs.map +1 -0
- package/dist/es/ai-model/{auto-glm → models/auto-glm}/planning.mjs +6 -7
- package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -0
- package/dist/es/ai-model/{auto-glm → models/auto-glm}/prompt.mjs +3 -11
- package/dist/es/ai-model/models/auto-glm/prompt.mjs.map +1 -0
- package/dist/es/ai-model/models/default.mjs +12 -0
- package/dist/es/ai-model/models/default.mjs.map +1 -0
- package/dist/es/ai-model/models/doubao.mjs +138 -0
- package/dist/es/ai-model/models/doubao.mjs.map +1 -0
- package/dist/es/ai-model/models/gemini.mjs +34 -0
- package/dist/es/ai-model/models/gemini.mjs.map +1 -0
- package/dist/es/ai-model/models/glm.mjs +37 -0
- package/dist/es/ai-model/models/glm.mjs.map +1 -0
- package/dist/es/ai-model/models/gpt.mjs +31 -0
- package/dist/es/ai-model/models/gpt.mjs.map +1 -0
- package/dist/es/ai-model/models/index.mjs +2 -0
- package/dist/es/ai-model/models/qwen.mjs +113 -0
- package/dist/es/ai-model/models/qwen.mjs.map +1 -0
- package/dist/es/ai-model/models/registry.mjs +45 -0
- package/dist/es/ai-model/models/registry.mjs.map +1 -0
- package/dist/es/ai-model/models/resolved.mjs +104 -0
- package/dist/es/ai-model/models/resolved.mjs.map +1 -0
- package/dist/es/ai-model/models/types.mjs +0 -0
- package/dist/es/ai-model/models/ui-tars/adapter.mjs +142 -0
- package/dist/es/ai-model/models/ui-tars/adapter.mjs.map +1 -0
- package/dist/es/ai-model/{ui-tars-planning.mjs → models/ui-tars/planning.mjs} +44 -62
- package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/extraction.mjs +3 -3
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +11 -11
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +25 -60
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -10
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/locate-grounding-rules.mjs +9 -0
- package/dist/es/ai-model/prompt/locate-grounding-rules.mjs.map +1 -0
- package/dist/es/ai-model/prompt/locate-param-example.mjs +15 -0
- package/dist/es/ai-model/prompt/locate-param-example.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +5 -5
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +5 -5
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompts/locate-result-coordinates.mjs +107 -0
- package/dist/es/ai-model/prompts/locate-result-coordinates.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +59 -190
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/json.mjs +60 -0
- package/dist/es/ai-model/service-caller/json.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/bbox.mjs +68 -0
- package/dist/es/ai-model/shared/model-locate-result/bbox.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/factory.mjs +96 -0
- package/dist/es/ai-model/shared/model-locate-result/factory.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/index.mjs +3 -0
- package/dist/es/ai-model/shared/model-locate-result/parse.mjs +41 -0
- package/dist/es/ai-model/shared/model-locate-result/parse.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs +64 -0
- package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/types.mjs +0 -0
- package/dist/es/ai-model/types.mjs +0 -0
- package/dist/es/ai-model/workflows/image-preprocess.mjs +27 -0
- package/dist/es/ai-model/workflows/image-preprocess.mjs.map +1 -0
- package/dist/es/ai-model/workflows/inspect/index.mjs +2 -0
- package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs +23 -0
- package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs.map +1 -0
- package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs +18 -0
- package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs.map +1 -0
- package/dist/es/ai-model/workflows/inspect/types.mjs +0 -0
- package/dist/es/ai-model/workflows/planning/index.mjs +5 -0
- package/dist/es/ai-model/workflows/planning/index.mjs.map +1 -0
- package/dist/es/ai-model/workflows/planning/types.mjs +0 -0
- package/dist/es/common.mjs +2 -174
- package/dist/es/common.mjs.map +1 -1
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/service/index.mjs +96 -69
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml/player.mjs +4 -3
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/lib/agent/agent.js +43 -53
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/task-builder.js +38 -18
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/agent/tasks.js +23 -21
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +17 -17
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/connectivity.js +7 -3
- package/dist/lib/ai-model/connectivity.js.map +1 -1
- package/dist/lib/ai-model/errors.js +46 -0
- package/dist/lib/ai-model/errors.js.map +1 -0
- package/dist/lib/ai-model/index.js +7 -14
- package/dist/lib/ai-model/inspect.js +141 -144
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +44 -26
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/{auto-glm → models/auto-glm}/actions.js +22 -44
- package/dist/lib/ai-model/models/auto-glm/actions.js.map +1 -0
- package/dist/lib/ai-model/models/auto-glm/adapter.js +79 -0
- package/dist/lib/ai-model/models/auto-glm/adapter.js.map +1 -0
- package/dist/lib/ai-model/models/auto-glm/locate.js +146 -0
- package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -0
- package/dist/lib/ai-model/models/auto-glm/parser.js.map +1 -0
- package/dist/lib/ai-model/{auto-glm → models/auto-glm}/planning.js +8 -9
- package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -0
- package/dist/lib/ai-model/{auto-glm → models/auto-glm}/prompt.js +14 -16
- package/dist/lib/ai-model/models/auto-glm/prompt.js.map +1 -0
- package/dist/lib/ai-model/{auto-glm/util.js → models/default.js} +13 -13
- package/dist/lib/ai-model/models/default.js.map +1 -0
- package/dist/lib/ai-model/models/doubao.js +184 -0
- package/dist/lib/ai-model/models/doubao.js.map +1 -0
- package/dist/lib/ai-model/models/gemini.js +68 -0
- package/dist/lib/ai-model/models/gemini.js.map +1 -0
- package/dist/lib/ai-model/models/glm.js +71 -0
- package/dist/lib/ai-model/models/glm.js.map +1 -0
- package/dist/lib/ai-model/models/gpt.js +65 -0
- package/dist/lib/ai-model/models/gpt.js.map +1 -0
- package/dist/lib/ai-model/{service-caller/image-detail.js → models/index.js} +8 -7
- package/dist/lib/ai-model/models/index.js.map +1 -0
- package/dist/lib/ai-model/models/qwen.js +147 -0
- package/dist/lib/ai-model/models/qwen.js.map +1 -0
- package/dist/lib/ai-model/models/registry.js +85 -0
- package/dist/lib/ai-model/models/registry.js.map +1 -0
- package/dist/lib/ai-model/models/resolved.js +138 -0
- package/dist/lib/ai-model/models/resolved.js.map +1 -0
- package/dist/lib/ai-model/models/types.js +20 -0
- package/dist/lib/ai-model/models/types.js.map +1 -0
- package/dist/lib/ai-model/models/ui-tars/adapter.js +176 -0
- package/dist/lib/ai-model/models/ui-tars/adapter.js.map +1 -0
- package/dist/lib/ai-model/{ui-tars-planning.js → models/ui-tars/planning.js} +44 -62
- package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/extraction.js +3 -3
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +11 -11
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +25 -60
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -10
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/locate-grounding-rules.js +43 -0
- package/dist/lib/ai-model/prompt/locate-grounding-rules.js.map +1 -0
- package/dist/lib/ai-model/prompt/locate-param-example.js +52 -0
- package/dist/lib/ai-model/prompt/locate-param-example.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +5 -5
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +5 -5
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/prompts/locate-result-coordinates.js +150 -0
- package/dist/lib/ai-model/prompts/locate-result-coordinates.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +68 -199
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/service-caller/json.js +100 -0
- package/dist/lib/ai-model/service-caller/json.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/bbox.js +117 -0
- package/dist/lib/ai-model/shared/model-locate-result/bbox.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/factory.js +130 -0
- package/dist/lib/ai-model/shared/model-locate-result/factory.js.map +1 -0
- package/dist/lib/ai-model/{prompt/common.js → shared/model-locate-result/index.js} +9 -9
- package/dist/lib/ai-model/shared/model-locate-result/index.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/parse.js +78 -0
- package/dist/lib/ai-model/shared/model-locate-result/parse.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js +98 -0
- package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/types.js +20 -0
- package/dist/lib/ai-model/shared/model-locate-result/types.js.map +1 -0
- package/dist/lib/ai-model/types.js +20 -0
- package/dist/lib/ai-model/types.js.map +1 -0
- package/dist/lib/ai-model/workflows/image-preprocess.js +61 -0
- package/dist/lib/ai-model/workflows/image-preprocess.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/index.js +50 -0
- package/dist/lib/ai-model/workflows/inspect/index.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js +60 -0
- package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js +52 -0
- package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/types.js +20 -0
- package/dist/lib/ai-model/workflows/inspect/types.js.map +1 -0
- package/dist/lib/ai-model/{model-family.js → workflows/planning/index.js} +6 -7
- package/dist/lib/ai-model/workflows/planning/index.js.map +1 -0
- package/dist/lib/ai-model/workflows/planning/types.js +20 -0
- package/dist/lib/ai-model/workflows/planning/types.js.map +1 -0
- package/dist/lib/common.js +4 -206
- package/dist/lib/common.js.map +1 -1
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/service/index.js +96 -69
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml/player.js +4 -3
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/types/agent/agent.d.ts +14 -6
- package/dist/types/agent/task-builder.d.ts +2 -2
- package/dist/types/agent/tasks.d.ts +6 -6
- package/dist/types/agent/utils.d.ts +8 -5
- package/dist/types/ai-model/errors.d.ts +2 -0
- package/dist/types/ai-model/index.d.ts +2 -4
- package/dist/types/ai-model/inspect.d.ts +13 -33
- package/dist/types/ai-model/llm-planning.d.ts +6 -17
- package/dist/types/ai-model/{auto-glm → models/auto-glm}/actions.d.ts +2 -2
- package/dist/types/ai-model/models/auto-glm/adapter.d.ts +5 -0
- package/dist/types/ai-model/models/auto-glm/locate.d.ts +3 -0
- package/dist/types/ai-model/models/auto-glm/planning.d.ts +3 -0
- package/dist/types/ai-model/models/auto-glm/prompt.d.ts +4 -0
- package/dist/types/ai-model/models/default.d.ts +2 -0
- package/dist/types/ai-model/models/doubao.d.ts +10 -0
- package/dist/types/ai-model/models/gemini.d.ts +18 -0
- package/dist/types/ai-model/models/glm.d.ts +18 -0
- package/dist/types/ai-model/models/gpt.d.ts +18 -0
- package/dist/types/ai-model/models/index.d.ts +2 -0
- package/dist/types/ai-model/models/qwen.d.ts +30 -0
- package/dist/types/ai-model/models/registry.d.ts +81 -0
- package/dist/types/ai-model/models/resolved.d.ts +9 -0
- package/dist/types/ai-model/models/types.d.ts +102 -0
- package/dist/types/ai-model/models/ui-tars/adapter.d.ts +6 -0
- package/dist/types/ai-model/{ui-tars-planning.d.ts → models/ui-tars/planning.d.ts} +7 -11
- package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
- package/dist/types/ai-model/prompt/llm-planning.d.ts +5 -5
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
- package/dist/types/ai-model/prompt/locate-grounding-rules.d.ts +1 -0
- package/dist/types/ai-model/prompt/locate-param-example.d.ts +3 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -3
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -3
- package/dist/types/ai-model/prompts/locate-result-coordinates.d.ts +6 -0
- package/dist/types/ai-model/service-caller/index.d.ts +19 -27
- package/dist/types/ai-model/service-caller/json.d.ts +9 -0
- package/dist/types/ai-model/shared/model-locate-result/bbox.d.ts +7 -0
- package/dist/types/ai-model/shared/model-locate-result/factory.d.ts +2 -0
- package/dist/types/ai-model/shared/model-locate-result/index.d.ts +3 -0
- package/dist/types/ai-model/shared/model-locate-result/parse.d.ts +5 -0
- package/dist/types/ai-model/shared/model-locate-result/pixel-bbox-mapper.d.ts +7 -0
- package/dist/types/ai-model/shared/model-locate-result/types.d.ts +157 -0
- package/dist/types/ai-model/types.d.ts +2 -0
- package/dist/types/ai-model/workflows/image-preprocess.d.ts +30 -0
- package/dist/types/ai-model/workflows/inspect/index.d.ts +1 -0
- package/dist/types/ai-model/workflows/inspect/locate-result-rect.d.ts +4 -0
- package/dist/types/ai-model/workflows/inspect/search-area-mapping.d.ts +3 -0
- package/dist/types/ai-model/workflows/inspect/types.d.ts +37 -0
- package/dist/types/ai-model/workflows/planning/index.d.ts +2 -0
- package/dist/types/ai-model/workflows/planning/types.d.ts +15 -0
- package/dist/types/common.d.ts +0 -30
- package/dist/types/device/index.d.ts +22 -22
- package/dist/types/service/index.d.ts +5 -4
- package/dist/types/types.d.ts +21 -9
- package/dist/types/yaml.d.ts +8 -2
- package/package.json +2 -2
- package/dist/es/ai-model/auto-glm/actions.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/index.mjs +0 -6
- package/dist/es/ai-model/auto-glm/parser.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/planning.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/prompt.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/util.mjs +0 -9
- package/dist/es/ai-model/auto-glm/util.mjs.map +0 -1
- package/dist/es/ai-model/model-family.mjs +0 -6
- package/dist/es/ai-model/model-family.mjs.map +0 -1
- package/dist/es/ai-model/prompt/common.mjs +0 -8
- package/dist/es/ai-model/prompt/common.mjs.map +0 -1
- package/dist/es/ai-model/service-caller/image-detail.mjs +0 -6
- package/dist/es/ai-model/service-caller/image-detail.mjs.map +0 -1
- package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
- package/dist/lib/ai-model/auto-glm/actions.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/index.js +0 -66
- package/dist/lib/ai-model/auto-glm/index.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/parser.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/planning.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/prompt.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/util.js.map +0 -1
- package/dist/lib/ai-model/model-family.js.map +0 -1
- package/dist/lib/ai-model/prompt/common.js.map +0 -1
- package/dist/lib/ai-model/service-caller/image-detail.js.map +0 -1
- package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
- package/dist/types/ai-model/auto-glm/index.d.ts +0 -6
- package/dist/types/ai-model/auto-glm/planning.d.ts +0 -12
- package/dist/types/ai-model/auto-glm/prompt.d.ts +0 -27
- package/dist/types/ai-model/auto-glm/util.d.ts +0 -13
- package/dist/types/ai-model/model-family.d.ts +0 -7
- package/dist/types/ai-model/prompt/common.d.ts +0 -2
- package/dist/types/ai-model/service-caller/image-detail.d.ts +0 -2
- /package/dist/es/ai-model/{auto-glm → models/auto-glm}/parser.mjs +0 -0
- /package/dist/lib/ai-model/{auto-glm → models/auto-glm}/parser.js +0 -0
- /package/dist/types/ai-model/{auto-glm → models/auto-glm}/parser.d.ts +0 -0
|
@@ -27,19 +27,20 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
27
27
|
parseXMLPlanningResponse: ()=>parseXMLPlanningResponse,
|
|
28
28
|
plan: ()=>plan
|
|
29
29
|
});
|
|
30
|
-
const img_namespaceObject = require("@midscene/shared/img");
|
|
31
30
|
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
32
31
|
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
33
32
|
const external_common_js_namespaceObject = require("../common.js");
|
|
33
|
+
const external_errors_js_namespaceObject = require("./errors.js");
|
|
34
34
|
const llm_planning_js_namespaceObject = require("./prompt/llm-planning.js");
|
|
35
35
|
const util_js_namespaceObject = require("./prompt/util.js");
|
|
36
36
|
const index_js_namespaceObject = require("./service-caller/index.js");
|
|
37
|
+
const image_preprocess_js_namespaceObject = require("./workflows/image-preprocess.js");
|
|
37
38
|
const debug = (0, logger_namespaceObject.getDebug)('planning');
|
|
38
39
|
const warnLog = (0, logger_namespaceObject.getDebug)('planning', {
|
|
39
40
|
console: true
|
|
40
41
|
});
|
|
41
42
|
const noPreviousActionsText = 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';
|
|
42
|
-
function parseXMLPlanningResponse(xmlString,
|
|
43
|
+
function parseXMLPlanningResponse(xmlString, jsonParser) {
|
|
43
44
|
const thought = (0, util_js_namespaceObject.extractXMLTag)(xmlString, 'thought');
|
|
44
45
|
const memory = (0, util_js_namespaceObject.extractXMLTag)(xmlString, 'memory');
|
|
45
46
|
const log = (0, util_js_namespaceObject.extractXMLTag)(xmlString, 'log') || '';
|
|
@@ -63,11 +64,12 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
|
|
|
63
64
|
const type = actionType.split('<')[0].trim();
|
|
64
65
|
let param;
|
|
65
66
|
if (actionParamStr) try {
|
|
66
|
-
param = (
|
|
67
|
-
|
|
67
|
+
param = jsonParser(actionParamStr, {
|
|
68
|
+
source: 'planning-action-param',
|
|
69
|
+
preserveStringValueKeys: 'input' === type.toLowerCase() ? [
|
|
68
70
|
'value'
|
|
69
|
-
]
|
|
70
|
-
}
|
|
71
|
+
] : void 0
|
|
72
|
+
});
|
|
71
73
|
} catch (e) {
|
|
72
74
|
throw new Error(`Failed to parse action-param-json: ${e}`);
|
|
73
75
|
}
|
|
@@ -105,27 +107,28 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
|
|
|
105
107
|
};
|
|
106
108
|
}
|
|
107
109
|
async function plan(userInstruction, opts) {
|
|
108
|
-
const { context,
|
|
110
|
+
const { context, conversationHistory } = opts;
|
|
111
|
+
const modelRuntime = opts.modelRuntime;
|
|
112
|
+
const { adapter } = modelRuntime;
|
|
109
113
|
const { shotSize } = context;
|
|
110
114
|
const screenshotBase64 = context.screenshot.base64;
|
|
111
|
-
|
|
115
|
+
if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) throw new Error((0, external_errors_js_namespaceObject.planningModelFamilyRequiredForLocateMessage)(modelRuntime.config.slot));
|
|
116
|
+
const locateResultAdapter = modelRuntime.config.modelFamily && 'standard' === adapter.locate.kind ? adapter.locate.resultAdapter : void 0;
|
|
112
117
|
const includeSubGoals = true === opts.deepThink;
|
|
113
118
|
const systemPrompt = await (0, llm_planning_js_namespaceObject.systemPromptToTaskPlanning)({
|
|
114
119
|
actionSpace: opts.actionSpace,
|
|
115
|
-
|
|
116
|
-
|
|
120
|
+
locatePromptSpec: locateResultAdapter?.promptSpec,
|
|
121
|
+
includeLocateInPlanning: opts.includeLocateInPlanning,
|
|
117
122
|
includeThought: true,
|
|
118
123
|
includeSubGoals
|
|
119
124
|
});
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
imagePayload = paddedResult.imageBase64;
|
|
128
|
-
}
|
|
125
|
+
const preparedImage = await (0, image_preprocess_js_namespaceObject.prepareModelImage)({
|
|
126
|
+
imageBase64: screenshotBase64,
|
|
127
|
+
width: shotSize.width,
|
|
128
|
+
height: shotSize.height,
|
|
129
|
+
policy: adapter.imagePreprocess
|
|
130
|
+
});
|
|
131
|
+
const imagePayload = preparedImage.imageBase64;
|
|
129
132
|
const actionContext = opts.actionContext ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\n` : '';
|
|
130
133
|
const instruction = [
|
|
131
134
|
{
|
|
@@ -188,23 +191,23 @@ async function plan(userInstruction, opts) {
|
|
|
188
191
|
...instruction,
|
|
189
192
|
...historyLog
|
|
190
193
|
];
|
|
191
|
-
let { content: rawResponse, usage, reasoning_content } = await (0, index_js_namespaceObject.callAI)(msgs,
|
|
194
|
+
let { content: rawResponse, usage, reasoning_content } = await (0, index_js_namespaceObject.callAI)(msgs, modelRuntime, {
|
|
192
195
|
abortSignal: opts.abortSignal,
|
|
193
|
-
|
|
196
|
+
requiresOriginalImageDetail: opts.includeLocateInPlanning
|
|
194
197
|
});
|
|
195
198
|
let planFromAI;
|
|
196
199
|
try {
|
|
197
200
|
try {
|
|
198
|
-
planFromAI = parseXMLPlanningResponse(rawResponse,
|
|
201
|
+
planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);
|
|
199
202
|
} catch {
|
|
200
|
-
const retry = await (0, index_js_namespaceObject.callAI)(msgs,
|
|
203
|
+
const retry = await (0, index_js_namespaceObject.callAI)(msgs, modelRuntime, {
|
|
201
204
|
abortSignal: opts.abortSignal,
|
|
202
|
-
|
|
205
|
+
requiresOriginalImageDetail: opts.includeLocateInPlanning
|
|
203
206
|
});
|
|
204
207
|
rawResponse = retry.content;
|
|
205
208
|
usage = retry.usage;
|
|
206
209
|
reasoning_content = retry.reasoning_content;
|
|
207
|
-
planFromAI = parseXMLPlanningResponse(rawResponse,
|
|
210
|
+
planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);
|
|
208
211
|
}
|
|
209
212
|
if (planFromAI.action && void 0 !== planFromAI.finalizeSuccess) {
|
|
210
213
|
warnLog('Planning response included both an action and <complete>; ignoring <complete> output.');
|
|
@@ -238,7 +241,22 @@ async function plan(userInstruction, opts) {
|
|
|
238
241
|
debug('locateFields', locateFields);
|
|
239
242
|
locateFields.forEach((field)=>{
|
|
240
243
|
const locateResult = action.param[field];
|
|
241
|
-
if (locateResult
|
|
244
|
+
if (locateResult) {
|
|
245
|
+
if (!opts.includeLocateInPlanning) {
|
|
246
|
+
if ('object' == typeof locateResult) action.param[field] = {
|
|
247
|
+
prompt: locateResult.prompt
|
|
248
|
+
};
|
|
249
|
+
return;
|
|
250
|
+
}
|
|
251
|
+
(0, utils_namespaceObject.assert)(locateResultAdapter, 'generic planning locate normalization requires a standard locate adapter');
|
|
252
|
+
action.param[field] = {
|
|
253
|
+
...locateResult,
|
|
254
|
+
locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(locateResult, {
|
|
255
|
+
preparedSize: preparedImage.preparedSize,
|
|
256
|
+
contentSize: preparedImage.contentSize
|
|
257
|
+
})
|
|
258
|
+
};
|
|
259
|
+
}
|
|
242
260
|
});
|
|
243
261
|
});
|
|
244
262
|
if (includeSubGoals) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n parseModelResponseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = parseModelResponseJson(\n actionParamStr,\n modelFamily,\n type.toLowerCase() === 'input'\n ? { preserveStringValueKeys: ['value'] }\n : undefined,\n );\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n // Controls aiAct planning prompt shape and state updates, such as sub-goals.\n deepThink?: boolean;\n abortSignal?: AbortSignal;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = shotSize.width;\n let imageHeight = shotSize.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n // When GPT-5 planning includes bbox, the planning call also performs\n // localization, so the screenshot should be sent with original detail.\n forceOriginalImageDetail: modelFamily === 'gpt-5' && opts.includeBbox,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n } catch {\n const retry = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n forceOriginalImageDetail: modelFamily === 'gpt-5' && opts.includeBbox,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","parseModelResponseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","shotSize","screenshotBase64","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;ACwBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQC,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EACNf,gBACAP,aACAoB,AAAuB,YAAvBA,KAAK,WAAW,KACZ;gBAAE,yBAAyB;oBAAC;iBAAQ;YAAC,IACrCR;QAER,EAAE,OAAOW,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAYC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,QAAQ,EAAE,GAAGH;IACrB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAE5B,WAAW,EAAE,GAAG6B;IAGxB,MAAMI,kBAAkBN,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMO,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7B3B;QACA,aAAa2B,KAAK,WAAW;QAC7B,gBAAgB;QAChBM;IACF;IAEA,IAAIG,eAAeJ;IACnB,IAAIK,aAAaN,SAAS,KAAK;IAC/B,IAAIO,cAAcP,SAAS,MAAM;IAKjC,IAAI/B,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMuC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBd,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMe,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEf,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIiB;IAKJ,MAAMC,wBAAwBX,kBAC1BH,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMe,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9Bd,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAEjC,uBAAuB;IAGpC,MAAMiD,eAAehB,oBAAoB,cAAc;IACvD,MAAMiB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIhB,oBAAoB,sBAAsB,EAAE;QAC9Ca,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGb,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEiB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEa,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACa;IAG3Bb,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMkB,aAAalB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMsB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,aAAa;QAClC,aAAaF,KAAK,WAAW;QAG7B,0BAA0B3B,AAAgB,YAAhBA,eAA2B2B,KAAK,WAAW;IACvE;IAGA,IAAI2B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAalD;QACrD,EAAE,OAAM;YACN,MAAMuD,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,aAAa;gBAC5C,aAAaF,KAAK,WAAW;gBAE7B,0BAA0B3B,AAAgB,YAAhBA,eAA2B2B,KAAK,WAAW;YACvE;YACAuB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAalD;QACrD;QAEA,IAAIsD,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAIxB,iBACFH,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM4B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAAS7B,KAAK,WAAW;YAC1D8B;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBlC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,gBAAgBjE,AAAgBY,WAAhBZ,aAElBmB,OAAO,KAAK,CAAC6C,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA5B,YACAC,aACAtC;YAGN;QACF;QAGA,IAAIiC,iBAAiB;YACnB,IAAIqB,WAAW,cAAc,EAAE,QAC7BxB,oBAAoB,aAAa,CAACwB,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMa,SAASb,WAAW,mBAAmB,CAChDxB,oBAAoB,mBAAmB,CAACqC;YAI5C,IAAIb,WAAW,GAAG,EAChBxB,oBAAoB,gBAAgB,CAACwB,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChBxB,oBAAoB,mBAAmB,CAACwB,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnBxB,oBAAoB,YAAY,CAACwB,WAAW,MAAM;QAGpDxB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMoB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOU,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClCnB,aACAC;IAEJ;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACcA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAAuB,EACvBC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,AAAAA,IAAAA,mCAAAA,2CAAAA,AAAAA,EAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,gBAAgBf,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMgB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEhB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIkB;IAKJ,MAAMC,wBAAwBT,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMiB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BhB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMiD,eAAelB,oBAAoB,cAAc;IACvD,MAAMmB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIlB,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEmB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKL;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACe;IAG3Bf,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMoB,aAAapB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAMuB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCM;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI4B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAMyB,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACAwB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE;QAEA,IAAIwB,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAItB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM8B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAAS9B,KAAK,WAAW;YAC1D+B;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAACvC,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAOuC,cAET9C,OAAO,KAAK,CAAC6C,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,IAAAA,sBAAAA,MAAAA,AAAAA,EACE1B,qBACA;oBAEFf,OAAO,KAAK,CAAC6C,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkB/B,oBAAoB,6BAA6B,CACjE+B,cACA;4BACE,cAAc3B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAImB,WAAW,cAAc,EAAE,QAC7B1B,oBAAoB,aAAa,CAAC0B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD1B,oBAAoB,mBAAmB,CAACsC;YAI5C,IAAIZ,WAAW,GAAG,EAChB1B,oBAAoB,gBAAgB,CAAC0B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB1B,oBAAoB,mBAAmB,CAAC0B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB1B,oBAAoB,YAAY,CAAC0B,WAAW,MAAM;QAGpD1B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMsB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
|
|
@@ -26,13 +26,27 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
27
|
transformAutoGLMAction: ()=>transformAutoGLMAction
|
|
28
28
|
});
|
|
29
|
-
const external_common_js_namespaceObject = require("../../common.js");
|
|
30
29
|
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
30
|
+
const bbox_js_namespaceObject = require("../../shared/model-locate-result/bbox.js");
|
|
31
|
+
const pixel_bbox_mapper_js_namespaceObject = require("../../shared/model-locate-result/pixel-bbox-mapper.js");
|
|
31
32
|
const debug = (0, logger_namespaceObject.getDebug)('auto-glm-actions');
|
|
32
33
|
const AUTO_GLM_COORDINATE_MAX = 1000;
|
|
33
|
-
function
|
|
34
|
-
const
|
|
35
|
-
|
|
34
|
+
function autoGLMCoordinateToLocateParam(coordinate, size) {
|
|
35
|
+
const ctx = {
|
|
36
|
+
preparedSize: size
|
|
37
|
+
};
|
|
38
|
+
const pixelBbox = (0, pixel_bbox_mapper_js_namespaceObject.mapLocateResultToPixelBboxByCoordinates)({
|
|
39
|
+
type: 'point',
|
|
40
|
+
coordinates: coordinate
|
|
41
|
+
}, ctx, {
|
|
42
|
+
shape: 'point',
|
|
43
|
+
order: 'xy',
|
|
44
|
+
normalizedBy: AUTO_GLM_COORDINATE_MAX
|
|
45
|
+
});
|
|
46
|
+
return {
|
|
47
|
+
prompt: '',
|
|
48
|
+
locatedPixelBbox: (0, bbox_js_namespaceObject.finalizePixelBbox)(pixelBbox, coordinate, ctx)
|
|
49
|
+
};
|
|
36
50
|
}
|
|
37
51
|
const BACK_BUTTON_NAMES = [
|
|
38
52
|
'AndroidBackButton',
|
|
@@ -70,16 +84,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
|
|
|
70
84
|
{
|
|
71
85
|
const tapAction = doAction;
|
|
72
86
|
debug('Transform Tap action:', tapAction);
|
|
73
|
-
const
|
|
74
|
-
const locate = {
|
|
75
|
-
prompt: '',
|
|
76
|
-
bbox: [
|
|
77
|
-
x1,
|
|
78
|
-
y1,
|
|
79
|
-
x2,
|
|
80
|
-
y2
|
|
81
|
-
]
|
|
82
|
-
};
|
|
87
|
+
const locate = autoGLMCoordinateToLocateParam(tapAction.element, size);
|
|
83
88
|
return [
|
|
84
89
|
{
|
|
85
90
|
type: 'Tap',
|
|
@@ -93,16 +98,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
|
|
|
93
98
|
{
|
|
94
99
|
const doubleTapAction = doAction;
|
|
95
100
|
debug('Transform Double Tap action:', doubleTapAction);
|
|
96
|
-
const
|
|
97
|
-
const locate = {
|
|
98
|
-
prompt: '',
|
|
99
|
-
bbox: [
|
|
100
|
-
x1,
|
|
101
|
-
y1,
|
|
102
|
-
x2,
|
|
103
|
-
y2
|
|
104
|
-
]
|
|
105
|
-
};
|
|
101
|
+
const locate = autoGLMCoordinateToLocateParam(doubleTapAction.element, size);
|
|
106
102
|
return [
|
|
107
103
|
{
|
|
108
104
|
type: 'DoubleClick',
|
|
@@ -129,16 +125,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
|
|
|
129
125
|
{
|
|
130
126
|
const swipeAction = doAction;
|
|
131
127
|
debug('Transform Swipe action:', swipeAction);
|
|
132
|
-
const
|
|
133
|
-
const locate = {
|
|
134
|
-
prompt: '',
|
|
135
|
-
bbox: [
|
|
136
|
-
x1,
|
|
137
|
-
y1,
|
|
138
|
-
x2,
|
|
139
|
-
y2
|
|
140
|
-
]
|
|
141
|
-
};
|
|
128
|
+
const locate = autoGLMCoordinateToLocateParam(swipeAction.start, size);
|
|
142
129
|
const deltaX = swipeAction.end[0] - swipeAction.start[0];
|
|
143
130
|
const deltaY = swipeAction.end[1] - swipeAction.start[1];
|
|
144
131
|
let direction;
|
|
@@ -169,16 +156,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
|
|
|
169
156
|
{
|
|
170
157
|
const longPressAction = doAction;
|
|
171
158
|
debug('Transform Long Press action:', longPressAction);
|
|
172
|
-
const
|
|
173
|
-
const locate = {
|
|
174
|
-
prompt: '',
|
|
175
|
-
bbox: [
|
|
176
|
-
x1,
|
|
177
|
-
y1,
|
|
178
|
-
x2,
|
|
179
|
-
y2
|
|
180
|
-
]
|
|
181
|
-
};
|
|
159
|
+
const locate = autoGLMCoordinateToLocateParam(longPressAction.element, size);
|
|
182
160
|
return [
|
|
183
161
|
{
|
|
184
162
|
type: 'LongPress',
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/models/auto-glm/actions.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/actions.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/device';\nimport type {\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\n\nconst debug = getDebug('auto-glm-actions');\n\n/**\n * Auto-GLM coordinate system range: [0, AUTO_GLM_COORDINATE_MAX]\n */\nconst AUTO_GLM_COORDINATE_MAX = 1000;\n\nfunction autoGLMCoordinateToLocateParam(\n coordinate: [number, number],\n size: { width: number; height: number },\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: coordinate },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: AUTO_GLM_COORDINATE_MAX },\n );\n\n return {\n prompt: '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, coordinate, ctx),\n };\n}\n\nexport interface BaseAction {\n _metadata: string;\n think?: string;\n}\n\nexport interface TapAction extends BaseAction {\n _metadata: 'do';\n action: 'Tap';\n element: [number, number];\n}\n\nexport interface DoubleTapAction extends BaseAction {\n _metadata: 'do';\n action: 'Double Tap';\n element: [number, number];\n}\n\nexport interface TypeAction extends BaseAction {\n _metadata: 'do';\n action: 'Type';\n text: string;\n}\n\nexport interface SwipeAction extends BaseAction {\n _metadata: 'do';\n action: 'Swipe';\n start: [number, number];\n end: [number, number];\n}\n\nexport interface LongPressAction extends BaseAction {\n _metadata: 'do';\n action: 'Long Press';\n element: [number, number];\n}\n\nexport interface LaunchAction extends BaseAction {\n _metadata: 'do';\n action: 'Launch';\n app: string;\n}\n\nexport interface BackAction extends BaseAction {\n _metadata: 'do';\n action: 'Back';\n}\n\nexport interface HomeAction extends BaseAction {\n _metadata: 'do';\n action: 'Home';\n}\n\nexport interface WaitAction extends BaseAction {\n _metadata: 'do';\n action: 'Wait';\n durationMs: number;\n}\n\nexport interface InteractAction extends BaseAction {\n _metadata: 'do';\n action: 'Interact';\n}\n\nexport interface CallAPIAction extends BaseAction {\n _metadata: 'do';\n action: 'Call_API';\n instruction: string;\n}\n\nexport interface TakeoverAction extends BaseAction {\n _metadata: 'do';\n action: 'Take_over';\n message: string;\n}\n\nexport interface NoteAction extends BaseAction {\n _metadata: 'do';\n action: 'Note';\n message: string;\n}\n\nexport interface FinishAction extends BaseAction {\n _metadata: 'finish';\n message: string;\n}\n\nexport type ParsedAction =\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction\n | FinishAction;\n\nconst BACK_BUTTON_NAMES = ['AndroidBackButton', 'HarmonyBackButton'];\nconst HOME_BUTTON_NAMES = ['AndroidHomeButton', 'HarmonyHomeButton'];\n\n/**\n * Find the action name in actionSpace that matches one of the known names.\n * Falls back to defaultName if no match found or actionSpace is not provided.\n */\nfunction findActionName(\n actionSpace: DeviceAction[] | undefined,\n knownNames: string[],\n defaultName: string,\n): string {\n if (!actionSpace) return defaultName;\n const match = actionSpace.find((a) => knownNames.includes(a.name));\n return match ? match.name : defaultName;\n}\n\nexport function transformAutoGLMAction(\n action: ParsedAction,\n size: { width: number; height: number },\n actionSpace?: DeviceAction[],\n): PlanningAction[] {\n try {\n switch (action._metadata) {\n case 'finish': {\n const finishAction = action as FinishAction;\n debug('Transform finish action:', finishAction);\n return [\n {\n type: 'Finished',\n param: {},\n thought: finishAction.message,\n },\n ];\n }\n case 'do': {\n const doAction = action as\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction;\n\n switch ((doAction as any).action) {\n case 'Tap': {\n const tapAction = doAction as TapAction;\n debug('Transform Tap action:', tapAction);\n const locate = autoGLMCoordinateToLocateParam(\n tapAction.element,\n size,\n );\n\n return [\n {\n type: 'Tap',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Double Tap': {\n const doubleTapAction = doAction as DoubleTapAction;\n debug('Transform Double Tap action:', doubleTapAction);\n const locate = autoGLMCoordinateToLocateParam(\n doubleTapAction.element,\n size,\n );\n\n return [\n {\n type: 'DoubleClick',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Type': {\n const typeAction = doAction as TypeAction;\n debug('Transform Type action:', typeAction);\n\n return [\n {\n type: 'Input',\n param: {\n value: typeAction.text,\n },\n },\n ];\n }\n case 'Swipe': {\n const swipeAction = doAction as SwipeAction;\n debug('Transform Swipe action:', swipeAction);\n\n const locate = autoGLMCoordinateToLocateParam(\n swipeAction.start,\n size,\n );\n\n // Calculate horizontal and vertical delta in [0,AUTO_GLM_COORDINATE_MAX] coordinate system\n const deltaX = swipeAction.end[0] - swipeAction.start[0];\n const deltaY = swipeAction.end[1] - swipeAction.start[1];\n\n // Determine direction and distance\n let direction: 'up' | 'down' | 'left' | 'right';\n let distance: number;\n\n const absDeltaX = Math.abs(deltaX);\n const absDeltaY = Math.abs(deltaY);\n\n if (absDeltaY > absDeltaX) {\n // Vertical scroll\n distance = Math.round(\n (absDeltaY * size.height) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaY > 0 ? 'up' : 'down';\n } else {\n // Horizontal scroll\n distance = Math.round(\n (absDeltaX * size.width) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaX > 0 ? 'left' : 'right';\n }\n\n debug(\n `Calculate swipe direction: ${direction}, distance: ${distance}`,\n );\n\n return [\n {\n type: 'Scroll',\n param: {\n locate,\n // The scrolling direction here all refers to which direction of the page's content will appear on the screen.\n distance,\n direction,\n },\n thought: swipeAction.think || '',\n },\n ];\n }\n case 'Long Press': {\n const longPressAction = doAction as LongPressAction;\n debug('Transform Long Press action:', longPressAction);\n const locate = autoGLMCoordinateToLocateParam(\n longPressAction.element,\n size,\n );\n\n return [\n {\n type: 'LongPress',\n param: {\n locate,\n },\n thought: longPressAction.think || '',\n },\n ];\n }\n case 'Back': {\n const backAction = doAction as BackAction;\n debug('Transform Back action:', backAction);\n return [\n {\n type: findActionName(\n actionSpace,\n BACK_BUTTON_NAMES,\n 'AndroidBackButton',\n ),\n param: {},\n thought: backAction.think || '',\n },\n ];\n }\n case 'Home': {\n const homeAction = doAction as HomeAction;\n debug('Transform Home action:', homeAction);\n return [\n {\n type: findActionName(\n actionSpace,\n HOME_BUTTON_NAMES,\n 'AndroidHomeButton',\n ),\n param: {},\n thought: homeAction.think || '',\n },\n ];\n }\n case 'Wait': {\n const waitAction = doAction as WaitAction;\n debug('Transform Wait action:', waitAction);\n return [\n {\n type: 'Sleep',\n param: {\n timeMs: waitAction.durationMs,\n },\n thought: waitAction.think || '',\n },\n ];\n }\n case 'Launch': {\n const launchAction = doAction as LaunchAction;\n debug('Transform Launch action:', launchAction);\n return [\n {\n type: 'Launch',\n param: { uri: launchAction.app },\n thought: launchAction.think || '',\n },\n ];\n }\n case 'Interact': {\n throw new Error(\n `Action \"Interact\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Call_API': {\n throw new Error(\n `Action \"Call_API\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Take_over': {\n throw new Error(\n `Action \"Take_over\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Note': {\n throw new Error(\n `Action \"Note\" from auto-glm is not supported in the current implementation.`,\n );\n }\n default:\n throw new Error(\n `Unknown do() action type: ${(doAction as any).action}`,\n );\n }\n }\n default:\n throw new Error(\n `Unknown action metadata: ${(action as any)._metadata}`,\n );\n }\n } catch (error) {\n const errorMessage = error instanceof Error ? error.message : String(error);\n debug('Transform error:', errorMessage);\n throw new Error(`Failed to transform action: ${errorMessage}`);\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","AUTO_GLM_COORDINATE_MAX","autoGLMCoordinateToLocateParam","coordinate","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","BACK_BUTTON_NAMES","HOME_BUTTON_NAMES","findActionName","actionSpace","knownNames","defaultName","match","a","transformAutoGLMAction","action","finishAction","doAction","tapAction","locate","doubleTapAction","typeAction","swipeAction","deltaX","deltaY","direction","distance","absDeltaX","Math","absDeltaY","longPressAction","backAction","homeAction","waitAction","launchAction","Error","error","errorMessage","String"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACGA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAKvB,MAAMC,0BAA0B;AAEhC,SAASC,+BACPC,UAA4B,EAC5BC,IAAuC;IAEvC,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,AAAAA,IAAAA,qCAAAA,uCAAAA,AAAAA,EAChB;QAAE,MAAM;QAAS,aAAaJ;IAAW,GACzCE,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAcJ;IAAwB;IAGvE,OAAO;QACL,QAAQ;QACR,kBAAkBO,AAAAA,IAAAA,wBAAAA,iBAAAA,AAAAA,EAAkBF,WAAWH,YAAYE;IAC7D;AACF;AAwGA,MAAMI,oBAAoB;IAAC;IAAqB;CAAoB;AACpE,MAAMC,oBAAoB;IAAC;IAAqB;CAAoB;AAMpE,SAASC,eACPC,WAAuC,EACvCC,UAAoB,EACpBC,WAAmB;IAEnB,IAAI,CAACF,aAAa,OAAOE;IACzB,MAAMC,QAAQH,YAAY,IAAI,CAAC,CAACI,IAAMH,WAAW,QAAQ,CAACG,EAAE,IAAI;IAChE,OAAOD,QAAQA,MAAM,IAAI,GAAGD;AAC9B;AAEO,SAASG,uBACdC,MAAoB,EACpBd,IAAuC,EACvCQ,WAA4B;IAE5B,IAAI;QACF,OAAQM,OAAO,SAAS;YACtB,KAAK;gBAAU;oBACb,MAAMC,eAAeD;oBACrBnB,MAAM,4BAA4BoB;oBAClC,OAAO;wBACL;4BACE,MAAM;4BACN,OAAO,CAAC;4BACR,SAASA,aAAa,OAAO;wBAC/B;qBACD;gBACH;YACA,KAAK;gBAAM;oBACT,MAAMC,WAAWF;oBAejB,OAASE,SAAiB,MAAM;wBAC9B,KAAK;4BAAO;gCACV,MAAMC,YAAYD;gCAClBrB,MAAM,yBAAyBsB;gCAC/B,MAAMC,SAASpB,+BACbmB,UAAU,OAAO,EACjBjB;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMC,kBAAkBH;gCACxBrB,MAAM,gCAAgCwB;gCACtC,MAAMD,SAASpB,+BACbqB,gBAAgB,OAAO,EACvBnB;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAME,aAAaJ;gCACnBrB,MAAM,0BAA0ByB;gCAEhC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,OAAOA,WAAW,IAAI;wCACxB;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAS;gCACZ,MAAMC,cAAcL;gCACpBrB,MAAM,2BAA2B0B;gCAEjC,MAAMH,SAASpB,+BACbuB,YAAY,KAAK,EACjBrB;gCAIF,MAAMsB,SAASD,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCACxD,MAAME,SAASF,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCAGxD,IAAIG;gCACJ,IAAIC;gCAEJ,MAAMC,YAAYC,KAAK,GAAG,CAACL;gCAC3B,MAAMM,YAAYD,KAAK,GAAG,CAACJ;gCAE3B,IAAIK,YAAYF,WAAW;oCAEzBD,WAAWE,KAAK,KAAK,CAClBC,YAAY5B,KAAK,MAAM,GAAIH;oCAE9B2B,YAAYD,SAAS,IAAI,OAAO;gCAClC,OAAO;oCAELE,WAAWE,KAAK,KAAK,CAClBD,YAAY1B,KAAK,KAAK,GAAIH;oCAE7B2B,YAAYF,SAAS,IAAI,SAAS;gCACpC;gCAEA3B,MACE,CAAC,2BAA2B,EAAE6B,UAAU,YAAY,EAAEC,UAAU;gCAGlE,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLP;4CAEAO;4CACAD;wCACF;wCACA,SAASH,YAAY,KAAK,IAAI;oCAChC;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMQ,kBAAkBb;gCACxBrB,MAAM,gCAAgCkC;gCACtC,MAAMX,SAASpB,+BACb+B,gBAAgB,OAAO,EACvB7B;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;wCACA,SAASW,gBAAgB,KAAK,IAAI;oCACpC;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAad;gCACnBrB,MAAM,0BAA0BmC;gCAChC,OAAO;oCACL;wCACE,MAAMvB,eACJC,aACAH,mBACA;wCAEF,OAAO,CAAC;wCACR,SAASyB,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAaf;gCACnBrB,MAAM,0BAA0BoC;gCAChC,OAAO;oCACL;wCACE,MAAMxB,eACJC,aACAF,mBACA;wCAEF,OAAO,CAAC;wCACR,SAASyB,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAahB;gCACnBrB,MAAM,0BAA0BqC;gCAChC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,QAAQA,WAAW,UAAU;wCAC/B;wCACA,SAASA,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAU;gCACb,MAAMC,eAAejB;gCACrBrB,MAAM,4BAA4BsC;gCAClC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CAAE,KAAKA,aAAa,GAAG;wCAAC;wCAC/B,SAASA,aAAa,KAAK,IAAI;oCACjC;iCACD;4BACH;wBACA,KAAK;4BACH,MAAM,IAAIC,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ;4BACE,MAAM,IAAIA,MACR,CAAC,0BAA0B,EAAGlB,SAAiB,MAAM,EAAE;oBAE7D;gBACF;YACA;gBACE,MAAM,IAAIkB,MACR,CAAC,yBAAyB,EAAGpB,OAAe,SAAS,EAAE;QAE7D;IACF,EAAE,OAAOqB,OAAO;QACd,MAAMC,eAAeD,iBAAiBD,QAAQC,MAAM,OAAO,GAAGE,OAAOF;QACrExC,MAAM,oBAAoByC;QAC1B,MAAM,IAAIF,MAAM,CAAC,4BAA4B,EAAEE,cAAc;IAC/D;AACF"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.d = (exports1, definition)=>{
|
|
5
|
+
for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
|
|
6
|
+
enumerable: true,
|
|
7
|
+
get: definition[key]
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
})();
|
|
11
|
+
(()=>{
|
|
12
|
+
__webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
|
|
13
|
+
})();
|
|
14
|
+
(()=>{
|
|
15
|
+
__webpack_require__.r = (exports1)=>{
|
|
16
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
17
|
+
value: 'Module'
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
20
|
+
value: true
|
|
21
|
+
});
|
|
22
|
+
};
|
|
23
|
+
})();
|
|
24
|
+
var __webpack_exports__ = {};
|
|
25
|
+
__webpack_require__.r(__webpack_exports__);
|
|
26
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
autoGlmAdapters: ()=>autoGlmAdapters
|
|
28
|
+
});
|
|
29
|
+
const external_locate_js_namespaceObject = require("./locate.js");
|
|
30
|
+
const external_planning_js_namespaceObject = require("./planning.js");
|
|
31
|
+
const external_prompt_js_namespaceObject = require("./prompt.js");
|
|
32
|
+
const defaultAutoGlmReplanningCycleLimit = 100;
|
|
33
|
+
function createAutoGlmAdapter({ getPlanPrompt, getLocatePrompt }) {
|
|
34
|
+
return {
|
|
35
|
+
chatCompletion: {
|
|
36
|
+
unsupportedUserConfig: [
|
|
37
|
+
'reasoningEnabled',
|
|
38
|
+
'reasoningEffort',
|
|
39
|
+
'reasoningBudget'
|
|
40
|
+
],
|
|
41
|
+
buildChatCompletionParams: ({ midsceneDefaults, userConfig })=>({
|
|
42
|
+
config: {
|
|
43
|
+
temperature: userConfig.temperature ?? midsceneDefaults.temperature,
|
|
44
|
+
top_p: 0.85,
|
|
45
|
+
frequency_penalty: 0.2
|
|
46
|
+
}
|
|
47
|
+
})
|
|
48
|
+
},
|
|
49
|
+
planning: {
|
|
50
|
+
kind: 'custom',
|
|
51
|
+
cacheEnabled: false,
|
|
52
|
+
defaultReplanningCycleLimit: defaultAutoGlmReplanningCycleLimit,
|
|
53
|
+
planFn: (userInstruction, options)=>(0, external_planning_js_namespaceObject.autoGlmPlanning)(userInstruction, options, getPlanPrompt)
|
|
54
|
+
},
|
|
55
|
+
locate: {
|
|
56
|
+
kind: 'custom',
|
|
57
|
+
locateFn: (elementDescription, options)=>(0, external_locate_js_namespaceObject.autoGlmLocate)(elementDescription, options, getLocatePrompt)
|
|
58
|
+
}
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
const autoGlmAdapters = {
|
|
62
|
+
'auto-glm': createAutoGlmAdapter({
|
|
63
|
+
getPlanPrompt: external_prompt_js_namespaceObject.getAutoGLMChinesePlanPrompt,
|
|
64
|
+
getLocatePrompt: external_prompt_js_namespaceObject.getAutoGLMChineseLocatePrompt
|
|
65
|
+
}),
|
|
66
|
+
'auto-glm-multilingual': createAutoGlmAdapter({
|
|
67
|
+
getPlanPrompt: external_prompt_js_namespaceObject.getAutoGLMMultilingualPlanPrompt,
|
|
68
|
+
getLocatePrompt: external_prompt_js_namespaceObject.getAutoGLMMultilingualLocatePrompt
|
|
69
|
+
})
|
|
70
|
+
};
|
|
71
|
+
exports.autoGlmAdapters = __webpack_exports__.autoGlmAdapters;
|
|
72
|
+
for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
73
|
+
"autoGlmAdapters"
|
|
74
|
+
].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
|
|
75
|
+
Object.defineProperty(exports, '__esModule', {
|
|
76
|
+
value: true
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/models/auto-glm/adapter.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/adapter.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TModelFamily } from '@midscene/shared/env';\nimport type { ModelAdapterDefinition } from '../types';\nimport { autoGlmLocate } from './locate';\nimport { autoGlmPlanning } from './planning';\nimport {\n getAutoGLMChineseLocatePrompt,\n getAutoGLMChinesePlanPrompt,\n getAutoGLMMultilingualLocatePrompt,\n getAutoGLMMultilingualPlanPrompt,\n} from './prompt';\n\nconst defaultAutoGlmReplanningCycleLimit = 100;\n\nfunction createAutoGlmAdapter({\n getPlanPrompt,\n getLocatePrompt,\n}: {\n getPlanPrompt: () => string;\n getLocatePrompt: () => string;\n}): ModelAdapterDefinition {\n return {\n chatCompletion: {\n unsupportedUserConfig: [\n 'reasoningEnabled',\n 'reasoningEffort',\n 'reasoningBudget',\n ],\n buildChatCompletionParams: ({ midsceneDefaults, userConfig }) => ({\n config: {\n temperature: userConfig.temperature ?? midsceneDefaults.temperature,\n top_p: 0.85,\n frequency_penalty: 0.2,\n },\n }),\n },\n planning: {\n kind: 'custom',\n cacheEnabled: false,\n defaultReplanningCycleLimit: defaultAutoGlmReplanningCycleLimit,\n planFn: (userInstruction, options) =>\n autoGlmPlanning(userInstruction, options, getPlanPrompt),\n },\n locate: {\n kind: 'custom',\n locateFn: (elementDescription, options) =>\n autoGlmLocate(elementDescription, options, getLocatePrompt),\n },\n };\n}\n\nexport const autoGlmAdapters = {\n 'auto-glm': createAutoGlmAdapter({\n getPlanPrompt: getAutoGLMChinesePlanPrompt,\n getLocatePrompt: getAutoGLMChineseLocatePrompt,\n }),\n 'auto-glm-multilingual': createAutoGlmAdapter({\n getPlanPrompt: getAutoGLMMultilingualPlanPrompt,\n getLocatePrompt: getAutoGLMMultilingualLocatePrompt,\n }),\n} satisfies Pick<\n Record<TModelFamily, ModelAdapterDefinition>,\n 'auto-glm' | 'auto-glm-multilingual'\n>;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","defaultAutoGlmReplanningCycleLimit","createAutoGlmAdapter","getPlanPrompt","getLocatePrompt","midsceneDefaults","userConfig","userInstruction","options","autoGlmPlanning","elementDescription","autoGlmLocate","autoGlmAdapters","getAutoGLMChinesePlanPrompt","getAutoGLMChineseLocatePrompt","getAutoGLMMultilingualPlanPrompt","getAutoGLMMultilingualLocatePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACKA,MAAMI,qCAAqC;AAE3C,SAASC,qBAAqB,EAC5BC,aAAa,EACbC,eAAe,EAIhB;IACC,OAAO;QACL,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;aACD;YACD,2BAA2B,CAAC,EAAEC,gBAAgB,EAAEC,UAAU,EAAE,GAAM;oBAChE,QAAQ;wBACN,aAAaA,WAAW,WAAW,IAAID,iBAAiB,WAAW;wBACnE,OAAO;wBACP,mBAAmB;oBACrB;gBACF;QACF;QACA,UAAU;YACR,MAAM;YACN,cAAc;YACd,6BAA6BJ;YAC7B,QAAQ,CAACM,iBAAiBC,UACxBC,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBF,iBAAiBC,SAASL;QAC9C;QACA,QAAQ;YACN,MAAM;YACN,UAAU,CAACO,oBAAoBF,UAC7BG,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EAAcD,oBAAoBF,SAASJ;QAC/C;IACF;AACF;AAEO,MAAMQ,kBAAkB;IAC7B,YAAYV,qBAAqB;QAC/B,eAAeW,mCAAAA,2BAA2BA;QAC1C,iBAAiBC,mCAAAA,6BAA6BA;IAChD;IACA,yBAAyBZ,qBAAqB;QAC5C,eAAea,mCAAAA,gCAAgCA;QAC/C,iBAAiBC,mCAAAA,kCAAkCA;IACrD;AACF"}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.d = (exports1, definition)=>{
|
|
5
|
+
for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
|
|
6
|
+
enumerable: true,
|
|
7
|
+
get: definition[key]
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
})();
|
|
11
|
+
(()=>{
|
|
12
|
+
__webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
|
|
13
|
+
})();
|
|
14
|
+
(()=>{
|
|
15
|
+
__webpack_require__.r = (exports1)=>{
|
|
16
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
17
|
+
value: 'Module'
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
20
|
+
value: true
|
|
21
|
+
});
|
|
22
|
+
};
|
|
23
|
+
})();
|
|
24
|
+
var __webpack_exports__ = {};
|
|
25
|
+
__webpack_require__.r(__webpack_exports__);
|
|
26
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
autoGlmLocate: ()=>autoGlmLocate
|
|
28
|
+
});
|
|
29
|
+
const extractor_namespaceObject = require("@midscene/shared/extractor");
|
|
30
|
+
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
31
|
+
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
32
|
+
const external_inspect_js_namespaceObject = require("../../inspect.js");
|
|
33
|
+
const llm_locator_js_namespaceObject = require("../../prompt/llm-locator.js");
|
|
34
|
+
const index_js_namespaceObject = require("../../service-caller/index.js");
|
|
35
|
+
const bbox_js_namespaceObject = require("../../shared/model-locate-result/bbox.js");
|
|
36
|
+
const pixel_bbox_mapper_js_namespaceObject = require("../../shared/model-locate-result/pixel-bbox-mapper.js");
|
|
37
|
+
const locate_result_rect_js_namespaceObject = require("../../workflows/inspect/locate-result-rect.js");
|
|
38
|
+
const search_area_mapping_js_namespaceObject = require("../../workflows/inspect/search-area-mapping.js");
|
|
39
|
+
const external_parser_js_namespaceObject = require("./parser.js");
|
|
40
|
+
const debugInspect = (0, logger_namespaceObject.getDebug)('ai:inspect');
|
|
41
|
+
async function autoGlmLocate(elementDescription, options, getSystemPrompt) {
|
|
42
|
+
const { context, modelRuntime } = options;
|
|
43
|
+
const screenshotBase64 = context.screenshot.base64;
|
|
44
|
+
(0, utils_namespaceObject.assert)(elementDescription, "cannot find the target element description");
|
|
45
|
+
const elementDescriptionText = (0, external_inspect_js_namespaceObject.extraTextFromUserPrompt)(elementDescription);
|
|
46
|
+
const userInstructionPrompt = (0, llm_locator_js_namespaceObject.findElementPrompt)(elementDescriptionText);
|
|
47
|
+
const locateImage = options.searchConfig?.image ?? {
|
|
48
|
+
imageBase64: screenshotBase64,
|
|
49
|
+
width: context.shotSize.width,
|
|
50
|
+
height: context.shotSize.height
|
|
51
|
+
};
|
|
52
|
+
const imagePayload = locateImage.imageBase64;
|
|
53
|
+
const imageWidth = locateImage.width;
|
|
54
|
+
const imageHeight = locateImage.height;
|
|
55
|
+
const msgs = [
|
|
56
|
+
{
|
|
57
|
+
role: 'system',
|
|
58
|
+
content: getSystemPrompt()
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
role: 'user',
|
|
62
|
+
content: [
|
|
63
|
+
{
|
|
64
|
+
type: 'image_url',
|
|
65
|
+
image_url: {
|
|
66
|
+
url: imagePayload,
|
|
67
|
+
detail: 'high'
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
type: 'text',
|
|
72
|
+
text: `Tap: ${userInstructionPrompt}`
|
|
73
|
+
}
|
|
74
|
+
]
|
|
75
|
+
}
|
|
76
|
+
];
|
|
77
|
+
if ('string' != typeof elementDescription) {
|
|
78
|
+
const addOns = await (0, external_inspect_js_namespaceObject.promptsToChatParam)({
|
|
79
|
+
images: elementDescription.images,
|
|
80
|
+
convertHttpImage2Base64: elementDescription.convertHttpImage2Base64
|
|
81
|
+
});
|
|
82
|
+
msgs.push(...addOns);
|
|
83
|
+
}
|
|
84
|
+
const { content: rawResponseContent, usage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, modelRuntime, {
|
|
85
|
+
abortSignal: options.abortSignal
|
|
86
|
+
});
|
|
87
|
+
debugInspect('auto-glm rawResponse:', rawResponseContent);
|
|
88
|
+
const parsed = (0, external_parser_js_namespaceObject.parseAutoGLMLocateResponse)(rawResponseContent);
|
|
89
|
+
debugInspect('auto-glm thinking:', parsed.think);
|
|
90
|
+
debugInspect('auto-glm coordinates:', parsed.coordinates);
|
|
91
|
+
let resRect;
|
|
92
|
+
let matchedElement;
|
|
93
|
+
let errors = [];
|
|
94
|
+
if (parsed.error || !parsed.coordinates) {
|
|
95
|
+
errors = [
|
|
96
|
+
parsed.error || 'Failed to parse auto-glm response'
|
|
97
|
+
];
|
|
98
|
+
debugInspect('auto-glm parse error:', errors[0]);
|
|
99
|
+
} else {
|
|
100
|
+
const { x, y } = parsed.coordinates;
|
|
101
|
+
debugInspect('auto-glm coordinates [0-999]:', {
|
|
102
|
+
x,
|
|
103
|
+
y
|
|
104
|
+
});
|
|
105
|
+
const ctx = {
|
|
106
|
+
preparedSize: {
|
|
107
|
+
width: imageWidth,
|
|
108
|
+
height: imageHeight
|
|
109
|
+
}
|
|
110
|
+
};
|
|
111
|
+
const targetPixelBbox = (0, bbox_js_namespaceObject.finalizePixelBbox)((0, pixel_bbox_mapper_js_namespaceObject.mapLocateResultToPixelBboxByCoordinates)({
|
|
112
|
+
type: 'point',
|
|
113
|
+
coordinates: [
|
|
114
|
+
x,
|
|
115
|
+
y
|
|
116
|
+
]
|
|
117
|
+
}, ctx, {
|
|
118
|
+
shape: 'point',
|
|
119
|
+
order: 'xy',
|
|
120
|
+
normalizedBy: 1000
|
|
121
|
+
}), parsed.coordinates, ctx);
|
|
122
|
+
resRect = (0, locate_result_rect_js_namespaceObject.pixelBboxToRect)((0, search_area_mapping_js_namespaceObject.mapSearchAreaPixelBboxToOriginalPixelBbox)(targetPixelBbox, options.searchConfig?.mapping));
|
|
123
|
+
debugInspect('auto-glm resRect:', resRect);
|
|
124
|
+
const element = (0, extractor_namespaceObject.generateElementByRect)(resRect, elementDescriptionText);
|
|
125
|
+
if (element) matchedElement = element;
|
|
126
|
+
}
|
|
127
|
+
return {
|
|
128
|
+
rect: resRect,
|
|
129
|
+
parseResult: {
|
|
130
|
+
element: matchedElement,
|
|
131
|
+
errors
|
|
132
|
+
},
|
|
133
|
+
rawResponse: rawResponseContent,
|
|
134
|
+
usage,
|
|
135
|
+
reasoning_content: parsed.think
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
exports.autoGlmLocate = __webpack_exports__.autoGlmLocate;
|
|
139
|
+
for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
140
|
+
"autoGlmLocate"
|
|
141
|
+
].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
|
|
142
|
+
Object.defineProperty(exports, '__esModule', {
|
|
143
|
+
value: true
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
//# sourceMappingURL=locate.js.map
|