@midscene/core 1.8.10 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +40 -50
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/task-builder.mjs +39 -19
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +24 -22
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +11 -14
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/connectivity.mjs +7 -3
- package/dist/es/ai-model/connectivity.mjs.map +1 -1
- package/dist/es/ai-model/errors.mjs +9 -0
- package/dist/es/ai-model/errors.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +3 -4
- package/dist/es/ai-model/inspect.mjs +132 -144
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +46 -28
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/{auto-glm → models/auto-glm}/actions.mjs +22 -44
- package/dist/es/ai-model/models/auto-glm/actions.mjs.map +1 -0
- package/dist/es/ai-model/models/auto-glm/adapter.mjs +45 -0
- package/dist/es/ai-model/models/auto-glm/adapter.mjs.map +1 -0
- package/dist/es/ai-model/models/auto-glm/locate.mjs +112 -0
- package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -0
- package/dist/es/ai-model/models/auto-glm/parser.mjs.map +1 -0
- package/dist/es/ai-model/{auto-glm → models/auto-glm}/planning.mjs +6 -7
- package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -0
- package/dist/es/ai-model/{auto-glm → models/auto-glm}/prompt.mjs +3 -11
- package/dist/es/ai-model/models/auto-glm/prompt.mjs.map +1 -0
- package/dist/es/ai-model/models/default.mjs +12 -0
- package/dist/es/ai-model/models/default.mjs.map +1 -0
- package/dist/es/ai-model/models/doubao.mjs +138 -0
- package/dist/es/ai-model/models/doubao.mjs.map +1 -0
- package/dist/es/ai-model/models/gemini.mjs +34 -0
- package/dist/es/ai-model/models/gemini.mjs.map +1 -0
- package/dist/es/ai-model/models/glm.mjs +37 -0
- package/dist/es/ai-model/models/glm.mjs.map +1 -0
- package/dist/es/ai-model/models/gpt.mjs +31 -0
- package/dist/es/ai-model/models/gpt.mjs.map +1 -0
- package/dist/es/ai-model/models/index.mjs +2 -0
- package/dist/es/ai-model/models/qwen.mjs +113 -0
- package/dist/es/ai-model/models/qwen.mjs.map +1 -0
- package/dist/es/ai-model/models/registry.mjs +45 -0
- package/dist/es/ai-model/models/registry.mjs.map +1 -0
- package/dist/es/ai-model/models/resolved.mjs +104 -0
- package/dist/es/ai-model/models/resolved.mjs.map +1 -0
- package/dist/es/ai-model/models/types.mjs +0 -0
- package/dist/es/ai-model/models/ui-tars/adapter.mjs +142 -0
- package/dist/es/ai-model/models/ui-tars/adapter.mjs.map +1 -0
- package/dist/es/ai-model/{ui-tars-planning.mjs → models/ui-tars/planning.mjs} +44 -62
- package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/extraction.mjs +3 -3
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +11 -11
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +25 -60
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -10
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/locate-grounding-rules.mjs +9 -0
- package/dist/es/ai-model/prompt/locate-grounding-rules.mjs.map +1 -0
- package/dist/es/ai-model/prompt/locate-param-example.mjs +15 -0
- package/dist/es/ai-model/prompt/locate-param-example.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +5 -5
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +5 -5
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompts/locate-result-coordinates.mjs +107 -0
- package/dist/es/ai-model/prompts/locate-result-coordinates.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +59 -190
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/json.mjs +60 -0
- package/dist/es/ai-model/service-caller/json.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/bbox.mjs +68 -0
- package/dist/es/ai-model/shared/model-locate-result/bbox.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/factory.mjs +96 -0
- package/dist/es/ai-model/shared/model-locate-result/factory.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/index.mjs +3 -0
- package/dist/es/ai-model/shared/model-locate-result/parse.mjs +41 -0
- package/dist/es/ai-model/shared/model-locate-result/parse.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs +64 -0
- package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs.map +1 -0
- package/dist/es/ai-model/shared/model-locate-result/types.mjs +0 -0
- package/dist/es/ai-model/types.mjs +0 -0
- package/dist/es/ai-model/workflows/image-preprocess.mjs +27 -0
- package/dist/es/ai-model/workflows/image-preprocess.mjs.map +1 -0
- package/dist/es/ai-model/workflows/inspect/index.mjs +2 -0
- package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs +23 -0
- package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs.map +1 -0
- package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs +18 -0
- package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs.map +1 -0
- package/dist/es/ai-model/workflows/inspect/types.mjs +0 -0
- package/dist/es/ai-model/workflows/planning/index.mjs +5 -0
- package/dist/es/ai-model/workflows/planning/index.mjs.map +1 -0
- package/dist/es/ai-model/workflows/planning/types.mjs +0 -0
- package/dist/es/common.mjs +2 -174
- package/dist/es/common.mjs.map +1 -1
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/service/index.mjs +96 -69
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml/player.mjs +4 -3
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/lib/agent/agent.js +43 -53
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/task-builder.js +38 -18
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/agent/tasks.js +23 -21
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +17 -17
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/connectivity.js +7 -3
- package/dist/lib/ai-model/connectivity.js.map +1 -1
- package/dist/lib/ai-model/errors.js +46 -0
- package/dist/lib/ai-model/errors.js.map +1 -0
- package/dist/lib/ai-model/index.js +7 -14
- package/dist/lib/ai-model/inspect.js +141 -144
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +44 -26
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/{auto-glm → models/auto-glm}/actions.js +22 -44
- package/dist/lib/ai-model/models/auto-glm/actions.js.map +1 -0
- package/dist/lib/ai-model/models/auto-glm/adapter.js +79 -0
- package/dist/lib/ai-model/models/auto-glm/adapter.js.map +1 -0
- package/dist/lib/ai-model/models/auto-glm/locate.js +146 -0
- package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -0
- package/dist/lib/ai-model/models/auto-glm/parser.js.map +1 -0
- package/dist/lib/ai-model/{auto-glm → models/auto-glm}/planning.js +8 -9
- package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -0
- package/dist/lib/ai-model/{auto-glm → models/auto-glm}/prompt.js +14 -16
- package/dist/lib/ai-model/models/auto-glm/prompt.js.map +1 -0
- package/dist/lib/ai-model/{auto-glm/util.js → models/default.js} +13 -13
- package/dist/lib/ai-model/models/default.js.map +1 -0
- package/dist/lib/ai-model/models/doubao.js +184 -0
- package/dist/lib/ai-model/models/doubao.js.map +1 -0
- package/dist/lib/ai-model/models/gemini.js +68 -0
- package/dist/lib/ai-model/models/gemini.js.map +1 -0
- package/dist/lib/ai-model/models/glm.js +71 -0
- package/dist/lib/ai-model/models/glm.js.map +1 -0
- package/dist/lib/ai-model/models/gpt.js +65 -0
- package/dist/lib/ai-model/models/gpt.js.map +1 -0
- package/dist/lib/ai-model/{service-caller/image-detail.js → models/index.js} +8 -7
- package/dist/lib/ai-model/models/index.js.map +1 -0
- package/dist/lib/ai-model/models/qwen.js +147 -0
- package/dist/lib/ai-model/models/qwen.js.map +1 -0
- package/dist/lib/ai-model/models/registry.js +85 -0
- package/dist/lib/ai-model/models/registry.js.map +1 -0
- package/dist/lib/ai-model/models/resolved.js +138 -0
- package/dist/lib/ai-model/models/resolved.js.map +1 -0
- package/dist/lib/ai-model/models/types.js +20 -0
- package/dist/lib/ai-model/models/types.js.map +1 -0
- package/dist/lib/ai-model/models/ui-tars/adapter.js +176 -0
- package/dist/lib/ai-model/models/ui-tars/adapter.js.map +1 -0
- package/dist/lib/ai-model/{ui-tars-planning.js → models/ui-tars/planning.js} +44 -62
- package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/extraction.js +3 -3
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +11 -11
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +25 -60
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -10
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/locate-grounding-rules.js +43 -0
- package/dist/lib/ai-model/prompt/locate-grounding-rules.js.map +1 -0
- package/dist/lib/ai-model/prompt/locate-param-example.js +52 -0
- package/dist/lib/ai-model/prompt/locate-param-example.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +5 -5
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +5 -5
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/prompts/locate-result-coordinates.js +150 -0
- package/dist/lib/ai-model/prompts/locate-result-coordinates.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +68 -199
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/service-caller/json.js +100 -0
- package/dist/lib/ai-model/service-caller/json.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/bbox.js +117 -0
- package/dist/lib/ai-model/shared/model-locate-result/bbox.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/factory.js +130 -0
- package/dist/lib/ai-model/shared/model-locate-result/factory.js.map +1 -0
- package/dist/lib/ai-model/{prompt/common.js → shared/model-locate-result/index.js} +9 -9
- package/dist/lib/ai-model/shared/model-locate-result/index.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/parse.js +78 -0
- package/dist/lib/ai-model/shared/model-locate-result/parse.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js +98 -0
- package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js.map +1 -0
- package/dist/lib/ai-model/shared/model-locate-result/types.js +20 -0
- package/dist/lib/ai-model/shared/model-locate-result/types.js.map +1 -0
- package/dist/lib/ai-model/types.js +20 -0
- package/dist/lib/ai-model/types.js.map +1 -0
- package/dist/lib/ai-model/workflows/image-preprocess.js +61 -0
- package/dist/lib/ai-model/workflows/image-preprocess.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/index.js +50 -0
- package/dist/lib/ai-model/workflows/inspect/index.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js +60 -0
- package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js +52 -0
- package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js.map +1 -0
- package/dist/lib/ai-model/workflows/inspect/types.js +20 -0
- package/dist/lib/ai-model/workflows/inspect/types.js.map +1 -0
- package/dist/lib/ai-model/{model-family.js → workflows/planning/index.js} +6 -7
- package/dist/lib/ai-model/workflows/planning/index.js.map +1 -0
- package/dist/lib/ai-model/workflows/planning/types.js +20 -0
- package/dist/lib/ai-model/workflows/planning/types.js.map +1 -0
- package/dist/lib/common.js +4 -206
- package/dist/lib/common.js.map +1 -1
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/service/index.js +96 -69
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml/player.js +4 -3
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/types/agent/agent.d.ts +14 -6
- package/dist/types/agent/task-builder.d.ts +2 -2
- package/dist/types/agent/tasks.d.ts +6 -6
- package/dist/types/agent/utils.d.ts +8 -5
- package/dist/types/ai-model/errors.d.ts +2 -0
- package/dist/types/ai-model/index.d.ts +2 -4
- package/dist/types/ai-model/inspect.d.ts +13 -33
- package/dist/types/ai-model/llm-planning.d.ts +6 -17
- package/dist/types/ai-model/{auto-glm → models/auto-glm}/actions.d.ts +2 -2
- package/dist/types/ai-model/models/auto-glm/adapter.d.ts +5 -0
- package/dist/types/ai-model/models/auto-glm/locate.d.ts +3 -0
- package/dist/types/ai-model/models/auto-glm/planning.d.ts +3 -0
- package/dist/types/ai-model/models/auto-glm/prompt.d.ts +4 -0
- package/dist/types/ai-model/models/default.d.ts +2 -0
- package/dist/types/ai-model/models/doubao.d.ts +10 -0
- package/dist/types/ai-model/models/gemini.d.ts +18 -0
- package/dist/types/ai-model/models/glm.d.ts +18 -0
- package/dist/types/ai-model/models/gpt.d.ts +18 -0
- package/dist/types/ai-model/models/index.d.ts +2 -0
- package/dist/types/ai-model/models/qwen.d.ts +30 -0
- package/dist/types/ai-model/models/registry.d.ts +81 -0
- package/dist/types/ai-model/models/resolved.d.ts +9 -0
- package/dist/types/ai-model/models/types.d.ts +102 -0
- package/dist/types/ai-model/models/ui-tars/adapter.d.ts +6 -0
- package/dist/types/ai-model/{ui-tars-planning.d.ts → models/ui-tars/planning.d.ts} +7 -11
- package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
- package/dist/types/ai-model/prompt/llm-planning.d.ts +5 -5
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
- package/dist/types/ai-model/prompt/locate-grounding-rules.d.ts +1 -0
- package/dist/types/ai-model/prompt/locate-param-example.d.ts +3 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -3
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -3
- package/dist/types/ai-model/prompts/locate-result-coordinates.d.ts +6 -0
- package/dist/types/ai-model/service-caller/index.d.ts +19 -27
- package/dist/types/ai-model/service-caller/json.d.ts +9 -0
- package/dist/types/ai-model/shared/model-locate-result/bbox.d.ts +7 -0
- package/dist/types/ai-model/shared/model-locate-result/factory.d.ts +2 -0
- package/dist/types/ai-model/shared/model-locate-result/index.d.ts +3 -0
- package/dist/types/ai-model/shared/model-locate-result/parse.d.ts +5 -0
- package/dist/types/ai-model/shared/model-locate-result/pixel-bbox-mapper.d.ts +7 -0
- package/dist/types/ai-model/shared/model-locate-result/types.d.ts +157 -0
- package/dist/types/ai-model/types.d.ts +2 -0
- package/dist/types/ai-model/workflows/image-preprocess.d.ts +30 -0
- package/dist/types/ai-model/workflows/inspect/index.d.ts +1 -0
- package/dist/types/ai-model/workflows/inspect/locate-result-rect.d.ts +4 -0
- package/dist/types/ai-model/workflows/inspect/search-area-mapping.d.ts +3 -0
- package/dist/types/ai-model/workflows/inspect/types.d.ts +37 -0
- package/dist/types/ai-model/workflows/planning/index.d.ts +2 -0
- package/dist/types/ai-model/workflows/planning/types.d.ts +15 -0
- package/dist/types/common.d.ts +0 -30
- package/dist/types/device/index.d.ts +22 -22
- package/dist/types/service/index.d.ts +5 -4
- package/dist/types/types.d.ts +21 -9
- package/dist/types/yaml.d.ts +8 -2
- package/package.json +2 -2
- package/dist/es/ai-model/auto-glm/actions.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/index.mjs +0 -6
- package/dist/es/ai-model/auto-glm/parser.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/planning.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/prompt.mjs.map +0 -1
- package/dist/es/ai-model/auto-glm/util.mjs +0 -9
- package/dist/es/ai-model/auto-glm/util.mjs.map +0 -1
- package/dist/es/ai-model/model-family.mjs +0 -6
- package/dist/es/ai-model/model-family.mjs.map +0 -1
- package/dist/es/ai-model/prompt/common.mjs +0 -8
- package/dist/es/ai-model/prompt/common.mjs.map +0 -1
- package/dist/es/ai-model/service-caller/image-detail.mjs +0 -6
- package/dist/es/ai-model/service-caller/image-detail.mjs.map +0 -1
- package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
- package/dist/lib/ai-model/auto-glm/actions.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/index.js +0 -66
- package/dist/lib/ai-model/auto-glm/index.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/parser.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/planning.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/prompt.js.map +0 -1
- package/dist/lib/ai-model/auto-glm/util.js.map +0 -1
- package/dist/lib/ai-model/model-family.js.map +0 -1
- package/dist/lib/ai-model/prompt/common.js.map +0 -1
- package/dist/lib/ai-model/service-caller/image-detail.js.map +0 -1
- package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
- package/dist/types/ai-model/auto-glm/index.d.ts +0 -6
- package/dist/types/ai-model/auto-glm/planning.d.ts +0 -12
- package/dist/types/ai-model/auto-glm/prompt.d.ts +0 -27
- package/dist/types/ai-model/auto-glm/util.d.ts +0 -13
- package/dist/types/ai-model/model-family.d.ts +0 -7
- package/dist/types/ai-model/prompt/common.d.ts +0 -2
- package/dist/types/ai-model/service-caller/image-detail.d.ts +0 -2
- /package/dist/es/ai-model/{auto-glm → models/auto-glm}/parser.mjs +0 -0
- /package/dist/lib/ai-model/{auto-glm → models/auto-glm}/parser.js +0 -0
- /package/dist/types/ai-model/{auto-glm → models/auto-glm}/parser.d.ts +0 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { type LocateResultValue, type PixelBbox } from '../shared/model-locate-result';
|
|
2
|
+
import type { ChatCompletionCallContext, ChatCompletionParamsResult, ModelAdapterDefinition } from './types';
|
|
3
|
+
declare function parseQwen25RawLocateValue(input: unknown): LocateResultValue;
|
|
4
|
+
declare function normalizeQwen25ResultToPixelBbox(result: LocateResultValue): PixelBbox;
|
|
5
|
+
export declare const qwenAdapters: {
|
|
6
|
+
'qwen2.5-vl': {
|
|
7
|
+
chatCompletion: {
|
|
8
|
+
unsupportedUserConfig: ("reasoningEnabled" | "reasoningEffort" | "reasoningBudget")[];
|
|
9
|
+
buildChatCompletionParams: (input: ChatCompletionCallContext) => ChatCompletionParamsResult;
|
|
10
|
+
};
|
|
11
|
+
imagePreprocess: {
|
|
12
|
+
padBlockSize: number;
|
|
13
|
+
};
|
|
14
|
+
locate: {
|
|
15
|
+
resultAdapter: {
|
|
16
|
+
coordinates: {
|
|
17
|
+
shape: "bbox";
|
|
18
|
+
order: "xy";
|
|
19
|
+
};
|
|
20
|
+
parseRawLocateValue: typeof parseQwen25RawLocateValue;
|
|
21
|
+
mapLocateResultToPixelBbox: typeof normalizeQwen25ResultToPixelBbox;
|
|
22
|
+
};
|
|
23
|
+
};
|
|
24
|
+
};
|
|
25
|
+
'qwen3-vl': ModelAdapterDefinition;
|
|
26
|
+
qwen3: ModelAdapterDefinition;
|
|
27
|
+
'qwen3.5': ModelAdapterDefinition;
|
|
28
|
+
'qwen3.6': ModelAdapterDefinition;
|
|
29
|
+
};
|
|
30
|
+
export {};
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import type { IModelConfig, TModelFamily } from '@midscene/shared/env';
|
|
2
|
+
import type { ModelAdapter, ModelAdapterDefinition, ModelRuntime } from './types';
|
|
3
|
+
export declare const MODEL_ADAPTER_CONFIGS: {
|
|
4
|
+
'gpt-5': {
|
|
5
|
+
chatCompletion: {
|
|
6
|
+
unsupportedUserConfig: ("temperature" | "reasoningEnabled" | "reasoningEffort" | "reasoningBudget")[];
|
|
7
|
+
buildChatCompletionParams: () => import("./types").ChatCompletionParamsResult;
|
|
8
|
+
resolveImageDetail: (input: import("./types").ChatCompletionCallContext) => import("./types").ImageDetail | undefined;
|
|
9
|
+
};
|
|
10
|
+
locate: {
|
|
11
|
+
resultAdapter: {
|
|
12
|
+
coordinates: {
|
|
13
|
+
shape: "bbox";
|
|
14
|
+
order: "xy";
|
|
15
|
+
};
|
|
16
|
+
};
|
|
17
|
+
};
|
|
18
|
+
};
|
|
19
|
+
'auto-glm': ModelAdapterDefinition;
|
|
20
|
+
'auto-glm-multilingual': ModelAdapterDefinition;
|
|
21
|
+
'glm-v': {
|
|
22
|
+
chatCompletion: {
|
|
23
|
+
unsupportedUserConfig: ("reasoningEffort" | "reasoningBudget")[];
|
|
24
|
+
buildChatCompletionParams: (input: import("./types").ChatCompletionCallContext) => import("./types").ChatCompletionParamsResult;
|
|
25
|
+
};
|
|
26
|
+
locate: {
|
|
27
|
+
resultAdapter: {
|
|
28
|
+
coordinates: {
|
|
29
|
+
shape: "bbox";
|
|
30
|
+
order: "xy";
|
|
31
|
+
normalizedBy: number;
|
|
32
|
+
};
|
|
33
|
+
};
|
|
34
|
+
};
|
|
35
|
+
};
|
|
36
|
+
'vlm-ui-tars': ModelAdapterDefinition;
|
|
37
|
+
'vlm-ui-tars-doubao': ModelAdapterDefinition;
|
|
38
|
+
'vlm-ui-tars-doubao-1.5': ModelAdapterDefinition;
|
|
39
|
+
gemini: {
|
|
40
|
+
chatCompletion: {
|
|
41
|
+
unsupportedUserConfig: ("reasoningEnabled" | "reasoningBudget")[];
|
|
42
|
+
buildChatCompletionParams: (input: import("./types").ChatCompletionCallContext) => import("./types").ChatCompletionParamsResult;
|
|
43
|
+
};
|
|
44
|
+
locate: {
|
|
45
|
+
resultAdapter: {
|
|
46
|
+
coordinates: {
|
|
47
|
+
shape: "bbox";
|
|
48
|
+
order: "yx";
|
|
49
|
+
normalizedBy: number;
|
|
50
|
+
};
|
|
51
|
+
};
|
|
52
|
+
};
|
|
53
|
+
};
|
|
54
|
+
'doubao-vision': ModelAdapterDefinition;
|
|
55
|
+
'doubao-seed': ModelAdapterDefinition;
|
|
56
|
+
'qwen2.5-vl': {
|
|
57
|
+
chatCompletion: {
|
|
58
|
+
unsupportedUserConfig: ("reasoningEnabled" | "reasoningEffort" | "reasoningBudget")[];
|
|
59
|
+
buildChatCompletionParams: (input: import("./types").ChatCompletionCallContext) => import("./types").ChatCompletionParamsResult;
|
|
60
|
+
};
|
|
61
|
+
imagePreprocess: {
|
|
62
|
+
padBlockSize: number;
|
|
63
|
+
};
|
|
64
|
+
locate: {
|
|
65
|
+
resultAdapter: {
|
|
66
|
+
coordinates: {
|
|
67
|
+
shape: "bbox";
|
|
68
|
+
order: "xy";
|
|
69
|
+
};
|
|
70
|
+
parseRawLocateValue: (input: unknown) => import("../shared/model-locate-result").LocateResultValue;
|
|
71
|
+
mapLocateResultToPixelBbox: (result: import("../shared/model-locate-result").LocateResultValue) => import("../shared/model-locate-result").PixelBbox;
|
|
72
|
+
};
|
|
73
|
+
};
|
|
74
|
+
};
|
|
75
|
+
'qwen3-vl': ModelAdapterDefinition;
|
|
76
|
+
qwen3: ModelAdapterDefinition;
|
|
77
|
+
'qwen3.5': ModelAdapterDefinition;
|
|
78
|
+
'qwen3.6': ModelAdapterDefinition;
|
|
79
|
+
};
|
|
80
|
+
export declare function getModelAdapter(modelFamily?: TModelFamily): ModelAdapter;
|
|
81
|
+
export declare function getModelRuntime(config: IModelConfig): ModelRuntime;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ChatCompletionAdapter, ImagePreprocessPolicy, JsonParser, LocateAdapter, ModelAdapter, ModelAdapterDefinition, PlanningAdapter } from './types';
|
|
2
|
+
export declare class ResolvedModelAdapter implements ModelAdapter {
|
|
3
|
+
readonly jsonParser: JsonParser;
|
|
4
|
+
readonly chatCompletion: ChatCompletionAdapter;
|
|
5
|
+
readonly imagePreprocess: ImagePreprocessPolicy;
|
|
6
|
+
readonly planning: PlanningAdapter;
|
|
7
|
+
readonly locate: LocateAdapter;
|
|
8
|
+
constructor(config: ModelAdapterDefinition, modelFamily: string);
|
|
9
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import type { IModelConfig, TIntent } from '@midscene/shared/env';
|
|
2
|
+
import type { JsonParser, JsonParserContext, JsonParserSource } from '../service-caller/json';
|
|
3
|
+
import type { LocateResultAdapter, LocateResultAdapterDefinition } from '../shared/model-locate-result/types';
|
|
4
|
+
import type { ImagePreprocessPolicy } from '../workflows/image-preprocess';
|
|
5
|
+
import type { LocateFn } from '../workflows/inspect/types';
|
|
6
|
+
import type { PlanFn } from '../workflows/planning/types';
|
|
7
|
+
export type { ImagePreprocessPolicy, JsonParser, JsonParserContext, JsonParserSource, };
|
|
8
|
+
export type JsonParserPreset = 'lenient-json';
|
|
9
|
+
export interface ReasoningInput {
|
|
10
|
+
reasoningEnabled?: boolean;
|
|
11
|
+
reasoningEffort?: string;
|
|
12
|
+
reasoningBudget?: number;
|
|
13
|
+
}
|
|
14
|
+
export interface ChatCompletionParamsResult {
|
|
15
|
+
config: Record<string, unknown>;
|
|
16
|
+
}
|
|
17
|
+
export interface MidsceneChatCompletionDefaults {
|
|
18
|
+
temperature: number;
|
|
19
|
+
}
|
|
20
|
+
export interface ChatCompletionCallUserConfig extends ReasoningInput {
|
|
21
|
+
temperature?: number;
|
|
22
|
+
}
|
|
23
|
+
export type ChatCompletionUnsupportedUserConfig = keyof ChatCompletionCallUserConfig;
|
|
24
|
+
export interface ChatCompletionCallInput {
|
|
25
|
+
intent?: TIntent;
|
|
26
|
+
userConfig?: ChatCompletionCallUserConfig;
|
|
27
|
+
requiresOriginalImageDetail?: boolean;
|
|
28
|
+
}
|
|
29
|
+
export interface ChatCompletionCallContext {
|
|
30
|
+
intent?: TIntent;
|
|
31
|
+
userConfig: ChatCompletionCallUserConfig;
|
|
32
|
+
requiresOriginalImageDetail?: boolean;
|
|
33
|
+
midsceneDefaults: MidsceneChatCompletionDefaults;
|
|
34
|
+
}
|
|
35
|
+
export type ImageDetail = 'auto' | 'low' | 'high' | 'original';
|
|
36
|
+
export interface ChatCompletionAdapter {
|
|
37
|
+
unsupportedUserConfig: ChatCompletionUnsupportedUserConfig[];
|
|
38
|
+
buildChatCompletionParams(input: ChatCompletionCallInput): ChatCompletionParamsResult;
|
|
39
|
+
resolveImageDetail(input: ChatCompletionCallInput): ImageDetail | undefined;
|
|
40
|
+
}
|
|
41
|
+
export interface ChatCompletionDefinition {
|
|
42
|
+
unsupportedUserConfig?: ChatCompletionUnsupportedUserConfig[];
|
|
43
|
+
buildChatCompletionParams?: (input: ChatCompletionCallContext) => ChatCompletionParamsResult;
|
|
44
|
+
resolveImageDetail?: (input: ChatCompletionCallContext) => ImageDetail | undefined;
|
|
45
|
+
}
|
|
46
|
+
export type ImagePreprocessDefinition = Partial<ImagePreprocessPolicy>;
|
|
47
|
+
interface PlanningPolicy {
|
|
48
|
+
cacheEnabled: boolean;
|
|
49
|
+
defaultReplanningCycleLimit: number;
|
|
50
|
+
supportsActionDeepLocate: boolean;
|
|
51
|
+
}
|
|
52
|
+
export type PlanningAdapter = (PlanningPolicy & {
|
|
53
|
+
kind: 'standard';
|
|
54
|
+
}) | (PlanningPolicy & {
|
|
55
|
+
kind: 'custom';
|
|
56
|
+
planFn: PlanFn;
|
|
57
|
+
});
|
|
58
|
+
export type PlanningDefinition = (Partial<PlanningPolicy> & {
|
|
59
|
+
kind?: 'standard';
|
|
60
|
+
}) | (Partial<PlanningPolicy> & {
|
|
61
|
+
kind: 'custom';
|
|
62
|
+
planFn: PlanFn;
|
|
63
|
+
});
|
|
64
|
+
interface LocatePolicy {
|
|
65
|
+
supportsSearchArea: boolean;
|
|
66
|
+
}
|
|
67
|
+
type StandardLocateAdapter = LocatePolicy & {
|
|
68
|
+
kind: 'standard';
|
|
69
|
+
resultAdapter: LocateResultAdapter;
|
|
70
|
+
};
|
|
71
|
+
type CustomLocateAdapter = LocatePolicy & {
|
|
72
|
+
kind: 'custom';
|
|
73
|
+
locateFn: LocateFn;
|
|
74
|
+
};
|
|
75
|
+
export type LocateAdapter = StandardLocateAdapter | CustomLocateAdapter;
|
|
76
|
+
type StandardLocateDefinition = Partial<LocatePolicy> & {
|
|
77
|
+
kind?: 'standard';
|
|
78
|
+
resultAdapter?: LocateResultAdapterDefinition;
|
|
79
|
+
};
|
|
80
|
+
type CustomLocateDefinition = Partial<LocatePolicy> & {
|
|
81
|
+
kind: 'custom';
|
|
82
|
+
locateFn: LocateFn;
|
|
83
|
+
};
|
|
84
|
+
export type LocateDefinition = StandardLocateDefinition | CustomLocateDefinition;
|
|
85
|
+
export interface ModelAdapter {
|
|
86
|
+
jsonParser: JsonParser;
|
|
87
|
+
chatCompletion: ChatCompletionAdapter;
|
|
88
|
+
imagePreprocess: ImagePreprocessPolicy;
|
|
89
|
+
planning: PlanningAdapter;
|
|
90
|
+
locate: LocateAdapter;
|
|
91
|
+
}
|
|
92
|
+
export interface ModelRuntime {
|
|
93
|
+
config: IModelConfig;
|
|
94
|
+
adapter: ModelAdapter;
|
|
95
|
+
}
|
|
96
|
+
export interface ModelAdapterDefinition {
|
|
97
|
+
jsonParser?: JsonParserPreset | JsonParser;
|
|
98
|
+
chatCompletion?: ChatCompletionDefinition;
|
|
99
|
+
imagePreprocess?: ImagePreprocessDefinition;
|
|
100
|
+
planning?: PlanningDefinition;
|
|
101
|
+
locate?: LocateDefinition;
|
|
102
|
+
}
|
|
@@ -1,14 +1,8 @@
|
|
|
1
|
-
import type { PlanningAIResponse
|
|
2
|
-
import {
|
|
3
|
-
import type {
|
|
1
|
+
import type { PlanningAIResponse } from '../../../types';
|
|
2
|
+
import type { UITarsModelVersion } from '@midscene/shared/env';
|
|
3
|
+
import type { PlanOptions } from '../../workflows/planning/types';
|
|
4
4
|
type ActionType = 'click' | 'left_double' | 'right_single' | 'drag' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
|
|
5
|
-
export declare function uiTarsPlanning(userInstruction: string, options:
|
|
6
|
-
conversationHistory: ConversationHistory;
|
|
7
|
-
context: UIContext;
|
|
8
|
-
modelConfig: IModelConfig;
|
|
9
|
-
actionContext?: string;
|
|
10
|
-
abortSignal?: AbortSignal;
|
|
11
|
-
}): Promise<PlanningAIResponse>;
|
|
5
|
+
export declare function uiTarsPlanning(userInstruction: string, options: PlanOptions, uiTarsModelVersion: UITarsModelVersion): Promise<PlanningAIResponse>;
|
|
12
6
|
interface BaseAction {
|
|
13
7
|
action_type: ActionType;
|
|
14
8
|
action_inputs: Record<string, any>;
|
|
@@ -66,7 +60,9 @@ interface ScrollAction extends BaseAction {
|
|
|
66
60
|
}
|
|
67
61
|
interface FinishedAction extends BaseAction {
|
|
68
62
|
action_type: 'finished';
|
|
69
|
-
action_inputs:
|
|
63
|
+
action_inputs: {
|
|
64
|
+
content?: string;
|
|
65
|
+
};
|
|
70
66
|
}
|
|
71
67
|
export type Action = ClickAction | LeftDoubleAction | RightSingleAction | DragAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction;
|
|
72
68
|
export {};
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
export declare function systemPromptToLocateElement(
|
|
1
|
+
import type { LocateResultPromptSpec } from '../shared/model-locate-result';
|
|
2
|
+
export declare function systemPromptToLocateElement(promptSpec: LocateResultPromptSpec): string;
|
|
3
3
|
export declare const findElementPrompt: (targetElementDescription: string) => string;
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import type { DeviceAction } from '../../types';
|
|
2
|
-
import type {
|
|
3
|
-
export declare const descriptionForAction: (action: DeviceAction<any>,
|
|
4
|
-
export declare function systemPromptToTaskPlanning({ actionSpace,
|
|
2
|
+
import type { LocateResultPromptSpec } from '../shared/model-locate-result';
|
|
3
|
+
export declare const descriptionForAction: (action: DeviceAction<any>, locateParamTypeDescription: string, includeLocateInPlanning?: boolean, locatePromptSpec?: LocateResultPromptSpec) => string;
|
|
4
|
+
export declare function systemPromptToTaskPlanning({ actionSpace, locatePromptSpec, includeLocateInPlanning, includeThought, includeSubGoals, }: {
|
|
5
5
|
actionSpace: DeviceAction<any>[];
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
locatePromptSpec?: LocateResultPromptSpec;
|
|
7
|
+
includeLocateInPlanning: boolean;
|
|
8
8
|
includeThought?: boolean;
|
|
9
9
|
includeSubGoals?: boolean;
|
|
10
10
|
}): Promise<string>;
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
export declare function systemPromptToLocateSection(
|
|
1
|
+
import type { LocateResultPromptSpec } from '../shared/model-locate-result';
|
|
2
|
+
export declare function systemPromptToLocateSection(promptSpec: LocateResultPromptSpec): string;
|
|
3
3
|
export declare const sectionLocatorInstruction: (sectionDescription: string) => string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function locateGroundingRules(): string;
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { LocateResultPromptSpec } from '../shared/model-locate-result';
|
|
2
|
+
export declare function formatLocateExampleValue(value: unknown): string;
|
|
3
|
+
export declare function locateParamExample(prompt: string, promptSpec?: LocateResultPromptSpec, exampleValue?: unknown): string;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
|
|
2
|
-
import type {
|
|
2
|
+
import type { ModelRuntime } from '../models';
|
|
3
3
|
import { type ChromeRecordedEvent, type EventCounts, type EventSummary, type InputDescription, type ProcessedEvent, createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents } from './yaml-generator';
|
|
4
4
|
export interface PlaywrightGenerationOptions {
|
|
5
5
|
testName?: string;
|
|
@@ -19,8 +19,8 @@ export { getScreenshotsForLLM, filterEventsByType, createEventCounts, extractInp
|
|
|
19
19
|
/**
|
|
20
20
|
* Generates Playwright test code from recorded events
|
|
21
21
|
*/
|
|
22
|
-
export declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions,
|
|
22
|
+
export declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions, modelRuntime: ModelRuntime) => Promise<string>;
|
|
23
23
|
/**
|
|
24
24
|
* Generates Playwright test code from recorded events with streaming support
|
|
25
25
|
*/
|
|
26
|
-
export declare const generatePlaywrightTestStream: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions,
|
|
26
|
+
export declare const generatePlaywrightTestStream: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions, modelRuntime: ModelRuntime) => Promise<StreamingAIResponse>;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
|
|
2
|
-
import type {
|
|
2
|
+
import type { ModelRuntime } from '../models';
|
|
3
3
|
export interface EventCounts {
|
|
4
4
|
navigation: number;
|
|
5
5
|
click: number;
|
|
@@ -95,8 +95,8 @@ export declare const validateEvents: (events: ChromeRecordedEvent[]) => void;
|
|
|
95
95
|
/**
|
|
96
96
|
* Generates YAML test configuration from recorded events using AI
|
|
97
97
|
*/
|
|
98
|
-
export declare const generateYamlTest: (events: ChromeRecordedEvent[], options: YamlGenerationOptions,
|
|
98
|
+
export declare const generateYamlTest: (events: ChromeRecordedEvent[], options: YamlGenerationOptions, modelRuntime: ModelRuntime) => Promise<string>;
|
|
99
99
|
/**
|
|
100
100
|
* Generates YAML test configuration from recorded events using AI with streaming support
|
|
101
101
|
*/
|
|
102
|
-
export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options: YamlGenerationOptions & StreamingCodeGenerationOptions,
|
|
102
|
+
export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options: YamlGenerationOptions & StreamingCodeGenerationOptions, modelRuntime: ModelRuntime) => Promise<StreamingAIResponse>;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { LocateResultBbox, LocateResultPromptSpec } from '../shared/model-locate-result';
|
|
2
|
+
import type { ResolvedLocateResultCoordinates } from '../shared/model-locate-result/types';
|
|
3
|
+
export declare function describeLocateResultValueSchema({ shape, }: ResolvedLocateResultCoordinates): string;
|
|
4
|
+
export declare function locateResultExampleValue(resolvedCoordinates: ResolvedLocateResultCoordinates, region: LocateResultBbox): number[];
|
|
5
|
+
export declare const locateResultExampleRegions: LocateResultBbox[];
|
|
6
|
+
export declare function createLocateResultPromptSpec(resolvedCoordinates: ResolvedLocateResultCoordinates): LocateResultPromptSpec;
|
|
@@ -6,52 +6,44 @@ export declare class AIResponseParseError extends Error {
|
|
|
6
6
|
constructor(message: string, rawResponse: string, usage?: AIUsageInfo);
|
|
7
7
|
}
|
|
8
8
|
import { type IModelConfig, type TModelFamily } from '@midscene/shared/env';
|
|
9
|
+
import OpenAI from 'openai';
|
|
9
10
|
import type { ChatCompletionMessageParam } from 'openai/resources/index';
|
|
10
|
-
import type
|
|
11
|
-
|
|
11
|
+
import { type ModelRuntime } from '../models';
|
|
12
|
+
import type { AIArgs } from '../types';
|
|
13
|
+
import type { JsonParserSource } from './json';
|
|
14
|
+
export { extractJSONFromCodeBlock, normalJsonParser, safeParseJson, } from './json';
|
|
15
|
+
export type { JsonParser } from './json';
|
|
16
|
+
export declare function createChatClient({ modelConfig, }: {
|
|
17
|
+
modelConfig: IModelConfig;
|
|
18
|
+
}): Promise<{
|
|
19
|
+
completion: OpenAI.Chat.Completions;
|
|
20
|
+
modelName: string;
|
|
21
|
+
modelDescription: string;
|
|
22
|
+
modelFamily: TModelFamily | undefined;
|
|
23
|
+
}>;
|
|
24
|
+
export declare function callAI(messages: ChatCompletionMessageParam[], modelRuntime: ModelRuntime, options?: {
|
|
12
25
|
stream?: boolean;
|
|
13
26
|
onChunk?: StreamingCallback;
|
|
14
27
|
abortSignal?: AbortSignal;
|
|
15
|
-
|
|
28
|
+
requiresOriginalImageDetail?: boolean;
|
|
16
29
|
}): Promise<{
|
|
17
30
|
content: string;
|
|
18
31
|
reasoning_content?: string;
|
|
19
32
|
usage?: AIUsageInfo;
|
|
20
33
|
isStreamed: boolean;
|
|
21
34
|
}>;
|
|
22
|
-
export declare function callAIWithObjectResponse<T>(messages: ChatCompletionMessageParam[],
|
|
35
|
+
export declare function callAIWithObjectResponse<T>(messages: ChatCompletionMessageParam[], model: IModelConfig | ModelRuntime, options?: {
|
|
23
36
|
abortSignal?: AbortSignal;
|
|
37
|
+
jsonParserSource?: JsonParserSource;
|
|
24
38
|
}): Promise<{
|
|
25
39
|
content: T;
|
|
26
40
|
contentString: string;
|
|
27
41
|
usage?: AIUsageInfo;
|
|
28
42
|
reasoning_content?: string;
|
|
29
43
|
}>;
|
|
30
|
-
export declare function callAIWithStringResponse(msgs: AIArgs,
|
|
44
|
+
export declare function callAIWithStringResponse(msgs: AIArgs, modelRuntime: ModelRuntime, options?: {
|
|
31
45
|
abortSignal?: AbortSignal;
|
|
32
46
|
}): Promise<{
|
|
33
47
|
content: string;
|
|
34
48
|
usage?: AIUsageInfo;
|
|
35
49
|
}>;
|
|
36
|
-
export declare function extractJSONFromCodeBlock(response: string): string;
|
|
37
|
-
export declare function preprocessDoubaoBboxJson(input: string): string;
|
|
38
|
-
export declare function resolveReasoningConfig({ reasoningEnabled, reasoningEffort, reasoningBudget, modelFamily, }: {
|
|
39
|
-
reasoningEnabled?: boolean;
|
|
40
|
-
reasoningEffort?: string;
|
|
41
|
-
reasoningBudget?: number;
|
|
42
|
-
modelFamily?: TModelFamily;
|
|
43
|
-
}): {
|
|
44
|
-
config: Record<string, unknown>;
|
|
45
|
-
debugMessage?: string;
|
|
46
|
-
};
|
|
47
|
-
/**
|
|
48
|
-
* Normalize a parsed JSON object by trimming whitespace from:
|
|
49
|
-
* 1. All object keys (e.g., " prompt " -> "prompt")
|
|
50
|
-
* 2. String values unless the key is explicitly preserved
|
|
51
|
-
* This handles LLM output that may include leading/trailing spaces.
|
|
52
|
-
*/
|
|
53
|
-
interface ParseModelResponseJsonOptions {
|
|
54
|
-
preserveStringValueKeys?: string[];
|
|
55
|
-
}
|
|
56
|
-
export declare function parseModelResponseJson(input: string, modelFamily: TModelFamily | undefined, options?: ParseModelResponseJsonOptions): any;
|
|
57
|
-
export {};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export declare function extractJSONFromCodeBlock(response: string): string;
|
|
2
|
+
export type JsonParserSource = 'generic-object' | 'planning-action-param' | 'locate' | 'section-locator';
|
|
3
|
+
export interface JsonParserContext {
|
|
4
|
+
source: JsonParserSource;
|
|
5
|
+
preserveStringValueKeys?: string[];
|
|
6
|
+
}
|
|
7
|
+
export type JsonParser = (raw: string, context?: JsonParserContext) => unknown;
|
|
8
|
+
export declare function safeParseJson(raw: string, context?: JsonParserContext): any;
|
|
9
|
+
export declare const normalJsonParser: JsonParser;
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { LocateResultBbox, LocateResultContext, PixelBbox, SectionLocatePixelBboxGroup } from './types';
|
|
2
|
+
export declare function maxPixelIndex(size: number): number;
|
|
3
|
+
export declare function normalizedCoordinateToPixelIndex(value: number, normalizedBy: number, size: number): number;
|
|
4
|
+
export declare function mapNormalizedCoordinatesToPixelBbox(coordinates: LocateResultBbox, normalizedBy: number, width: number, height: number): PixelBbox;
|
|
5
|
+
export declare function expandPointToBbox(x: number, y: number, maxX: number, maxY: number, halfSize: number): LocateResultBbox;
|
|
6
|
+
export declare function finalizePixelBbox(pixelBbox: PixelBbox, rawResult: unknown, { preparedSize, contentSize }: LocateResultContext): PixelBbox;
|
|
7
|
+
export declare function finalizeSectionLocatePixelBboxGroup(result: SectionLocatePixelBboxGroup, rawResult: unknown, ctx: LocateResultContext): SectionLocatePixelBboxGroup;
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { createLocateResultAdapter } from './factory';
|
|
2
|
+
export { unwrapCoordinateListLikeInput } from './parse';
|
|
3
|
+
export type { LocateResultBbox, PixelBbox, RawLocateValue, SectionLocatePixelBboxGroup, CustomLocateResultAdapterDefinition, LocateResultAdapter, LocateResultAdapterDefinition, LocateResultCoordinates, LocateResultContext, LocateResultShape, LocateResultPromptSpec, LocateResultValue, NonEmptyArray, StandardLocateResultAdapterDefinition, } from './types';
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { LocateResultValue, ResolvedLocateResultCoordinates } from './types';
|
|
2
|
+
type CoordinateListLikeInput = number[] | string[] | string | (number[] | string[])[];
|
|
3
|
+
export declare function unwrapCoordinateListLikeInput(coordinateList: CoordinateListLikeInput): number[] | string[] | string;
|
|
4
|
+
export declare function parseNumericLocateResult(resolvedCoordinates: ResolvedLocateResultCoordinates, input: unknown): LocateResultValue;
|
|
5
|
+
export {};
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { LocateResultValue, PixelBbox, ResolvedLocateResultCoordinates } from './types';
|
|
2
|
+
export declare function mapLocateResultToPixelBboxByCoordinates(result: LocateResultValue, { preparedSize }: {
|
|
3
|
+
preparedSize: {
|
|
4
|
+
width: number;
|
|
5
|
+
height: number;
|
|
6
|
+
};
|
|
7
|
+
}, resolvedCoordinates: ResolvedLocateResultCoordinates): PixelBbox;
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import type { Bbox } from '../../../types';
|
|
2
|
+
export type { Bbox };
|
|
3
|
+
export type LocateResultBbox = Bbox;
|
|
4
|
+
export type PixelBbox = Bbox;
|
|
5
|
+
export type NonEmptyArray<T> = [T, ...T[]];
|
|
6
|
+
export type RawLocateValue = unknown;
|
|
7
|
+
export type LocateResultValue = {
|
|
8
|
+
type: 'bbox';
|
|
9
|
+
coordinates: LocateResultBbox;
|
|
10
|
+
} | {
|
|
11
|
+
type: 'point';
|
|
12
|
+
coordinates: [number, number];
|
|
13
|
+
};
|
|
14
|
+
export type LocateResultShape = 'bbox' | 'point';
|
|
15
|
+
export interface LocateResultContext {
|
|
16
|
+
preparedSize: {
|
|
17
|
+
width: number;
|
|
18
|
+
height: number;
|
|
19
|
+
};
|
|
20
|
+
contentSize?: {
|
|
21
|
+
width: number;
|
|
22
|
+
height: number;
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
export interface LocateResultPromptSpec {
|
|
26
|
+
resultKey: string;
|
|
27
|
+
resultValueSchema: string;
|
|
28
|
+
resultValueDescription: string;
|
|
29
|
+
resultNoun: string;
|
|
30
|
+
resultNounPlural: string;
|
|
31
|
+
exampleValues: NonEmptyArray<unknown>;
|
|
32
|
+
}
|
|
33
|
+
export interface SectionLocatePixelBboxGroup {
|
|
34
|
+
target: PixelBbox;
|
|
35
|
+
references?: PixelBbox[];
|
|
36
|
+
}
|
|
37
|
+
export interface LocateResultAdapter {
|
|
38
|
+
kind: 'standard' | 'custom';
|
|
39
|
+
promptSpec: LocateResultPromptSpec;
|
|
40
|
+
/**
|
|
41
|
+
* Converts a locate payload to a pixel bbox. This adapter intentionally does
|
|
42
|
+
* not interpret model-level `error` / `errors` fields; callers decide whether
|
|
43
|
+
* those fields should stop the locate flow before invoking the adapter.
|
|
44
|
+
*/
|
|
45
|
+
adaptElementLocateResultToPixelBbox(input: unknown, ctx: LocateResultContext): PixelBbox;
|
|
46
|
+
/**
|
|
47
|
+
* Converts a section locate payload to target/reference pixel bboxes. This
|
|
48
|
+
* adapter intentionally does not interpret model-level `error` / `errors`
|
|
49
|
+
* fields; callers own that policy before invoking the adapter.
|
|
50
|
+
*/
|
|
51
|
+
adaptSectionLocateResultToPixelBboxGroup(input: unknown, ctx: LocateResultContext): SectionLocatePixelBboxGroup;
|
|
52
|
+
adaptPlanningParamToPixelBbox(planningParam: unknown, ctx: LocateResultContext): PixelBbox;
|
|
53
|
+
}
|
|
54
|
+
export interface LocateResultCoordinates {
|
|
55
|
+
shape: LocateResultShape;
|
|
56
|
+
order?: 'xy' | 'yx';
|
|
57
|
+
normalizedBy?: number;
|
|
58
|
+
}
|
|
59
|
+
export interface ResolvedLocateResultCoordinates {
|
|
60
|
+
shape: LocateResultShape;
|
|
61
|
+
order: 'xy' | 'yx';
|
|
62
|
+
normalizedBy?: number;
|
|
63
|
+
}
|
|
64
|
+
export type RawLocateValueParser = (input: RawLocateValue) => LocateResultValue;
|
|
65
|
+
export type LocateResultPixelBboxMapper = (result: LocateResultValue, ctx: LocateResultContext) => PixelBbox;
|
|
66
|
+
/**
|
|
67
|
+
* Declarative config for the standard locate workflow.
|
|
68
|
+
*
|
|
69
|
+
* The standard workflow has three steps:
|
|
70
|
+
* 1. `coordinates` is expanded into prompt wording, a default
|
|
71
|
+
* raw result parser, and a default pixel bbox mapper.
|
|
72
|
+
* 2. `parseRawLocateValue` converts that raw result value into Midscene's
|
|
73
|
+
* internal `LocateResultValue` shape:
|
|
74
|
+
* `{ type: 'bbox' | 'point', coordinates: ... }`. Omit it when the model
|
|
75
|
+
* returns a plain numeric bbox/point matching `coordinates`; provide it when the
|
|
76
|
+
* model needs repair or fallback handling.
|
|
77
|
+
* 3. `mapLocateResultToPixelBbox` converts the parsed result into a pixel bbox
|
|
78
|
+
* `[left, top, right, bottom]`. Omit it when `coordinates` is enough to describe
|
|
79
|
+
* the coordinate system and order; provide it only for model-specific
|
|
80
|
+
* conversion rules.
|
|
81
|
+
*
|
|
82
|
+
* Standard adapters intentionally use fixed result fields (`bbox` / `bbox_2d` /
|
|
83
|
+
* `point` and `references_*`). A previous design considered `pickRawLocateValue`
|
|
84
|
+
* for custom keys, but normal locate, search-area references, and future
|
|
85
|
+
* locateAll responses may return different shapes (single arrays, nested
|
|
86
|
+
* arrays, or object arrays), so a generic picker contract was unclear. A
|
|
87
|
+
* declarative `resultKeys` option is one possible future direction, but without
|
|
88
|
+
* a concrete need we avoid that over-design for now.
|
|
89
|
+
*
|
|
90
|
+
* Example 1: a GLM-like model that directly matches the standard coordinates.
|
|
91
|
+
*
|
|
92
|
+
* ```ts
|
|
93
|
+
* resultAdapter: {
|
|
94
|
+
* coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },
|
|
95
|
+
* }
|
|
96
|
+
* ```
|
|
97
|
+
*
|
|
98
|
+
* Example 2: Qwen 2.5 returns pixel coordinates, but may return a point-like
|
|
99
|
+
* value that needs custom parsing/fallback. The default pixel bbox mapper is
|
|
100
|
+
* bypassed only if custom fallback sizing is required.
|
|
101
|
+
*
|
|
102
|
+
* ```ts
|
|
103
|
+
* resultAdapter: {
|
|
104
|
+
* coordinates: { shape: 'bbox', order: 'xy' },
|
|
105
|
+
* parseRawLocateValue: parseQwen25RawLocateValue,
|
|
106
|
+
* mapLocateResultToPixelBbox: normalizeQwen25ResultToPixelBbox,
|
|
107
|
+
* }
|
|
108
|
+
* ```
|
|
109
|
+
*
|
|
110
|
+
* Example 3: a model with a custom raw value shape can keep the standard
|
|
111
|
+
* workflow while replacing parsing and mapping.
|
|
112
|
+
*
|
|
113
|
+
* ```ts
|
|
114
|
+
* resultAdapter: {
|
|
115
|
+
* coordinates: { shape: 'bbox', order: 'xy' },
|
|
116
|
+
* parseRawLocateValue: (raw) => ({
|
|
117
|
+
* type: 'bbox',
|
|
118
|
+
* coordinates: [
|
|
119
|
+
* Number((raw as any).left),
|
|
120
|
+
* Number((raw as any).top),
|
|
121
|
+
* Number((raw as any).right),
|
|
122
|
+
* Number((raw as any).bottom),
|
|
123
|
+
* ],
|
|
124
|
+
* }),
|
|
125
|
+
* mapLocateResultToPixelBbox: (result) => result.coordinates,
|
|
126
|
+
* }
|
|
127
|
+
* ```
|
|
128
|
+
*/
|
|
129
|
+
export type StandardLocateResultAdapterDefinition = {
|
|
130
|
+
kind?: 'standard';
|
|
131
|
+
/**
|
|
132
|
+
* Common locate result coordinates shorthand. This is the preferred config surface
|
|
133
|
+
* for normal models because it keeps result type, coordinate system, and
|
|
134
|
+
* coordinate order in one orthogonal field.
|
|
135
|
+
*/
|
|
136
|
+
coordinates: LocateResultCoordinates;
|
|
137
|
+
/**
|
|
138
|
+
* Parses the picked raw value into a `LocateResultValue`. This function
|
|
139
|
+
* should handle response repair and bbox-vs-point fallback only;
|
|
140
|
+
* coordinate-system conversion should stay in `mapLocateResultToPixelBbox`.
|
|
141
|
+
*/
|
|
142
|
+
parseRawLocateValue?: RawLocateValueParser;
|
|
143
|
+
/**
|
|
144
|
+
* Maps the parsed result into a pixel bbox. Most models should omit this
|
|
145
|
+
* and let `coordinates` drive the default conversion. Provide it only when point
|
|
146
|
+
* fallback size, clipping, or coordinate semantics are model-specific.
|
|
147
|
+
*/
|
|
148
|
+
mapLocateResultToPixelBbox?: LocateResultPixelBboxMapper;
|
|
149
|
+
};
|
|
150
|
+
export type CustomLocateResultAdapterDefinition = {
|
|
151
|
+
kind: 'custom';
|
|
152
|
+
promptSpec: LocateResultPromptSpec;
|
|
153
|
+
adaptElementLocateResultToPixelBbox(input: unknown, ctx: LocateResultContext): PixelBbox;
|
|
154
|
+
adaptSectionLocateResultToPixelBboxGroup(input: unknown, ctx: LocateResultContext): SectionLocatePixelBboxGroup;
|
|
155
|
+
adaptPlanningParamToPixelBbox(planningParam: unknown, ctx: LocateResultContext): PixelBbox;
|
|
156
|
+
};
|
|
157
|
+
export type LocateResultAdapterDefinition = StandardLocateResultAdapterDefinition | CustomLocateResultAdapterDefinition;
|