@midscene/core 0.14.3 → 0.14.4-beta-20250415065130.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model.d.ts +3 -3
- package/dist/es/ai-model.js +2 -2
- package/dist/es/{chunk-SAXMKI7Z.js → chunk-6MPYG6WR.js} +3 -3
- package/dist/es/{chunk-UBGEKXK7.js → chunk-6SYVFZ6C.js} +4 -1
- package/dist/es/chunk-6SYVFZ6C.js.map +1 -0
- package/dist/es/{chunk-5EO33FHK.js → chunk-QX6K65KH.js} +2 -2
- package/dist/es/env.d.ts +1 -1
- package/dist/es/env.js +1 -1
- package/dist/es/index.d.ts +4 -4
- package/dist/es/index.js +3 -3
- package/dist/es/{llm-planning-bd80e99e.d.ts → llm-planning-d084250f.d.ts} +1 -1
- package/dist/es/tree.d.ts +1 -1
- package/dist/{lib/types-e2a418c3.d.ts → es/types-d2831105.d.ts} +12 -3
- package/dist/es/utils.d.ts +1 -1
- package/dist/es/utils.js +2 -2
- package/dist/lib/ai-model.d.ts +3 -3
- package/dist/lib/ai-model.js +3 -3
- package/dist/lib/{chunk-SAXMKI7Z.js → chunk-6MPYG6WR.js} +5 -5
- package/dist/lib/{chunk-UBGEKXK7.js → chunk-6SYVFZ6C.js} +4 -1
- package/dist/lib/chunk-6SYVFZ6C.js.map +1 -0
- package/dist/lib/{chunk-5EO33FHK.js → chunk-QX6K65KH.js} +48 -48
- package/dist/lib/env.d.ts +1 -1
- package/dist/lib/env.js +2 -2
- package/dist/lib/index.d.ts +4 -4
- package/dist/lib/index.js +20 -20
- package/dist/lib/{llm-planning-bd80e99e.d.ts → llm-planning-d084250f.d.ts} +1 -1
- package/dist/lib/tree.d.ts +1 -1
- package/dist/{es/types-e2a418c3.d.ts → lib/types-d2831105.d.ts} +12 -3
- package/dist/lib/utils.d.ts +1 -1
- package/dist/lib/utils.js +3 -3
- package/dist/types/ai-model.d.ts +3 -3
- package/dist/types/env.d.ts +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/{llm-planning-bd80e99e.d.ts → llm-planning-d084250f.d.ts} +1 -1
- package/dist/types/tree.d.ts +1 -1
- package/dist/types/{types-e2a418c3.d.ts → types-d2831105.d.ts} +12 -3
- package/dist/types/utils.d.ts +1 -1
- package/package.json +2 -2
- package/report/index.html +19 -19
- package/dist/es/chunk-UBGEKXK7.js.map +0 -1
- package/dist/lib/chunk-UBGEKXK7.js.map +0 -1
- /package/dist/es/{chunk-SAXMKI7Z.js.map → chunk-6MPYG6WR.js.map} +0 -0
- /package/dist/es/{chunk-5EO33FHK.js.map → chunk-QX6K65KH.js.map} +0 -0
- /package/dist/lib/{chunk-SAXMKI7Z.js.map → chunk-6MPYG6WR.js.map} +0 -0
- /package/dist/lib/{chunk-5EO33FHK.js.map → chunk-QX6K65KH.js.map} +0 -0
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
var
|
|
28
|
+
var _chunk6SYVFZ6Cjs = require('./chunk-6SYVFZ6C.js');
|
|
29
29
|
|
|
30
30
|
// src/ai-model/service-caller/index.ts
|
|
31
31
|
var _sdk = require('@anthropic-ai/sdk');
|
|
@@ -118,7 +118,7 @@ function adaptDoubaoBbox(bbox, width, height, errorMsg) {
|
|
|
118
118
|
throw new Error(msg);
|
|
119
119
|
}
|
|
120
120
|
function adaptBbox(bbox, width, height, errorMsg) {
|
|
121
|
-
if (
|
|
121
|
+
if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) === "doubao-vision") {
|
|
122
122
|
return adaptDoubaoBbox(bbox, width, height, errorMsg);
|
|
123
123
|
}
|
|
124
124
|
return adaptQwenBbox(bbox, errorMsg);
|
|
@@ -598,7 +598,7 @@ async function describeUserPage(context, opt) {
|
|
|
598
598
|
_optionalChain([opt, 'optionalAccess', _10 => _10.filterNonTextContent])
|
|
599
599
|
);
|
|
600
600
|
const sizeDescription = describeSize({ width, height });
|
|
601
|
-
const pageDescription =
|
|
601
|
+
const pageDescription = _chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) ? "" : `The size of the page: ${sizeDescription}
|
|
602
602
|
Some of the elements are marked with a rectangle in the screenshot, some are not.
|
|
603
603
|
The page elements tree:
|
|
604
604
|
${contentTree}`;
|
|
@@ -819,7 +819,7 @@ Reason:
|
|
|
819
819
|
* Since the option button is not shown in the screenshot, there are still more actions to be done, so the \`more_actions_needed_by_instruction\` field should be true
|
|
820
820
|
`;
|
|
821
821
|
async function systemPromptToTaskPlanning() {
|
|
822
|
-
if (
|
|
822
|
+
if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
|
|
823
823
|
return systemTemplateOfVLPlanning;
|
|
824
824
|
}
|
|
825
825
|
const promptTemplate = new (0, _prompts.PromptTemplate)({
|
|
@@ -961,7 +961,7 @@ Here is the user's instruction:
|
|
|
961
961
|
`;
|
|
962
962
|
};
|
|
963
963
|
var automationUserPrompt = () => {
|
|
964
|
-
if (
|
|
964
|
+
if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
|
|
965
965
|
return new (0, _prompts.PromptTemplate)({
|
|
966
966
|
template: "{taskBackgroundContext}",
|
|
967
967
|
inputVariables: ["taskBackgroundContext"]
|
|
@@ -981,18 +981,18 @@ pageDescription:
|
|
|
981
981
|
|
|
982
982
|
// src/ai-model/service-caller/index.ts
|
|
983
983
|
function checkAIConfig() {
|
|
984
|
-
if (
|
|
984
|
+
if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_API_KEY))
|
|
985
985
|
return true;
|
|
986
|
-
if (
|
|
986
|
+
if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_AZURE_OPENAI))
|
|
987
987
|
return true;
|
|
988
|
-
if (
|
|
988
|
+
if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.ANTHROPIC_API_KEY))
|
|
989
989
|
return true;
|
|
990
|
-
return Boolean(
|
|
990
|
+
return Boolean(_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON));
|
|
991
991
|
}
|
|
992
992
|
var debugProfileStats = _logger.getDebug.call(void 0, "ai:profile:stats");
|
|
993
993
|
var debugProfileDetail = _logger.getDebug.call(void 0, "ai:profile:detail");
|
|
994
994
|
var debugCall = _logger.getDebug.call(void 0, "ai:call");
|
|
995
|
-
var shouldPrintTiming =
|
|
995
|
+
var shouldPrintTiming = _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_PROFILE);
|
|
996
996
|
var debugConfig = "";
|
|
997
997
|
if (shouldPrintTiming) {
|
|
998
998
|
console.warn(
|
|
@@ -1000,7 +1000,7 @@ if (shouldPrintTiming) {
|
|
|
1000
1000
|
);
|
|
1001
1001
|
debugConfig = "ai:profile";
|
|
1002
1002
|
}
|
|
1003
|
-
var shouldPrintAIResponse =
|
|
1003
|
+
var shouldPrintAIResponse = _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_RESPONSE);
|
|
1004
1004
|
if (shouldPrintAIResponse) {
|
|
1005
1005
|
console.warn(
|
|
1006
1006
|
"MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead"
|
|
@@ -1017,7 +1017,7 @@ if (debugConfig) {
|
|
|
1017
1017
|
var defaultModel = "gpt-4o";
|
|
1018
1018
|
function getModelName() {
|
|
1019
1019
|
let modelName = defaultModel;
|
|
1020
|
-
const nameInConfig =
|
|
1020
|
+
const nameInConfig = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME);
|
|
1021
1021
|
if (nameInConfig) {
|
|
1022
1022
|
modelName = nameInConfig;
|
|
1023
1023
|
}
|
|
@@ -1027,22 +1027,22 @@ async function createChatClient({
|
|
|
1027
1027
|
AIActionTypeValue
|
|
1028
1028
|
}) {
|
|
1029
1029
|
let openai;
|
|
1030
|
-
const extraConfig =
|
|
1031
|
-
const socksProxy =
|
|
1030
|
+
const extraConfig = _chunk6SYVFZ6Cjs.getAIConfigInJson.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
|
|
1031
|
+
const socksProxy = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_SOCKS_PROXY);
|
|
1032
1032
|
const socksAgent = socksProxy ? new (0, _socksproxyagent.SocksProxyAgent)(socksProxy) : void 0;
|
|
1033
|
-
if (
|
|
1033
|
+
if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_USE_AZURE)) {
|
|
1034
1034
|
openai = new (0, _openai.AzureOpenAI)({
|
|
1035
|
-
baseURL:
|
|
1036
|
-
apiKey:
|
|
1035
|
+
baseURL: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_BASE_URL),
|
|
1036
|
+
apiKey: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_API_KEY),
|
|
1037
1037
|
httpAgent: socksAgent,
|
|
1038
1038
|
...extraConfig,
|
|
1039
1039
|
dangerouslyAllowBrowser: true
|
|
1040
1040
|
});
|
|
1041
|
-
} else if (
|
|
1042
|
-
const extraAzureConfig =
|
|
1043
|
-
|
|
1041
|
+
} else if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_AZURE_OPENAI)) {
|
|
1042
|
+
const extraAzureConfig = _chunk6SYVFZ6Cjs.getAIConfigInJson.call(void 0,
|
|
1043
|
+
_chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
|
|
1044
1044
|
);
|
|
1045
|
-
const scope =
|
|
1045
|
+
const scope = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_SCOPE);
|
|
1046
1046
|
let tokenProvider = void 0;
|
|
1047
1047
|
if (scope) {
|
|
1048
1048
|
_utils.assert.call(void 0,
|
|
@@ -1054,25 +1054,25 @@ async function createChatClient({
|
|
|
1054
1054
|
tokenProvider = _identity.getBearerTokenProvider.call(void 0, credential, scope);
|
|
1055
1055
|
openai = new (0, _openai.AzureOpenAI)({
|
|
1056
1056
|
azureADTokenProvider: tokenProvider,
|
|
1057
|
-
endpoint:
|
|
1058
|
-
apiVersion:
|
|
1059
|
-
deployment:
|
|
1057
|
+
endpoint: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_ENDPOINT),
|
|
1058
|
+
apiVersion: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_API_VERSION),
|
|
1059
|
+
deployment: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_DEPLOYMENT),
|
|
1060
1060
|
...extraConfig,
|
|
1061
1061
|
...extraAzureConfig
|
|
1062
1062
|
});
|
|
1063
1063
|
} else {
|
|
1064
1064
|
openai = new (0, _openai.AzureOpenAI)({
|
|
1065
|
-
apiKey:
|
|
1066
|
-
endpoint:
|
|
1067
|
-
apiVersion:
|
|
1068
|
-
deployment:
|
|
1065
|
+
apiKey: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_KEY),
|
|
1066
|
+
endpoint: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_ENDPOINT),
|
|
1067
|
+
apiVersion: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_API_VERSION),
|
|
1068
|
+
deployment: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_DEPLOYMENT),
|
|
1069
1069
|
dangerouslyAllowBrowser: true,
|
|
1070
1070
|
...extraConfig,
|
|
1071
1071
|
...extraAzureConfig
|
|
1072
1072
|
});
|
|
1073
1073
|
}
|
|
1074
|
-
} else if (!
|
|
1075
|
-
const baseURL =
|
|
1074
|
+
} else if (!_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_ANTHROPIC_SDK)) {
|
|
1075
|
+
const baseURL = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_BASE_URL);
|
|
1076
1076
|
if (typeof baseURL === "string") {
|
|
1077
1077
|
if (!/^https?:\/\//.test(baseURL)) {
|
|
1078
1078
|
throw new Error(
|
|
@@ -1082,18 +1082,18 @@ Please check your config.`
|
|
|
1082
1082
|
}
|
|
1083
1083
|
}
|
|
1084
1084
|
openai = new (0, _openai2.default)({
|
|
1085
|
-
baseURL:
|
|
1086
|
-
apiKey:
|
|
1085
|
+
baseURL: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_BASE_URL),
|
|
1086
|
+
apiKey: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_API_KEY),
|
|
1087
1087
|
httpAgent: socksAgent,
|
|
1088
1088
|
...extraConfig,
|
|
1089
1089
|
defaultHeaders: {
|
|
1090
1090
|
..._optionalChain([extraConfig, 'optionalAccess', _11 => _11.defaultHeaders]) || {},
|
|
1091
|
-
[
|
|
1091
|
+
[_chunk6SYVFZ6Cjs.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
|
|
1092
1092
|
},
|
|
1093
1093
|
dangerouslyAllowBrowser: true
|
|
1094
1094
|
});
|
|
1095
1095
|
}
|
|
1096
|
-
if (openai &&
|
|
1096
|
+
if (openai && _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_LANGSMITH_DEBUG)) {
|
|
1097
1097
|
if (_utils.ifInBrowser) {
|
|
1098
1098
|
throw new Error("langsmith is not supported in browser");
|
|
1099
1099
|
}
|
|
@@ -1107,8 +1107,8 @@ Please check your config.`
|
|
|
1107
1107
|
style: "openai"
|
|
1108
1108
|
};
|
|
1109
1109
|
}
|
|
1110
|
-
if (
|
|
1111
|
-
const apiKey =
|
|
1110
|
+
if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_ANTHROPIC_SDK)) {
|
|
1111
|
+
const apiKey = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.ANTHROPIC_API_KEY);
|
|
1112
1112
|
_utils.assert.call(void 0, apiKey, "ANTHROPIC_API_KEY is required");
|
|
1113
1113
|
openai = new (0, _sdk.Anthropic)({
|
|
1114
1114
|
apiKey,
|
|
@@ -1127,16 +1127,16 @@ async function call(messages, AIActionTypeValue, responseFormat) {
|
|
|
1127
1127
|
const { completion, style } = await createChatClient({
|
|
1128
1128
|
AIActionTypeValue
|
|
1129
1129
|
});
|
|
1130
|
-
const maxTokens =
|
|
1130
|
+
const maxTokens = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_MAX_TOKENS);
|
|
1131
1131
|
const startTime = Date.now();
|
|
1132
1132
|
const model = getModelName();
|
|
1133
1133
|
let content;
|
|
1134
1134
|
let usage;
|
|
1135
1135
|
const commonConfig = {
|
|
1136
|
-
temperature:
|
|
1136
|
+
temperature: _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS) ? 0 : 0.1,
|
|
1137
1137
|
stream: false,
|
|
1138
1138
|
max_tokens: typeof maxTokens === "number" ? maxTokens : Number.parseInt(maxTokens || "2048", 10),
|
|
1139
|
-
...
|
|
1139
|
+
..._chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL) ? {
|
|
1140
1140
|
vl_high_resolution_images: true
|
|
1141
1141
|
} : {}
|
|
1142
1142
|
};
|
|
@@ -1160,7 +1160,7 @@ async function call(messages, AIActionTypeValue, responseFormat) {
|
|
|
1160
1160
|
throw newError;
|
|
1161
1161
|
}
|
|
1162
1162
|
debugProfileStats(
|
|
1163
|
-
`model, ${model}, mode, ${
|
|
1163
|
+
`model, ${model}, mode, ${_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) || "default"}, prompt-tokens, ${_optionalChain([result, 'access', _12 => _12.usage, 'optionalAccess', _13 => _13.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _14 => _14.usage, 'optionalAccess', _15 => _15.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _16 => _16.usage, 'optionalAccess', _17 => _17.total_tokens]) || ""}, cost-ms, ${Date.now() - startTime}, requestId, ${result._request_id || ""}`
|
|
1164
1164
|
);
|
|
1165
1165
|
debugProfileDetail("model usage detail: %s", JSON.stringify(result.usage));
|
|
1166
1166
|
_utils.assert.call(void 0,
|
|
@@ -1271,7 +1271,7 @@ function safeParseJson(input) {
|
|
|
1271
1271
|
return _dirtyjson2.default.parse(cleanJsonString);
|
|
1272
1272
|
} catch (e) {
|
|
1273
1273
|
}
|
|
1274
|
-
if (
|
|
1274
|
+
if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) === "doubao-vision") {
|
|
1275
1275
|
const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
|
|
1276
1276
|
return _dirtyjson2.default.parse(jsonString);
|
|
1277
1277
|
}
|
|
@@ -1411,7 +1411,7 @@ async function AiLocateElement(options) {
|
|
|
1411
1411
|
pageDescription: description,
|
|
1412
1412
|
targetElementDescription
|
|
1413
1413
|
});
|
|
1414
|
-
const systemPrompt = systemPromptToLocateElement(!!
|
|
1414
|
+
const systemPrompt = systemPromptToLocateElement(!!_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ));
|
|
1415
1415
|
let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
|
|
1416
1416
|
if (options.searchConfig) {
|
|
1417
1417
|
_utils.assert.call(void 0,
|
|
@@ -1423,7 +1423,7 @@ async function AiLocateElement(options) {
|
|
|
1423
1423
|
"searchArea is provided but its imageBase64 cannot be found. Failed to locate element"
|
|
1424
1424
|
);
|
|
1425
1425
|
imagePayload = options.searchConfig.imageBase64;
|
|
1426
|
-
} else if (
|
|
1426
|
+
} else if (_chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL)) {
|
|
1427
1427
|
imagePayload = await _img.paddingToMatchBlockByBase64.call(void 0, imagePayload);
|
|
1428
1428
|
}
|
|
1429
1429
|
const msgs = [
|
|
@@ -1542,7 +1542,7 @@ async function AiLocateSection(options) {
|
|
|
1542
1542
|
imageBase64 = await _img.cropByRect.call(void 0,
|
|
1543
1543
|
screenshotBase64,
|
|
1544
1544
|
sectionRect,
|
|
1545
|
-
|
|
1545
|
+
_chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL)
|
|
1546
1546
|
);
|
|
1547
1547
|
}
|
|
1548
1548
|
return {
|
|
@@ -1609,7 +1609,7 @@ async function AiAssert(options) {
|
|
|
1609
1609
|
_utils.assert.call(void 0, assertion, "assertion should be a string");
|
|
1610
1610
|
const { screenshotBase64 } = context;
|
|
1611
1611
|
const systemPrompt = systemPromptToAssert({
|
|
1612
|
-
isUITars:
|
|
1612
|
+
isUITars: _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS)
|
|
1613
1613
|
});
|
|
1614
1614
|
const msgs = [
|
|
1615
1615
|
{ role: "system", content: systemPrompt },
|
|
@@ -1663,7 +1663,7 @@ async function plan(userInstruction, opts) {
|
|
|
1663
1663
|
taskBackgroundContext: taskBackgroundContextText
|
|
1664
1664
|
});
|
|
1665
1665
|
let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
|
|
1666
|
-
if (
|
|
1666
|
+
if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) === "qwen-vl") {
|
|
1667
1667
|
imagePayload = await _img.paddingToMatchBlockByBase64.call(void 0, imagePayload);
|
|
1668
1668
|
}
|
|
1669
1669
|
warnGPT4oSizeLimit(size);
|
|
@@ -1698,7 +1698,7 @@ async function plan(userInstruction, opts) {
|
|
|
1698
1698
|
usage
|
|
1699
1699
|
};
|
|
1700
1700
|
_utils.assert.call(void 0, planFromAI, "can't get plans from AI");
|
|
1701
|
-
if (
|
|
1701
|
+
if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
|
|
1702
1702
|
actions.forEach((action) => {
|
|
1703
1703
|
if (action.locate) {
|
|
1704
1704
|
action.locate = fillLocateParam(
|
|
@@ -1888,4 +1888,4 @@ function getPoint(startBox, size) {
|
|
|
1888
1888
|
|
|
1889
1889
|
exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.describeUserPage = describeUserPage; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning;
|
|
1890
1890
|
|
|
1891
|
-
//# sourceMappingURL=chunk-
|
|
1891
|
+
//# sourceMappingURL=chunk-QX6K65KH.js.map
|
package/dist/lib/env.d.ts
CHANGED
|
@@ -60,7 +60,7 @@ declare const allConfigFromEnv: () => {
|
|
|
60
60
|
AZURE_OPENAI_DEPLOYMENT: string | undefined;
|
|
61
61
|
};
|
|
62
62
|
declare let userConfig: Partial<ReturnType<typeof allConfigFromEnv>>;
|
|
63
|
-
declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | false;
|
|
63
|
+
declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | "vlm-ui-tars" | false;
|
|
64
64
|
declare const getAIConfig: (configKey: keyof typeof userConfig) => string | undefined;
|
|
65
65
|
declare const getAIConfigInBoolean: (configKey: keyof typeof userConfig) => boolean;
|
|
66
66
|
declare const getAIConfigInJson: (configKey: keyof typeof userConfig) => any;
|
package/dist/lib/env.js
CHANGED
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
var
|
|
38
|
+
var _chunk6SYVFZ6Cjs = require('./chunk-6SYVFZ6C.js');
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
|
|
@@ -73,4 +73,4 @@ var _chunkUBGEKXK7js = require('./chunk-UBGEKXK7.js');
|
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
|
|
76
|
-
exports.ANTHROPIC_API_KEY =
|
|
76
|
+
exports.ANTHROPIC_API_KEY = _chunk6SYVFZ6Cjs.ANTHROPIC_API_KEY; exports.AZURE_OPENAI_API_VERSION = _chunk6SYVFZ6Cjs.AZURE_OPENAI_API_VERSION; exports.AZURE_OPENAI_DEPLOYMENT = _chunk6SYVFZ6Cjs.AZURE_OPENAI_DEPLOYMENT; exports.AZURE_OPENAI_ENDPOINT = _chunk6SYVFZ6Cjs.AZURE_OPENAI_ENDPOINT; exports.AZURE_OPENAI_KEY = _chunk6SYVFZ6Cjs.AZURE_OPENAI_KEY; exports.MATCH_BY_POSITION = _chunk6SYVFZ6Cjs.MATCH_BY_POSITION; exports.MIDSCENE_API_TYPE = _chunk6SYVFZ6Cjs.MIDSCENE_API_TYPE; exports.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = _chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON; exports.MIDSCENE_AZURE_OPENAI_SCOPE = _chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_SCOPE; exports.MIDSCENE_CACHE = _chunk6SYVFZ6Cjs.MIDSCENE_CACHE; exports.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = _chunk6SYVFZ6Cjs.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG; exports.MIDSCENE_DEBUG_AI_PROFILE = _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_PROFILE; exports.MIDSCENE_DEBUG_AI_RESPONSE = _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_RESPONSE; exports.MIDSCENE_DEBUG_MODE = _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_MODE; exports.MIDSCENE_FORCE_DEEP_THINK = _chunk6SYVFZ6Cjs.MIDSCENE_FORCE_DEEP_THINK; exports.MIDSCENE_LANGSMITH_DEBUG = _chunk6SYVFZ6Cjs.MIDSCENE_LANGSMITH_DEBUG; exports.MIDSCENE_MODEL_NAME = _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME; exports.MIDSCENE_OPENAI_INIT_CONFIG_JSON = _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON; exports.MIDSCENE_OPENAI_SOCKS_PROXY = _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_SOCKS_PROXY; exports.MIDSCENE_REPORT_TAG_NAME = _chunk6SYVFZ6Cjs.MIDSCENE_REPORT_TAG_NAME; exports.MIDSCENE_USE_ANTHROPIC_SDK = _chunk6SYVFZ6Cjs.MIDSCENE_USE_ANTHROPIC_SDK; exports.MIDSCENE_USE_AZURE_OPENAI = _chunk6SYVFZ6Cjs.MIDSCENE_USE_AZURE_OPENAI; exports.MIDSCENE_USE_DOUBAO_VISION = _chunk6SYVFZ6Cjs.MIDSCENE_USE_DOUBAO_VISION; exports.MIDSCENE_USE_QWEN_VL = _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL; exports.MIDSCENE_USE_VLM_UI_TARS = _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS; exports.MIDSCENE_USE_VL_MODEL = _chunk6SYVFZ6Cjs.MIDSCENE_USE_VL_MODEL; exports.OPENAI_API_KEY = _chunk6SYVFZ6Cjs.OPENAI_API_KEY; exports.OPENAI_BASE_URL = _chunk6SYVFZ6Cjs.OPENAI_BASE_URL; exports.OPENAI_MAX_TOKENS = _chunk6SYVFZ6Cjs.OPENAI_MAX_TOKENS; exports.OPENAI_USE_AZURE = _chunk6SYVFZ6Cjs.OPENAI_USE_AZURE; exports.allAIConfig = _chunk6SYVFZ6Cjs.allAIConfig; exports.getAIConfig = _chunk6SYVFZ6Cjs.getAIConfig; exports.getAIConfigInBoolean = _chunk6SYVFZ6Cjs.getAIConfigInBoolean; exports.getAIConfigInJson = _chunk6SYVFZ6Cjs.getAIConfigInJson; exports.overrideAIConfig = _chunk6SYVFZ6Cjs.overrideAIConfig; exports.vlLocateMode = _chunk6SYVFZ6Cjs.vlLocateMode;
|
package/dist/lib/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-
|
|
2
|
-
export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext,
|
|
3
|
-
import { c as callAiFn } from './llm-planning-
|
|
4
|
-
export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-
|
|
1
|
+
import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-d2831105.js';
|
|
2
|
+
export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext, aH as FreeFn, aq as GroupedActionDump, z as InsightDump, v as InsightExtractParam, G as LiteUISection, ar as LocateOption, w as LocateResultElement, i as MidsceneYamlFlowItem, ax as MidsceneYamlFlowItemAIAction, ay as MidsceneYamlFlowItemAIAssert, aC as MidsceneYamlFlowItemAIHover, aD as MidsceneYamlFlowItemAIInput, aE as MidsceneYamlFlowItemAIKeyboardPress, az as MidsceneYamlFlowItemAIQuery, aF as MidsceneYamlFlowItemAIScroll, aB as MidsceneYamlFlowItemAITap, aA as MidsceneYamlFlowItemAIWaitFor, aG as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, av as MidsceneYamlScriptAndroidEnv, aw as MidsceneYamlScriptEnv, at as MidsceneYamlScriptEnvBase, au as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, O as OnTaskStartTip, F as PartialInsightDumpFromSDK, T as PlanningAIResponse, Q as PlanningAction, Z as PlanningActionParamAssert, $ as PlanningActionParamError, W as PlanningActionParamHover, X as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamScroll, _ as PlanningActionParamSleep, V as PlanningActionParamTap, a0 as PlanningActionParamWaitFor, N as PlanningLocateParam, a4 as PlaywrightParserOpt, P as Point, a3 as PuppeteerParserOpt, R as Rect, y as ReportDumpWithAttributes, aJ as ScriptPlayerStatusValue, aI as ScriptPlayerTaskStatus, S as Size, a8 as TaskCacheInfo, as as scrollParam } from './types-d2831105.js';
|
|
3
|
+
import { c as callAiFn } from './llm-planning-d084250f.js';
|
|
4
|
+
export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-d084250f.js';
|
|
5
5
|
export { getLogDirByType, getVersion, setLogDir } from './utils.js';
|
|
6
6
|
export { MIDSCENE_MODEL_NAME, getAIConfig } from './env.js';
|
|
7
7
|
import '@midscene/shared/constants';
|
package/dist/lib/index.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
var
|
|
6
|
+
var _chunk6MPYG6WRjs = require('./chunk-6MPYG6WR.js');
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
|
|
@@ -12,7 +12,7 @@ var _chunkSAXMKI7Zjs = require('./chunk-SAXMKI7Z.js');
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
var
|
|
15
|
+
var _chunkQX6K65KHjs = require('./chunk-QX6K65KH.js');
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
@@ -20,7 +20,7 @@ var _chunk5EO33FHKjs = require('./chunk-5EO33FHK.js');
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
var
|
|
23
|
+
var _chunk6SYVFZ6Cjs = require('./chunk-6SYVFZ6C.js');
|
|
24
24
|
|
|
25
25
|
// src/ai-model/action-executor.ts
|
|
26
26
|
var _utils = require('@midscene/shared/utils');
|
|
@@ -166,8 +166,8 @@ ${_optionalChain([this, 'access', _7 => _7.latestErrorTask, 'call', _8 => _8(),
|
|
|
166
166
|
}
|
|
167
167
|
dump() {
|
|
168
168
|
const dumpData = {
|
|
169
|
-
sdkVersion:
|
|
170
|
-
model_name:
|
|
169
|
+
sdkVersion: _chunk6MPYG6WRjs.getVersion.call(void 0, ),
|
|
170
|
+
model_name: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME) || "",
|
|
171
171
|
logTime: Date.now(),
|
|
172
172
|
name: this.name,
|
|
173
173
|
tasks: this.tasks
|
|
@@ -184,18 +184,18 @@ var _logger = require('@midscene/shared/logger');
|
|
|
184
184
|
|
|
185
185
|
|
|
186
186
|
function emitInsightDump(data, dumpSubscriber) {
|
|
187
|
-
const logDir =
|
|
187
|
+
const logDir = _chunk6MPYG6WRjs.getLogDir.call(void 0, );
|
|
188
188
|
_utils.assert.call(void 0, logDir, "logDir should be set before writing dump file");
|
|
189
189
|
let modelDescription = "";
|
|
190
|
-
if (
|
|
190
|
+
if (_chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS)) {
|
|
191
191
|
modelDescription = "vlm-ui-tars mode";
|
|
192
|
-
} else if (
|
|
193
|
-
modelDescription = `${
|
|
192
|
+
} else if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
|
|
193
|
+
modelDescription = `${_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )} mode`;
|
|
194
194
|
}
|
|
195
195
|
const baseData = {
|
|
196
|
-
sdkVersion:
|
|
196
|
+
sdkVersion: _chunk6MPYG6WRjs.getVersion.call(void 0, ),
|
|
197
197
|
logTime: Date.now(),
|
|
198
|
-
model_name:
|
|
198
|
+
model_name: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME) || "",
|
|
199
199
|
model_description: modelDescription
|
|
200
200
|
};
|
|
201
201
|
const finalData = {
|
|
@@ -210,7 +210,7 @@ function emitInsightDump(data, dumpSubscriber) {
|
|
|
210
210
|
var debug = _logger.getDebug.call(void 0, "ai:insight");
|
|
211
211
|
var Insight = class {
|
|
212
212
|
constructor(context, opt) {
|
|
213
|
-
this.aiVendorFn =
|
|
213
|
+
this.aiVendorFn = _chunkQX6K65KHjs.callAiFn;
|
|
214
214
|
_utils.assert.call(void 0, context, "context is required for Insight");
|
|
215
215
|
if (typeof context === "function") {
|
|
216
216
|
this.contextRetrieverFn = context;
|
|
@@ -234,8 +234,8 @@ var Insight = class {
|
|
|
234
234
|
const dumpSubscriber = this.onceDumpUpdatedFn;
|
|
235
235
|
this.onceDumpUpdatedFn = void 0;
|
|
236
236
|
_utils.assert.call(void 0, typeof query === "object", "query should be an object for locate");
|
|
237
|
-
const globalDeepThinkSwitch =
|
|
238
|
-
|
|
237
|
+
const globalDeepThinkSwitch = _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0,
|
|
238
|
+
_chunk6SYVFZ6Cjs.MIDSCENE_FORCE_DEEP_THINK
|
|
239
239
|
);
|
|
240
240
|
if (globalDeepThinkSwitch) {
|
|
241
241
|
debug("globalDeepThinkSwitch", globalDeepThinkSwitch);
|
|
@@ -244,7 +244,7 @@ var Insight = class {
|
|
|
244
244
|
if (query.deepThink || globalDeepThinkSwitch) {
|
|
245
245
|
searchAreaPrompt = query.prompt;
|
|
246
246
|
}
|
|
247
|
-
if (searchAreaPrompt && !
|
|
247
|
+
if (searchAreaPrompt && !_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
|
|
248
248
|
console.warn(
|
|
249
249
|
'The "deepThink" feature is not supported with general purposed LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model'
|
|
250
250
|
);
|
|
@@ -256,7 +256,7 @@ var Insight = class {
|
|
|
256
256
|
let searchAreaUsage = void 0;
|
|
257
257
|
let searchAreaResponse = void 0;
|
|
258
258
|
if (searchAreaPrompt) {
|
|
259
|
-
searchAreaResponse = await
|
|
259
|
+
searchAreaResponse = await _chunkQX6K65KHjs.AiLocateSection.call(void 0, {
|
|
260
260
|
context,
|
|
261
261
|
sectionDescription: searchAreaPrompt
|
|
262
262
|
});
|
|
@@ -269,7 +269,7 @@ var Insight = class {
|
|
|
269
269
|
searchArea = searchAreaResponse.rect;
|
|
270
270
|
}
|
|
271
271
|
const startTime = Date.now();
|
|
272
|
-
const { parseResult, rect, elementById, rawResponse, usage } = await
|
|
272
|
+
const { parseResult, rect, elementById, rawResponse, usage } = await _chunkQX6K65KHjs.AiLocateElement.call(void 0, {
|
|
273
273
|
callAI: callAI || this.aiVendorFn,
|
|
274
274
|
context,
|
|
275
275
|
targetElementDescription: queryPrompt,
|
|
@@ -358,7 +358,7 @@ ${parseResult.errors.join("\n")}`;
|
|
|
358
358
|
this.onceDumpUpdatedFn = void 0;
|
|
359
359
|
const context = await this.contextRetrieverFn("extract");
|
|
360
360
|
const startTime = Date.now();
|
|
361
|
-
const { parseResult, usage } = await
|
|
361
|
+
const { parseResult, usage } = await _chunkQX6K65KHjs.AiExtractElementInfo.call(void 0, {
|
|
362
362
|
context,
|
|
363
363
|
dataQuery: dataDemand
|
|
364
364
|
});
|
|
@@ -410,7 +410,7 @@ ${parseResult.errors.join("\n")}`;
|
|
|
410
410
|
this.onceDumpUpdatedFn = void 0;
|
|
411
411
|
const context = await this.contextRetrieverFn("assert");
|
|
412
412
|
const startTime = Date.now();
|
|
413
|
-
const assertResult = await
|
|
413
|
+
const assertResult = await _chunkQX6K65KHjs.AiAssert.call(void 0, {
|
|
414
414
|
assertion,
|
|
415
415
|
context
|
|
416
416
|
});
|
|
@@ -458,6 +458,6 @@ var src_default = Insight;
|
|
|
458
458
|
|
|
459
459
|
|
|
460
460
|
|
|
461
|
-
exports.AiAssert =
|
|
461
|
+
exports.AiAssert = _chunkQX6K65KHjs.AiAssert; exports.AiLocateElement = _chunkQX6K65KHjs.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkQX6K65KHjs.describeUserPage; exports.getAIConfig = _chunk6SYVFZ6Cjs.getAIConfig; exports.getLogDirByType = _chunk6MPYG6WRjs.getLogDirByType; exports.getVersion = _chunk6MPYG6WRjs.getVersion; exports.plan = _chunkQX6K65KHjs.plan; exports.setLogDir = _chunk6MPYG6WRjs.setLogDir;
|
|
462
462
|
|
|
463
463
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { k as AIUsageInfo, R as Rect, B as BaseElement, U as UIContext, A as AISingleElementResponse, n as AISingleElementResponseByPosition, o as AIElementLocatorResponse, H as ElementById, r as AIDataExtractionResponse, t as AIAssertionResponse, T as PlanningAIResponse } from './types-
|
|
1
|
+
import { k as AIUsageInfo, R as Rect, B as BaseElement, U as UIContext, A as AISingleElementResponse, n as AISingleElementResponseByPosition, o as AIElementLocatorResponse, H as ElementById, r as AIDataExtractionResponse, t as AIAssertionResponse, T as PlanningAIResponse } from './types-d2831105.js';
|
|
2
2
|
import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
|
|
3
3
|
|
|
4
4
|
type AIArgs = [
|
package/dist/lib/tree.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as _midscene_shared_constants from '@midscene/shared/constants';
|
|
2
|
-
import { B as BaseElement, j as ElementTreeNode } from './types-
|
|
2
|
+
import { B as BaseElement, j as ElementTreeNode } from './types-d2831105.js';
|
|
3
3
|
import 'openai/resources';
|
|
4
4
|
|
|
5
5
|
declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
|
|
@@ -14,7 +14,9 @@ interface scrollParam {
|
|
|
14
14
|
distance?: null | number;
|
|
15
15
|
}
|
|
16
16
|
interface MidsceneYamlScript {
|
|
17
|
-
target
|
|
17
|
+
target?: MidsceneYamlScriptWebEnv;
|
|
18
|
+
web?: MidsceneYamlScriptWebEnv;
|
|
19
|
+
android?: MidsceneYamlScriptAndroidEnv;
|
|
18
20
|
tasks: MidsceneYamlTask[];
|
|
19
21
|
}
|
|
20
22
|
interface MidsceneYamlTask {
|
|
@@ -22,9 +24,11 @@ interface MidsceneYamlTask {
|
|
|
22
24
|
flow: MidsceneYamlFlowItem[];
|
|
23
25
|
continueOnError?: boolean;
|
|
24
26
|
}
|
|
25
|
-
interface
|
|
27
|
+
interface MidsceneYamlScriptEnvBase {
|
|
26
28
|
output?: string;
|
|
27
29
|
aiActionContext?: string;
|
|
30
|
+
}
|
|
31
|
+
interface MidsceneYamlScriptWebEnv extends MidsceneYamlScriptEnvBase {
|
|
28
32
|
serve?: string;
|
|
29
33
|
url: string;
|
|
30
34
|
userAgent?: string;
|
|
@@ -41,6 +45,11 @@ interface MidsceneYamlScriptEnv {
|
|
|
41
45
|
bridgeMode?: false | 'newTabWithUrl' | 'currentTab';
|
|
42
46
|
closeNewTabsAfterDisconnect?: boolean;
|
|
43
47
|
}
|
|
48
|
+
interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptEnvBase {
|
|
49
|
+
deviceId?: string;
|
|
50
|
+
launch?: string;
|
|
51
|
+
}
|
|
52
|
+
type MidsceneYamlScriptEnv = MidsceneYamlScriptWebEnv | MidsceneYamlScriptAndroidEnv;
|
|
44
53
|
interface MidsceneYamlFlowItemAIAction {
|
|
45
54
|
ai?: string;
|
|
46
55
|
aiAction?: string;
|
|
@@ -410,4 +419,4 @@ interface GroupedActionDump {
|
|
|
410
419
|
executions: ExecutionDump[];
|
|
411
420
|
}
|
|
412
421
|
|
|
413
|
-
export { type PlanningActionParamError as $, type AISingleElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PartialInsightDumpFromSDK as F, type LiteUISection as G, type ElementById as H, type InsightAction as I, type AgentWaitForOpt as J, type AgentAssertOpt as K, type LocateResult as L, type MidsceneYamlScript as M, type PlanningLocateParam as N, type OnTaskStartTip as O, type Point as P, type PlanningAction as Q, type Rect as R, type Size as S, type PlanningAIResponse as T, UIContext as U, type PlanningActionParamTap as V, type PlanningActionParamHover as W, type PlanningActionParamInputOrKeyPress as X, type PlanningActionParamScroll as Y, type PlanningActionParamAssert as Z, type PlanningActionParamSleep as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamWaitFor as a0, type Color as a1, type BaseAgentParserOpt as a2, type PuppeteerParserOpt as a3, type PlaywrightParserOpt as a4, type ExecutionRecorderItem as a5, type ExecutionTaskType as a6, type ExecutorContext as a7, type TaskCacheInfo as a8, type ExecutionTaskReturn as a9, type
|
|
422
|
+
export { type PlanningActionParamError as $, type AISingleElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PartialInsightDumpFromSDK as F, type LiteUISection as G, type ElementById as H, type InsightAction as I, type AgentWaitForOpt as J, type AgentAssertOpt as K, type LocateResult as L, type MidsceneYamlScript as M, type PlanningLocateParam as N, type OnTaskStartTip as O, type Point as P, type PlanningAction as Q, type Rect as R, type Size as S, type PlanningAIResponse as T, UIContext as U, type PlanningActionParamTap as V, type PlanningActionParamHover as W, type PlanningActionParamInputOrKeyPress as X, type PlanningActionParamScroll as Y, type PlanningActionParamAssert as Z, type PlanningActionParamSleep as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamWaitFor as a0, type Color as a1, type BaseAgentParserOpt as a2, type PuppeteerParserOpt as a3, type PlaywrightParserOpt as a4, type ExecutionRecorderItem as a5, type ExecutionTaskType as a6, type ExecutorContext as a7, type TaskCacheInfo as a8, type ExecutionTaskReturn as a9, type MidsceneYamlFlowItemAIWaitFor as aA, type MidsceneYamlFlowItemAITap as aB, type MidsceneYamlFlowItemAIHover as aC, type MidsceneYamlFlowItemAIInput as aD, type MidsceneYamlFlowItemAIKeyboardPress as aE, type MidsceneYamlFlowItemAIScroll as aF, type MidsceneYamlFlowItemSleep as aG, type FreeFn as aH, type ScriptPlayerTaskStatus as aI, type ScriptPlayerStatusValue as aJ, type ExecutionTaskInsightLocateParam as aa, type ExecutionTaskInsightLocateOutput as ab, type ExecutionTaskInsightDumpLog as ac, type ExecutionTaskInsightLocateApply as ad, type ExecutionTaskInsightLocate as ae, type ExecutionTaskInsightQueryParam as af, type ExecutionTaskInsightQueryOutput as ag, type ExecutionTaskInsightQueryApply as ah, type ExecutionTaskInsightQuery as ai, type ExecutionTaskInsightAssertionParam as aj, type ExecutionTaskInsightAssertionApply as ak, type ExecutionTaskInsightAssertion as al, type ExecutionTaskActionApply as am, type ExecutionTaskAction as an, type ExecutionTaskPlanningApply as ao, type ExecutionTaskPlanning as ap, type GroupedActionDump as aq, type LocateOption as ar, type scrollParam as as, type MidsceneYamlScriptEnvBase as at, type MidsceneYamlScriptWebEnv as au, type MidsceneYamlScriptAndroidEnv as av, type MidsceneYamlScriptEnv as aw, type MidsceneYamlFlowItemAIAction as ax, type MidsceneYamlFlowItemAIAssert as ay, type MidsceneYamlFlowItemAIQuery as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, type AIUsageInfo as k, AIResponseFormat as l, type AISingleElementResponseById as m, type AISingleElementResponseByPosition as n, type AIElementLocatorResponse as o, type AIElementCoordinatesResponse as p, type AIElementResponse as q, type AIDataExtractionResponse as r, type AISectionLocatorResponse as s, type AIAssertionResponse as t, type EnsureObject as u, type InsightExtractParam as v, type LocateResultElement as w, type DumpMeta as x, type ReportDumpWithAttributes as y, type InsightDump as z };
|
package/dist/lib/utils.d.ts
CHANGED
package/dist/lib/utils.js
CHANGED
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
20
|
-
require('./chunk-
|
|
19
|
+
var _chunk6MPYG6WRjs = require('./chunk-6MPYG6WR.js');
|
|
20
|
+
require('./chunk-6SYVFZ6C.js');
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
|
|
@@ -36,4 +36,4 @@ require('./chunk-UBGEKXK7.js');
|
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
exports.getLogDir =
|
|
39
|
+
exports.getLogDir = _chunk6MPYG6WRjs.getLogDir; exports.getLogDirByType = _chunk6MPYG6WRjs.getLogDirByType; exports.getTmpDir = _chunk6MPYG6WRjs.getTmpDir; exports.getTmpFile = _chunk6MPYG6WRjs.getTmpFile; exports.getVersion = _chunk6MPYG6WRjs.getVersion; exports.groupedActionDumpFileExt = _chunk6MPYG6WRjs.groupedActionDumpFileExt; exports.overlapped = _chunk6MPYG6WRjs.overlapped; exports.replaceStringWithFirstAppearance = _chunk6MPYG6WRjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunk6MPYG6WRjs.replacerForPageObject; exports.reportHTMLContent = _chunk6MPYG6WRjs.reportHTMLContent; exports.setLogDir = _chunk6MPYG6WRjs.setLogDir; exports.setReportTpl = _chunk6MPYG6WRjs.setReportTpl; exports.sleep = _chunk6MPYG6WRjs.sleep; exports.stringifyDumpData = _chunk6MPYG6WRjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunk6MPYG6WRjs.uploadTestInfoToServer; exports.writeDumpReport = _chunk6MPYG6WRjs.writeDumpReport; exports.writeLogFile = _chunk6MPYG6WRjs.writeLogFile;
|
package/dist/types/ai-model.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { k as AIUsageInfo, Q as PlanningAction } from './types-
|
|
1
|
+
import { k as AIUsageInfo, Q as PlanningAction } from './types-d2831105.js';
|
|
2
2
|
import { ChatCompletionMessageParam } from 'openai/resources';
|
|
3
3
|
export { ChatCompletionMessageParam } from 'openai/resources';
|
|
4
|
-
import { b as AIActionType } from './llm-planning-
|
|
5
|
-
export { a as AiAssert, e as AiExtractElementInfo, A as AiLocateElement, f as AiLocateSection, g as adaptBboxToRect, c as callAiFn, d as describeUserPage, p as plan } from './llm-planning-
|
|
4
|
+
import { b as AIActionType } from './llm-planning-d084250f.js';
|
|
5
|
+
export { a as AiAssert, e as AiExtractElementInfo, A as AiLocateElement, f as AiLocateSection, g as adaptBboxToRect, c as callAiFn, d as describeUserPage, p as plan } from './llm-planning-d084250f.js';
|
|
6
6
|
import { actionParser } from '@ui-tars/action-parser';
|
|
7
7
|
import '@midscene/shared/constants';
|
|
8
8
|
|
package/dist/types/env.d.ts
CHANGED
|
@@ -60,7 +60,7 @@ declare const allConfigFromEnv: () => {
|
|
|
60
60
|
AZURE_OPENAI_DEPLOYMENT: string | undefined;
|
|
61
61
|
};
|
|
62
62
|
declare let userConfig: Partial<ReturnType<typeof allConfigFromEnv>>;
|
|
63
|
-
declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | false;
|
|
63
|
+
declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | "vlm-ui-tars" | false;
|
|
64
64
|
declare const getAIConfig: (configKey: keyof typeof userConfig) => string | undefined;
|
|
65
65
|
declare const getAIConfigInBoolean: (configKey: keyof typeof userConfig) => boolean;
|
|
66
66
|
declare const getAIConfigInJson: (configKey: keyof typeof userConfig) => any;
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-
|
|
2
|
-
export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext,
|
|
3
|
-
import { c as callAiFn } from './llm-planning-
|
|
4
|
-
export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-
|
|
1
|
+
import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-d2831105.js';
|
|
2
|
+
export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext, aH as FreeFn, aq as GroupedActionDump, z as InsightDump, v as InsightExtractParam, G as LiteUISection, ar as LocateOption, w as LocateResultElement, i as MidsceneYamlFlowItem, ax as MidsceneYamlFlowItemAIAction, ay as MidsceneYamlFlowItemAIAssert, aC as MidsceneYamlFlowItemAIHover, aD as MidsceneYamlFlowItemAIInput, aE as MidsceneYamlFlowItemAIKeyboardPress, az as MidsceneYamlFlowItemAIQuery, aF as MidsceneYamlFlowItemAIScroll, aB as MidsceneYamlFlowItemAITap, aA as MidsceneYamlFlowItemAIWaitFor, aG as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, av as MidsceneYamlScriptAndroidEnv, aw as MidsceneYamlScriptEnv, at as MidsceneYamlScriptEnvBase, au as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, O as OnTaskStartTip, F as PartialInsightDumpFromSDK, T as PlanningAIResponse, Q as PlanningAction, Z as PlanningActionParamAssert, $ as PlanningActionParamError, W as PlanningActionParamHover, X as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamScroll, _ as PlanningActionParamSleep, V as PlanningActionParamTap, a0 as PlanningActionParamWaitFor, N as PlanningLocateParam, a4 as PlaywrightParserOpt, P as Point, a3 as PuppeteerParserOpt, R as Rect, y as ReportDumpWithAttributes, aJ as ScriptPlayerStatusValue, aI as ScriptPlayerTaskStatus, S as Size, a8 as TaskCacheInfo, as as scrollParam } from './types-d2831105.js';
|
|
3
|
+
import { c as callAiFn } from './llm-planning-d084250f.js';
|
|
4
|
+
export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-d084250f.js';
|
|
5
5
|
export { getLogDirByType, getVersion, setLogDir } from './utils.js';
|
|
6
6
|
export { MIDSCENE_MODEL_NAME, getAIConfig } from './env.js';
|
|
7
7
|
import '@midscene/shared/constants';
|