@midscene/core 0.8.7 → 0.8.8-beta-20241223034944.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/ai-model.js +42 -13
- package/dist/lib/env.js +14 -2
- package/dist/lib/index.js +43 -14
- package/dist/lib/types/env.d.ts +11 -2
- package/dist/lib/utils.js +9 -3
- package/package.json +4 -2
- package/report/index.html +2267 -2
package/dist/lib/ai-model.js
CHANGED
|
@@ -4292,7 +4292,9 @@ module.exports = __toCommonJS(ai_model_exports);
|
|
|
4292
4292
|
|
|
4293
4293
|
// src/ai-model/openai/index.ts
|
|
4294
4294
|
var import_node_assert2 = __toESM(require("assert"));
|
|
4295
|
+
var import_identity = require("@azure/identity");
|
|
4295
4296
|
var import_utils = require("@midscene/shared/utils");
|
|
4297
|
+
var import_dirty_json = __toESM(require("dirty-json"));
|
|
4296
4298
|
var import_openai2 = __toESM(require("openai"));
|
|
4297
4299
|
var import_socks_proxy_agent = require("socks-proxy-agent");
|
|
4298
4300
|
|
|
@@ -4307,10 +4309,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
|
|
|
4307
4309
|
var OPENAI_API_KEY = "OPENAI_API_KEY";
|
|
4308
4310
|
var OPENAI_BASE_URL = "OPENAI_BASE_URL";
|
|
4309
4311
|
var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
|
|
4310
|
-
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
4311
4312
|
var MIDSCENE_CACHE = "MIDSCENE_CACHE";
|
|
4312
4313
|
var MATCH_BY_POSITION = "MATCH_BY_POSITION";
|
|
4313
4314
|
var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
|
|
4315
|
+
var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
|
|
4316
|
+
var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
|
|
4317
|
+
var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
|
|
4318
|
+
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
4314
4319
|
var allConfigFromEnv = () => {
|
|
4315
4320
|
return {
|
|
4316
4321
|
[MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
|
|
@@ -4326,7 +4331,10 @@ var allConfigFromEnv = () => {
|
|
|
4326
4331
|
[MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
|
|
4327
4332
|
[MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
|
|
4328
4333
|
[MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
|
|
4329
|
-
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
|
|
4334
|
+
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
|
|
4335
|
+
[MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
|
|
4336
|
+
[MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
|
|
4337
|
+
[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
|
|
4330
4338
|
};
|
|
4331
4339
|
};
|
|
4332
4340
|
var userConfig = {};
|
|
@@ -4841,7 +4849,7 @@ You are a versatile professional in software UI automation. Your outstanding con
|
|
|
4841
4849
|
|
|
4842
4850
|
- All the actions you composed MUST be based on the page context information you get.
|
|
4843
4851
|
- Trust the "What have been done" field about the task (if any), don't repeat actions in it.
|
|
4844
|
-
- Respond only with valid JSON. Do not write an introduction or summary
|
|
4852
|
+
- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`.
|
|
4845
4853
|
- If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
|
|
4846
4854
|
|
|
4847
4855
|
## About the \`actions\` field
|
|
@@ -4929,7 +4937,6 @@ By viewing the page screenshot and description, you should consider this and out
|
|
|
4929
4937
|
* The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So the last action will have a \`null\` value in the \`locate\` field.
|
|
4930
4938
|
* The task cannot be accomplished (because we cannot see the "English" option now), so a \`furtherPlan\` field is needed.
|
|
4931
4939
|
|
|
4932
|
-
\`\`\`json
|
|
4933
4940
|
{
|
|
4934
4941
|
"actions":[
|
|
4935
4942
|
{
|
|
@@ -4960,8 +4967,6 @@ By viewing the page screenshot and description, you should consider this and out
|
|
|
4960
4967
|
"whatHaveDone": "Click the language switch button and wait 1s"
|
|
4961
4968
|
}
|
|
4962
4969
|
}
|
|
4963
|
-
\`\`\`
|
|
4964
|
-
|
|
4965
4970
|
|
|
4966
4971
|
## Example #2 : Tolerate the error situation only when the instruction is an "if" statement
|
|
4967
4972
|
|
|
@@ -4970,7 +4975,6 @@ If the user says "If there is a popup, close it", you should consider this and o
|
|
|
4970
4975
|
* By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
|
|
4971
4976
|
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
|
|
4972
4977
|
|
|
4973
|
-
\`\`\`json
|
|
4974
4978
|
{
|
|
4975
4979
|
"actions": [{
|
|
4976
4980
|
"thought": "There is no popup on the page",
|
|
@@ -4981,18 +4985,15 @@ If the user says "If there is a popup, close it", you should consider this and o
|
|
|
4981
4985
|
"taskWillBeAccomplished": true,
|
|
4982
4986
|
"furtherPlan": null
|
|
4983
4987
|
}
|
|
4984
|
-
\`\`\`
|
|
4985
4988
|
|
|
4986
4989
|
For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
|
|
4987
4990
|
|
|
4988
|
-
\`\`\`json
|
|
4989
4991
|
{
|
|
4990
4992
|
"actions": [],
|
|
4991
4993
|
"error": "The instruction and page context are irrelevant, there is no popup on the page",
|
|
4992
4994
|
"taskWillBeAccomplished": true,
|
|
4993
4995
|
"furtherPlan": null
|
|
4994
4996
|
}
|
|
4995
|
-
\`\`\`
|
|
4996
4997
|
|
|
4997
4998
|
## Example #3 : When task is accomplished, don't plan more actions
|
|
4998
4999
|
|
|
@@ -5013,6 +5014,7 @@ When the user ask to "Wait 4s", you should consider this:
|
|
|
5013
5014
|
## Bad case #1 : Missing \`prompt\` in the 'Locate' field; Missing \`furtherPlan\` field when the task won't be accomplished
|
|
5014
5015
|
|
|
5015
5016
|
Wrong output:
|
|
5017
|
+
|
|
5016
5018
|
{
|
|
5017
5019
|
"actions":[
|
|
5018
5020
|
{
|
|
@@ -5129,6 +5131,8 @@ function preferOpenAIModel(preferVendor) {
|
|
|
5129
5131
|
return false;
|
|
5130
5132
|
if (getAIConfig(OPENAI_API_KEY))
|
|
5131
5133
|
return true;
|
|
5134
|
+
if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI))
|
|
5135
|
+
return true;
|
|
5132
5136
|
return Boolean(getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON));
|
|
5133
5137
|
}
|
|
5134
5138
|
var defaultModel = "gpt-4o-2024-08-06";
|
|
@@ -5153,6 +5157,23 @@ async function createOpenAI() {
|
|
|
5153
5157
|
...extraConfig,
|
|
5154
5158
|
dangerouslyAllowBrowser: true
|
|
5155
5159
|
});
|
|
5160
|
+
} else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
|
|
5161
|
+
const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
|
|
5162
|
+
(0, import_node_assert2.default)(
|
|
5163
|
+
!import_utils.ifInBrowser,
|
|
5164
|
+
"Azure OpenAI is not supported in browser with Midscene."
|
|
5165
|
+
);
|
|
5166
|
+
const credential = new import_identity.DefaultAzureCredential();
|
|
5167
|
+
(0, import_node_assert2.default)(scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
|
|
5168
|
+
const tokenProvider = (0, import_identity.getBearerTokenProvider)(credential, scope);
|
|
5169
|
+
const extraAzureConfig = getAIConfigInJson(
|
|
5170
|
+
MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
|
|
5171
|
+
);
|
|
5172
|
+
openai = new import_openai2.AzureOpenAI({
|
|
5173
|
+
azureADTokenProvider: tokenProvider,
|
|
5174
|
+
...extraConfig,
|
|
5175
|
+
...extraAzureConfig
|
|
5176
|
+
});
|
|
5156
5177
|
} else {
|
|
5157
5178
|
openai = new import_openai2.default({
|
|
5158
5179
|
baseURL: getAIConfig(OPENAI_BASE_URL),
|
|
@@ -5231,12 +5252,20 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
|
5231
5252
|
let jsonContent = safeJsonParse(response.content);
|
|
5232
5253
|
if (jsonContent)
|
|
5233
5254
|
return { content: jsonContent, usage: response.usage };
|
|
5234
|
-
|
|
5255
|
+
const cleanJsonString = extractJSONFromCodeBlock(response.content);
|
|
5235
5256
|
try {
|
|
5236
|
-
|
|
5257
|
+
jsonContent = JSON.parse(cleanJsonString);
|
|
5237
5258
|
} catch (e) {
|
|
5238
|
-
throw Error(`failed to parse json response: ${response.content}`);
|
|
5239
5259
|
}
|
|
5260
|
+
if (jsonContent)
|
|
5261
|
+
return { content: jsonContent, usage: response.usage };
|
|
5262
|
+
try {
|
|
5263
|
+
jsonContent = import_dirty_json.default.parse(cleanJsonString);
|
|
5264
|
+
} catch (e) {
|
|
5265
|
+
}
|
|
5266
|
+
if (jsonContent)
|
|
5267
|
+
return { content: jsonContent, usage: response.usage };
|
|
5268
|
+
throw Error(`failed to parse json response: ${response.content}`);
|
|
5240
5269
|
}
|
|
5241
5270
|
function extractJSONFromCodeBlock(response) {
|
|
5242
5271
|
const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);
|
package/dist/lib/env.js
CHANGED
|
@@ -21,6 +21,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
21
21
|
var env_exports = {};
|
|
22
22
|
__export(env_exports, {
|
|
23
23
|
MATCH_BY_POSITION: () => MATCH_BY_POSITION,
|
|
24
|
+
MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: () => MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
|
|
25
|
+
MIDSCENE_AZURE_OPENAI_SCOPE: () => MIDSCENE_AZURE_OPENAI_SCOPE,
|
|
24
26
|
MIDSCENE_CACHE: () => MIDSCENE_CACHE,
|
|
25
27
|
MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG: () => MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG,
|
|
26
28
|
MIDSCENE_DEBUG_AI_PROFILE: () => MIDSCENE_DEBUG_AI_PROFILE,
|
|
@@ -31,6 +33,7 @@ __export(env_exports, {
|
|
|
31
33
|
MIDSCENE_OPENAI_INIT_CONFIG_JSON: () => MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
32
34
|
MIDSCENE_OPENAI_SOCKS_PROXY: () => MIDSCENE_OPENAI_SOCKS_PROXY,
|
|
33
35
|
MIDSCENE_REPORT_TAG_NAME: () => MIDSCENE_REPORT_TAG_NAME,
|
|
36
|
+
MIDSCENE_USE_AZURE_OPENAI: () => MIDSCENE_USE_AZURE_OPENAI,
|
|
34
37
|
OPENAI_API_KEY: () => OPENAI_API_KEY,
|
|
35
38
|
OPENAI_BASE_URL: () => OPENAI_BASE_URL,
|
|
36
39
|
OPENAI_USE_AZURE: () => OPENAI_USE_AZURE,
|
|
@@ -50,10 +53,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
|
|
|
50
53
|
var OPENAI_API_KEY = "OPENAI_API_KEY";
|
|
51
54
|
var OPENAI_BASE_URL = "OPENAI_BASE_URL";
|
|
52
55
|
var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
|
|
53
|
-
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
54
56
|
var MIDSCENE_CACHE = "MIDSCENE_CACHE";
|
|
55
57
|
var MATCH_BY_POSITION = "MATCH_BY_POSITION";
|
|
56
58
|
var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
|
|
59
|
+
var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
|
|
60
|
+
var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
|
|
61
|
+
var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
|
|
62
|
+
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
57
63
|
var allConfigFromEnv = () => {
|
|
58
64
|
return {
|
|
59
65
|
[MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
|
|
@@ -69,7 +75,10 @@ var allConfigFromEnv = () => {
|
|
|
69
75
|
[MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
|
|
70
76
|
[MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
|
|
71
77
|
[MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
|
|
72
|
-
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
|
|
78
|
+
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
|
|
79
|
+
[MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
|
|
80
|
+
[MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
|
|
81
|
+
[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
|
|
73
82
|
};
|
|
74
83
|
};
|
|
75
84
|
var userConfig = {};
|
|
@@ -101,6 +110,8 @@ var overrideAIConfig = (newConfig, extendMode) => {
|
|
|
101
110
|
// Annotate the CommonJS export names for ESM import in node:
|
|
102
111
|
0 && (module.exports = {
|
|
103
112
|
MATCH_BY_POSITION,
|
|
113
|
+
MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
|
|
114
|
+
MIDSCENE_AZURE_OPENAI_SCOPE,
|
|
104
115
|
MIDSCENE_CACHE,
|
|
105
116
|
MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG,
|
|
106
117
|
MIDSCENE_DEBUG_AI_PROFILE,
|
|
@@ -111,6 +122,7 @@ var overrideAIConfig = (newConfig, extendMode) => {
|
|
|
111
122
|
MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
112
123
|
MIDSCENE_OPENAI_SOCKS_PROXY,
|
|
113
124
|
MIDSCENE_REPORT_TAG_NAME,
|
|
125
|
+
MIDSCENE_USE_AZURE_OPENAI,
|
|
114
126
|
OPENAI_API_KEY,
|
|
115
127
|
OPENAI_BASE_URL,
|
|
116
128
|
OPENAI_USE_AZURE,
|
package/dist/lib/index.js
CHANGED
|
@@ -4316,10 +4316,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
|
|
|
4316
4316
|
var OPENAI_API_KEY = "OPENAI_API_KEY";
|
|
4317
4317
|
var OPENAI_BASE_URL = "OPENAI_BASE_URL";
|
|
4318
4318
|
var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
|
|
4319
|
-
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
4320
4319
|
var MIDSCENE_CACHE = "MIDSCENE_CACHE";
|
|
4321
4320
|
var MATCH_BY_POSITION = "MATCH_BY_POSITION";
|
|
4322
4321
|
var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
|
|
4322
|
+
var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
|
|
4323
|
+
var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
|
|
4324
|
+
var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
|
|
4325
|
+
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
4323
4326
|
var allConfigFromEnv = () => {
|
|
4324
4327
|
return {
|
|
4325
4328
|
[MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
|
|
@@ -4335,7 +4338,10 @@ var allConfigFromEnv = () => {
|
|
|
4335
4338
|
[MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
|
|
4336
4339
|
[MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
|
|
4337
4340
|
[MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
|
|
4338
|
-
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
|
|
4341
|
+
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
|
|
4342
|
+
[MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
|
|
4343
|
+
[MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
|
|
4344
|
+
[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
|
|
4339
4345
|
};
|
|
4340
4346
|
};
|
|
4341
4347
|
var userConfig = {};
|
|
@@ -4506,7 +4512,7 @@ function stringifyDumpData(data, indents) {
|
|
|
4506
4512
|
return JSON.stringify(data, replacerForPageObject, indents);
|
|
4507
4513
|
}
|
|
4508
4514
|
function getVersion() {
|
|
4509
|
-
return "0.8.7";
|
|
4515
|
+
return "0.8.8-beta-20241223034944.0";
|
|
4510
4516
|
}
|
|
4511
4517
|
|
|
4512
4518
|
// src/action/executor.ts
|
|
@@ -4685,7 +4691,9 @@ var UIContext = class {
|
|
|
4685
4691
|
};
|
|
4686
4692
|
|
|
4687
4693
|
// src/ai-model/openai/index.ts
|
|
4694
|
+
var import_identity = require("@azure/identity");
|
|
4688
4695
|
var import_utils3 = require("@midscene/shared/utils");
|
|
4696
|
+
var import_dirty_json = __toESM(require("dirty-json"));
|
|
4689
4697
|
var import_openai = __toESM(require("openai"));
|
|
4690
4698
|
var import_socks_proxy_agent = require("socks-proxy-agent");
|
|
4691
4699
|
|
|
@@ -5188,7 +5196,7 @@ You are a versatile professional in software UI automation. Your outstanding con
|
|
|
5188
5196
|
|
|
5189
5197
|
- All the actions you composed MUST be based on the page context information you get.
|
|
5190
5198
|
- Trust the "What have been done" field about the task (if any), don't repeat actions in it.
|
|
5191
|
-
- Respond only with valid JSON. Do not write an introduction or summary
|
|
5199
|
+
- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`.
|
|
5192
5200
|
- If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
|
|
5193
5201
|
|
|
5194
5202
|
## About the \`actions\` field
|
|
@@ -5276,7 +5284,6 @@ By viewing the page screenshot and description, you should consider this and out
|
|
|
5276
5284
|
* The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So the last action will have a \`null\` value in the \`locate\` field.
|
|
5277
5285
|
* The task cannot be accomplished (because we cannot see the "English" option now), so a \`furtherPlan\` field is needed.
|
|
5278
5286
|
|
|
5279
|
-
\`\`\`json
|
|
5280
5287
|
{
|
|
5281
5288
|
"actions":[
|
|
5282
5289
|
{
|
|
@@ -5307,8 +5314,6 @@ By viewing the page screenshot and description, you should consider this and out
|
|
|
5307
5314
|
"whatHaveDone": "Click the language switch button and wait 1s"
|
|
5308
5315
|
}
|
|
5309
5316
|
}
|
|
5310
|
-
\`\`\`
|
|
5311
|
-
|
|
5312
5317
|
|
|
5313
5318
|
## Example #2 : Tolerate the error situation only when the instruction is an "if" statement
|
|
5314
5319
|
|
|
@@ -5317,7 +5322,6 @@ If the user says "If there is a popup, close it", you should consider this and o
|
|
|
5317
5322
|
* By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
|
|
5318
5323
|
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
|
|
5319
5324
|
|
|
5320
|
-
\`\`\`json
|
|
5321
5325
|
{
|
|
5322
5326
|
"actions": [{
|
|
5323
5327
|
"thought": "There is no popup on the page",
|
|
@@ -5328,18 +5332,15 @@ If the user says "If there is a popup, close it", you should consider this and o
|
|
|
5328
5332
|
"taskWillBeAccomplished": true,
|
|
5329
5333
|
"furtherPlan": null
|
|
5330
5334
|
}
|
|
5331
|
-
\`\`\`
|
|
5332
5335
|
|
|
5333
5336
|
For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
|
|
5334
5337
|
|
|
5335
|
-
\`\`\`json
|
|
5336
5338
|
{
|
|
5337
5339
|
"actions": [],
|
|
5338
5340
|
"error": "The instruction and page context are irrelevant, there is no popup on the page",
|
|
5339
5341
|
"taskWillBeAccomplished": true,
|
|
5340
5342
|
"furtherPlan": null
|
|
5341
5343
|
}
|
|
5342
|
-
\`\`\`
|
|
5343
5344
|
|
|
5344
5345
|
## Example #3 : When task is accomplished, don't plan more actions
|
|
5345
5346
|
|
|
@@ -5360,6 +5361,7 @@ When the user ask to "Wait 4s", you should consider this:
|
|
|
5360
5361
|
## Bad case #1 : Missing \`prompt\` in the 'Locate' field; Missing \`furtherPlan\` field when the task won't be accomplished
|
|
5361
5362
|
|
|
5362
5363
|
Wrong output:
|
|
5364
|
+
|
|
5363
5365
|
{
|
|
5364
5366
|
"actions":[
|
|
5365
5367
|
{
|
|
@@ -5476,6 +5478,8 @@ function preferOpenAIModel(preferVendor) {
|
|
|
5476
5478
|
return false;
|
|
5477
5479
|
if (getAIConfig(OPENAI_API_KEY))
|
|
5478
5480
|
return true;
|
|
5481
|
+
if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI))
|
|
5482
|
+
return true;
|
|
5479
5483
|
return Boolean(getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON));
|
|
5480
5484
|
}
|
|
5481
5485
|
var defaultModel = "gpt-4o-2024-08-06";
|
|
@@ -5500,6 +5504,23 @@ async function createOpenAI() {
|
|
|
5500
5504
|
...extraConfig,
|
|
5501
5505
|
dangerouslyAllowBrowser: true
|
|
5502
5506
|
});
|
|
5507
|
+
} else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
|
|
5508
|
+
const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
|
|
5509
|
+
(0, import_node_assert4.default)(
|
|
5510
|
+
!import_utils3.ifInBrowser,
|
|
5511
|
+
"Azure OpenAI is not supported in browser with Midscene."
|
|
5512
|
+
);
|
|
5513
|
+
const credential = new import_identity.DefaultAzureCredential();
|
|
5514
|
+
(0, import_node_assert4.default)(scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
|
|
5515
|
+
const tokenProvider = (0, import_identity.getBearerTokenProvider)(credential, scope);
|
|
5516
|
+
const extraAzureConfig = getAIConfigInJson(
|
|
5517
|
+
MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
|
|
5518
|
+
);
|
|
5519
|
+
openai = new import_openai.AzureOpenAI({
|
|
5520
|
+
azureADTokenProvider: tokenProvider,
|
|
5521
|
+
...extraConfig,
|
|
5522
|
+
...extraAzureConfig
|
|
5523
|
+
});
|
|
5503
5524
|
} else {
|
|
5504
5525
|
openai = new import_openai.default({
|
|
5505
5526
|
baseURL: getAIConfig(OPENAI_BASE_URL),
|
|
@@ -5578,12 +5599,20 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
|
5578
5599
|
let jsonContent = safeJsonParse(response.content);
|
|
5579
5600
|
if (jsonContent)
|
|
5580
5601
|
return { content: jsonContent, usage: response.usage };
|
|
5581
|
-
|
|
5602
|
+
const cleanJsonString = extractJSONFromCodeBlock(response.content);
|
|
5582
5603
|
try {
|
|
5583
|
-
|
|
5604
|
+
jsonContent = JSON.parse(cleanJsonString);
|
|
5584
5605
|
} catch (e) {
|
|
5585
|
-
throw Error(`failed to parse json response: ${response.content}`);
|
|
5586
5606
|
}
|
|
5607
|
+
if (jsonContent)
|
|
5608
|
+
return { content: jsonContent, usage: response.usage };
|
|
5609
|
+
try {
|
|
5610
|
+
jsonContent = import_dirty_json.default.parse(cleanJsonString);
|
|
5611
|
+
} catch (e) {
|
|
5612
|
+
}
|
|
5613
|
+
if (jsonContent)
|
|
5614
|
+
return { content: jsonContent, usage: response.usage };
|
|
5615
|
+
throw Error(`failed to parse json response: ${response.content}`);
|
|
5587
5616
|
}
|
|
5588
5617
|
function extractJSONFromCodeBlock(response) {
|
|
5589
5618
|
const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);
|
package/dist/lib/types/env.d.ts
CHANGED
|
@@ -8,10 +8,13 @@ declare const MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
|
|
|
8
8
|
declare const OPENAI_API_KEY = "OPENAI_API_KEY";
|
|
9
9
|
declare const OPENAI_BASE_URL = "OPENAI_BASE_URL";
|
|
10
10
|
declare const MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
|
|
11
|
-
declare const OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
12
11
|
declare const MIDSCENE_CACHE = "MIDSCENE_CACHE";
|
|
13
12
|
declare const MATCH_BY_POSITION = "MATCH_BY_POSITION";
|
|
14
13
|
declare const MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
|
|
14
|
+
declare const MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
|
|
15
|
+
declare const MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
|
|
16
|
+
declare const MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
|
|
17
|
+
declare const OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
15
18
|
declare const allConfigFromEnv: () => {
|
|
16
19
|
MIDSCENE_OPENAI_INIT_CONFIG_JSON: string | undefined;
|
|
17
20
|
MIDSCENE_MODEL_NAME: string | undefined;
|
|
@@ -27,6 +30,9 @@ declare const allConfigFromEnv: () => {
|
|
|
27
30
|
MATCH_BY_POSITION: string | undefined;
|
|
28
31
|
MIDSCENE_REPORT_TAG_NAME: string | undefined;
|
|
29
32
|
MIDSCENE_OPENAI_SOCKS_PROXY: string | undefined;
|
|
33
|
+
MIDSCENE_USE_AZURE_OPENAI: string | undefined;
|
|
34
|
+
MIDSCENE_AZURE_OPENAI_SCOPE: string;
|
|
35
|
+
MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: string | undefined;
|
|
30
36
|
};
|
|
31
37
|
declare let userConfig: ReturnType<typeof allConfigFromEnv>;
|
|
32
38
|
declare const getAIConfig: (configKey: keyof typeof userConfig) => string | undefined;
|
|
@@ -46,7 +52,10 @@ declare const allAIConfig: () => {
|
|
|
46
52
|
MATCH_BY_POSITION: string | undefined;
|
|
47
53
|
MIDSCENE_REPORT_TAG_NAME: string | undefined;
|
|
48
54
|
MIDSCENE_OPENAI_SOCKS_PROXY: string | undefined;
|
|
55
|
+
MIDSCENE_USE_AZURE_OPENAI: string | undefined;
|
|
56
|
+
MIDSCENE_AZURE_OPENAI_SCOPE: string;
|
|
57
|
+
MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: string | undefined;
|
|
49
58
|
};
|
|
50
59
|
declare const overrideAIConfig: (newConfig: ReturnType<typeof allConfigFromEnv>, extendMode?: boolean) => void;
|
|
51
60
|
|
|
52
|
-
export { MATCH_BY_POSITION, MIDSCENE_CACHE, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_TEXT_ONLY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_REPORT_TAG_NAME, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_USE_AZURE, allAIConfig, getAIConfig, getAIConfigInJson, overrideAIConfig };
|
|
61
|
+
export { MATCH_BY_POSITION, MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_AZURE_OPENAI_SCOPE, MIDSCENE_CACHE, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_TEXT_ONLY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_USE_AZURE_OPENAI, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_USE_AZURE, allAIConfig, getAIConfig, getAIConfigInJson, overrideAIConfig };
|
package/dist/lib/utils.js
CHANGED
|
@@ -67,10 +67,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
|
|
|
67
67
|
var OPENAI_API_KEY = "OPENAI_API_KEY";
|
|
68
68
|
var OPENAI_BASE_URL = "OPENAI_BASE_URL";
|
|
69
69
|
var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
|
|
70
|
-
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
71
70
|
var MIDSCENE_CACHE = "MIDSCENE_CACHE";
|
|
72
71
|
var MATCH_BY_POSITION = "MATCH_BY_POSITION";
|
|
73
72
|
var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
|
|
73
|
+
var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
|
|
74
|
+
var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
|
|
75
|
+
var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
|
|
76
|
+
var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
|
|
74
77
|
var allConfigFromEnv = () => {
|
|
75
78
|
return {
|
|
76
79
|
[MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
|
|
@@ -86,7 +89,10 @@ var allConfigFromEnv = () => {
|
|
|
86
89
|
[MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
|
|
87
90
|
[MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
|
|
88
91
|
[MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
|
|
89
|
-
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
|
|
92
|
+
[MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
|
|
93
|
+
[MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
|
|
94
|
+
[MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
|
|
95
|
+
[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
|
|
90
96
|
};
|
|
91
97
|
};
|
|
92
98
|
var userConfig = {};
|
|
@@ -272,7 +278,7 @@ function stringifyDumpData(data, indents) {
|
|
|
272
278
|
return JSON.stringify(data, replacerForPageObject, indents);
|
|
273
279
|
}
|
|
274
280
|
function getVersion() {
|
|
275
|
-
return "0.8.7";
|
|
281
|
+
return "0.8.8-beta-20241223034944.0";
|
|
276
282
|
}
|
|
277
283
|
function debugLog(...message) {
|
|
278
284
|
const debugMode = getAIConfig(MIDSCENE_DEBUG_MODE);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/core",
|
|
3
3
|
"description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
|
|
4
|
-
"version": "0.8.7",
|
|
4
|
+
"version": "0.8.8-beta-20241223034944.0",
|
|
5
5
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
6
6
|
"homepage": "https://midscenejs.com/",
|
|
7
7
|
"jsnext:source": "./src/index.ts",
|
|
@@ -36,10 +36,12 @@
|
|
|
36
36
|
}
|
|
37
37
|
},
|
|
38
38
|
"dependencies": {
|
|
39
|
+
"@azure/identity": "4.5.0",
|
|
40
|
+
"dirty-json": "0.9.2",
|
|
39
41
|
"openai": "4.57.1",
|
|
40
42
|
"optional": "0.1.4",
|
|
41
43
|
"socks-proxy-agent": "8.0.4",
|
|
42
|
-
"@midscene/shared": "0.8.7"
|
|
44
|
+
"@midscene/shared": "0.8.8-beta-20241223034944.0"
|
|
43
45
|
},
|
|
44
46
|
"devDependencies": {
|
|
45
47
|
"@modern-js/module-tools": "2.60.6",
|