@midscene/core 0.8.7 → 0.8.8-beta-20241223034944.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4292,7 +4292,9 @@ module.exports = __toCommonJS(ai_model_exports);
4292
4292
 
4293
4293
  // src/ai-model/openai/index.ts
4294
4294
  var import_node_assert2 = __toESM(require("assert"));
4295
+ var import_identity = require("@azure/identity");
4295
4296
  var import_utils = require("@midscene/shared/utils");
4297
+ var import_dirty_json = __toESM(require("dirty-json"));
4296
4298
  var import_openai2 = __toESM(require("openai"));
4297
4299
  var import_socks_proxy_agent = require("socks-proxy-agent");
4298
4300
 
@@ -4307,10 +4309,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
4307
4309
  var OPENAI_API_KEY = "OPENAI_API_KEY";
4308
4310
  var OPENAI_BASE_URL = "OPENAI_BASE_URL";
4309
4311
  var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
4310
- var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
4311
4312
  var MIDSCENE_CACHE = "MIDSCENE_CACHE";
4312
4313
  var MATCH_BY_POSITION = "MATCH_BY_POSITION";
4313
4314
  var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
4315
+ var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
4316
+ var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
4317
+ var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
4318
+ var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
4314
4319
  var allConfigFromEnv = () => {
4315
4320
  return {
4316
4321
  [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -4326,7 +4331,10 @@ var allConfigFromEnv = () => {
4326
4331
  [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
4327
4332
  [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
4328
4333
  [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
4329
- [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
4334
+ [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
4335
+ [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
4336
+ [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
4337
+ [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
4330
4338
  };
4331
4339
  };
4332
4340
  var userConfig = {};
@@ -4841,7 +4849,7 @@ You are a versatile professional in software UI automation. Your outstanding con
4841
4849
 
4842
4850
  - All the actions you composed MUST be based on the page context information you get.
4843
4851
  - Trust the "What have been done" field about the task (if any), don't repeat actions in it.
4844
- - Respond only with valid JSON. Do not write an introduction or summary.
4852
+ - Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`.
4845
4853
  - If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
4846
4854
 
4847
4855
  ## About the \`actions\` field
@@ -4929,7 +4937,6 @@ By viewing the page screenshot and description, you should consider this and out
4929
4937
  * The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So the last action will have a \`null\` value in the \`locate\` field.
4930
4938
  * The task cannot be accomplished (because we cannot see the "English" option now), so a \`furtherPlan\` field is needed.
4931
4939
 
4932
- \`\`\`json
4933
4940
  {
4934
4941
  "actions":[
4935
4942
  {
@@ -4960,8 +4967,6 @@ By viewing the page screenshot and description, you should consider this and out
4960
4967
  "whatHaveDone": "Click the language switch button and wait 1s"
4961
4968
  }
4962
4969
  }
4963
- \`\`\`
4964
-
4965
4970
 
4966
4971
  ## Example #2 : Tolerate the error situation only when the instruction is an "if" statement
4967
4972
 
@@ -4970,7 +4975,6 @@ If the user says "If there is a popup, close it", you should consider this and o
4970
4975
  * By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
4971
4976
  * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
4972
4977
 
4973
- \`\`\`json
4974
4978
  {
4975
4979
  "actions": [{
4976
4980
  "thought": "There is no popup on the page",
@@ -4981,18 +4985,15 @@ If the user says "If there is a popup, close it", you should consider this and o
4981
4985
  "taskWillBeAccomplished": true,
4982
4986
  "furtherPlan": null
4983
4987
  }
4984
- \`\`\`
4985
4988
 
4986
4989
  For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
4987
4990
 
4988
- \`\`\`json
4989
4991
  {
4990
4992
  "actions": [],
4991
4993
  "error": "The instruction and page context are irrelevant, there is no popup on the page",
4992
4994
  "taskWillBeAccomplished": true,
4993
4995
  "furtherPlan": null
4994
4996
  }
4995
- \`\`\`
4996
4997
 
4997
4998
  ## Example #3 : When task is accomplished, don't plan more actions
4998
4999
 
@@ -5013,6 +5014,7 @@ When the user ask to "Wait 4s", you should consider this:
5013
5014
  ## Bad case #1 : Missing \`prompt\` in the 'Locate' field; Missing \`furtherPlan\` field when the task won't be accomplished
5014
5015
 
5015
5016
  Wrong output:
5017
+
5016
5018
  {
5017
5019
  "actions":[
5018
5020
  {
@@ -5129,6 +5131,8 @@ function preferOpenAIModel(preferVendor) {
5129
5131
  return false;
5130
5132
  if (getAIConfig(OPENAI_API_KEY))
5131
5133
  return true;
5134
+ if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI))
5135
+ return true;
5132
5136
  return Boolean(getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON));
5133
5137
  }
5134
5138
  var defaultModel = "gpt-4o-2024-08-06";
@@ -5153,6 +5157,23 @@ async function createOpenAI() {
5153
5157
  ...extraConfig,
5154
5158
  dangerouslyAllowBrowser: true
5155
5159
  });
5160
+ } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
5161
+ const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
5162
+ (0, import_node_assert2.default)(
5163
+ !import_utils.ifInBrowser,
5164
+ "Azure OpenAI is not supported in browser with Midscene."
5165
+ );
5166
+ const credential = new import_identity.DefaultAzureCredential();
5167
+ (0, import_node_assert2.default)(scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
5168
+ const tokenProvider = (0, import_identity.getBearerTokenProvider)(credential, scope);
5169
+ const extraAzureConfig = getAIConfigInJson(
5170
+ MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
5171
+ );
5172
+ openai = new import_openai2.AzureOpenAI({
5173
+ azureADTokenProvider: tokenProvider,
5174
+ ...extraConfig,
5175
+ ...extraAzureConfig
5176
+ });
5156
5177
  } else {
5157
5178
  openai = new import_openai2.default({
5158
5179
  baseURL: getAIConfig(OPENAI_BASE_URL),
@@ -5231,12 +5252,20 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
5231
5252
  let jsonContent = safeJsonParse(response.content);
5232
5253
  if (jsonContent)
5233
5254
  return { content: jsonContent, usage: response.usage };
5234
- jsonContent = extractJSONFromCodeBlock(response.content);
5255
+ const cleanJsonString = extractJSONFromCodeBlock(response.content);
5235
5256
  try {
5236
- return { content: JSON.parse(jsonContent), usage: response.usage };
5257
+ jsonContent = JSON.parse(cleanJsonString);
5237
5258
  } catch (e) {
5238
- throw Error(`failed to parse json response: ${response.content}`);
5239
5259
  }
5260
+ if (jsonContent)
5261
+ return { content: jsonContent, usage: response.usage };
5262
+ try {
5263
+ jsonContent = import_dirty_json.default.parse(cleanJsonString);
5264
+ } catch (e) {
5265
+ }
5266
+ if (jsonContent)
5267
+ return { content: jsonContent, usage: response.usage };
5268
+ throw Error(`failed to parse json response: ${response.content}`);
5240
5269
  }
5241
5270
  function extractJSONFromCodeBlock(response) {
5242
5271
  const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);
package/dist/lib/env.js CHANGED
@@ -21,6 +21,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var env_exports = {};
22
22
  __export(env_exports, {
23
23
  MATCH_BY_POSITION: () => MATCH_BY_POSITION,
24
+ MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: () => MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
25
+ MIDSCENE_AZURE_OPENAI_SCOPE: () => MIDSCENE_AZURE_OPENAI_SCOPE,
24
26
  MIDSCENE_CACHE: () => MIDSCENE_CACHE,
25
27
  MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG: () => MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG,
26
28
  MIDSCENE_DEBUG_AI_PROFILE: () => MIDSCENE_DEBUG_AI_PROFILE,
@@ -31,6 +33,7 @@ __export(env_exports, {
31
33
  MIDSCENE_OPENAI_INIT_CONFIG_JSON: () => MIDSCENE_OPENAI_INIT_CONFIG_JSON,
32
34
  MIDSCENE_OPENAI_SOCKS_PROXY: () => MIDSCENE_OPENAI_SOCKS_PROXY,
33
35
  MIDSCENE_REPORT_TAG_NAME: () => MIDSCENE_REPORT_TAG_NAME,
36
+ MIDSCENE_USE_AZURE_OPENAI: () => MIDSCENE_USE_AZURE_OPENAI,
34
37
  OPENAI_API_KEY: () => OPENAI_API_KEY,
35
38
  OPENAI_BASE_URL: () => OPENAI_BASE_URL,
36
39
  OPENAI_USE_AZURE: () => OPENAI_USE_AZURE,
@@ -50,10 +53,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
50
53
  var OPENAI_API_KEY = "OPENAI_API_KEY";
51
54
  var OPENAI_BASE_URL = "OPENAI_BASE_URL";
52
55
  var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
53
- var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
54
56
  var MIDSCENE_CACHE = "MIDSCENE_CACHE";
55
57
  var MATCH_BY_POSITION = "MATCH_BY_POSITION";
56
58
  var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
59
+ var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
60
+ var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
61
+ var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
62
+ var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
57
63
  var allConfigFromEnv = () => {
58
64
  return {
59
65
  [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -69,7 +75,10 @@ var allConfigFromEnv = () => {
69
75
  [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
70
76
  [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
71
77
  [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
72
- [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
78
+ [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
79
+ [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
80
+ [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
81
+ [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
73
82
  };
74
83
  };
75
84
  var userConfig = {};
@@ -101,6 +110,8 @@ var overrideAIConfig = (newConfig, extendMode) => {
101
110
  // Annotate the CommonJS export names for ESM import in node:
102
111
  0 && (module.exports = {
103
112
  MATCH_BY_POSITION,
113
+ MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
114
+ MIDSCENE_AZURE_OPENAI_SCOPE,
104
115
  MIDSCENE_CACHE,
105
116
  MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG,
106
117
  MIDSCENE_DEBUG_AI_PROFILE,
@@ -111,6 +122,7 @@ var overrideAIConfig = (newConfig, extendMode) => {
111
122
  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
112
123
  MIDSCENE_OPENAI_SOCKS_PROXY,
113
124
  MIDSCENE_REPORT_TAG_NAME,
125
+ MIDSCENE_USE_AZURE_OPENAI,
114
126
  OPENAI_API_KEY,
115
127
  OPENAI_BASE_URL,
116
128
  OPENAI_USE_AZURE,
package/dist/lib/index.js CHANGED
@@ -4316,10 +4316,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
4316
4316
  var OPENAI_API_KEY = "OPENAI_API_KEY";
4317
4317
  var OPENAI_BASE_URL = "OPENAI_BASE_URL";
4318
4318
  var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
4319
- var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
4320
4319
  var MIDSCENE_CACHE = "MIDSCENE_CACHE";
4321
4320
  var MATCH_BY_POSITION = "MATCH_BY_POSITION";
4322
4321
  var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
4322
+ var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
4323
+ var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
4324
+ var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
4325
+ var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
4323
4326
  var allConfigFromEnv = () => {
4324
4327
  return {
4325
4328
  [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -4335,7 +4338,10 @@ var allConfigFromEnv = () => {
4335
4338
  [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
4336
4339
  [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
4337
4340
  [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
4338
- [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
4341
+ [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
4342
+ [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
4343
+ [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
4344
+ [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
4339
4345
  };
4340
4346
  };
4341
4347
  var userConfig = {};
@@ -4506,7 +4512,7 @@ function stringifyDumpData(data, indents) {
4506
4512
  return JSON.stringify(data, replacerForPageObject, indents);
4507
4513
  }
4508
4514
  function getVersion() {
4509
- return "0.8.7";
4515
+ return "0.8.8-beta-20241223034944.0";
4510
4516
  }
4511
4517
 
4512
4518
  // src/action/executor.ts
@@ -4685,7 +4691,9 @@ var UIContext = class {
4685
4691
  };
4686
4692
 
4687
4693
  // src/ai-model/openai/index.ts
4694
+ var import_identity = require("@azure/identity");
4688
4695
  var import_utils3 = require("@midscene/shared/utils");
4696
+ var import_dirty_json = __toESM(require("dirty-json"));
4689
4697
  var import_openai = __toESM(require("openai"));
4690
4698
  var import_socks_proxy_agent = require("socks-proxy-agent");
4691
4699
 
@@ -5188,7 +5196,7 @@ You are a versatile professional in software UI automation. Your outstanding con
5188
5196
 
5189
5197
  - All the actions you composed MUST be based on the page context information you get.
5190
5198
  - Trust the "What have been done" field about the task (if any), don't repeat actions in it.
5191
- - Respond only with valid JSON. Do not write an introduction or summary.
5199
+ - Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`.
5192
5200
  - If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
5193
5201
 
5194
5202
  ## About the \`actions\` field
@@ -5276,7 +5284,6 @@ By viewing the page screenshot and description, you should consider this and out
5276
5284
  * The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So the last action will have a \`null\` value in the \`locate\` field.
5277
5285
  * The task cannot be accomplished (because we cannot see the "English" option now), so a \`furtherPlan\` field is needed.
5278
5286
 
5279
- \`\`\`json
5280
5287
  {
5281
5288
  "actions":[
5282
5289
  {
@@ -5307,8 +5314,6 @@ By viewing the page screenshot and description, you should consider this and out
5307
5314
  "whatHaveDone": "Click the language switch button and wait 1s"
5308
5315
  }
5309
5316
  }
5310
- \`\`\`
5311
-
5312
5317
 
5313
5318
  ## Example #2 : Tolerate the error situation only when the instruction is an "if" statement
5314
5319
 
@@ -5317,7 +5322,6 @@ If the user says "If there is a popup, close it", you should consider this and o
5317
5322
  * By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
5318
5323
  * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
5319
5324
 
5320
- \`\`\`json
5321
5325
  {
5322
5326
  "actions": [{
5323
5327
  "thought": "There is no popup on the page",
@@ -5328,18 +5332,15 @@ If the user says "If there is a popup, close it", you should consider this and o
5328
5332
  "taskWillBeAccomplished": true,
5329
5333
  "furtherPlan": null
5330
5334
  }
5331
- \`\`\`
5332
5335
 
5333
5336
  For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
5334
5337
 
5335
- \`\`\`json
5336
5338
  {
5337
5339
  "actions": [],
5338
5340
  "error": "The instruction and page context are irrelevant, there is no popup on the page",
5339
5341
  "taskWillBeAccomplished": true,
5340
5342
  "furtherPlan": null
5341
5343
  }
5342
- \`\`\`
5343
5344
 
5344
5345
  ## Example #3 : When task is accomplished, don't plan more actions
5345
5346
 
@@ -5360,6 +5361,7 @@ When the user ask to "Wait 4s", you should consider this:
5360
5361
  ## Bad case #1 : Missing \`prompt\` in the 'Locate' field; Missing \`furtherPlan\` field when the task won't be accomplished
5361
5362
 
5362
5363
  Wrong output:
5364
+
5363
5365
  {
5364
5366
  "actions":[
5365
5367
  {
@@ -5476,6 +5478,8 @@ function preferOpenAIModel(preferVendor) {
5476
5478
  return false;
5477
5479
  if (getAIConfig(OPENAI_API_KEY))
5478
5480
  return true;
5481
+ if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI))
5482
+ return true;
5479
5483
  return Boolean(getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON));
5480
5484
  }
5481
5485
  var defaultModel = "gpt-4o-2024-08-06";
@@ -5500,6 +5504,23 @@ async function createOpenAI() {
5500
5504
  ...extraConfig,
5501
5505
  dangerouslyAllowBrowser: true
5502
5506
  });
5507
+ } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
5508
+ const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
5509
+ (0, import_node_assert4.default)(
5510
+ !import_utils3.ifInBrowser,
5511
+ "Azure OpenAI is not supported in browser with Midscene."
5512
+ );
5513
+ const credential = new import_identity.DefaultAzureCredential();
5514
+ (0, import_node_assert4.default)(scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
5515
+ const tokenProvider = (0, import_identity.getBearerTokenProvider)(credential, scope);
5516
+ const extraAzureConfig = getAIConfigInJson(
5517
+ MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
5518
+ );
5519
+ openai = new import_openai.AzureOpenAI({
5520
+ azureADTokenProvider: tokenProvider,
5521
+ ...extraConfig,
5522
+ ...extraAzureConfig
5523
+ });
5503
5524
  } else {
5504
5525
  openai = new import_openai.default({
5505
5526
  baseURL: getAIConfig(OPENAI_BASE_URL),
@@ -5578,12 +5599,20 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
5578
5599
  let jsonContent = safeJsonParse(response.content);
5579
5600
  if (jsonContent)
5580
5601
  return { content: jsonContent, usage: response.usage };
5581
- jsonContent = extractJSONFromCodeBlock(response.content);
5602
+ const cleanJsonString = extractJSONFromCodeBlock(response.content);
5582
5603
  try {
5583
- return { content: JSON.parse(jsonContent), usage: response.usage };
5604
+ jsonContent = JSON.parse(cleanJsonString);
5584
5605
  } catch (e) {
5585
- throw Error(`failed to parse json response: ${response.content}`);
5586
5606
  }
5607
+ if (jsonContent)
5608
+ return { content: jsonContent, usage: response.usage };
5609
+ try {
5610
+ jsonContent = import_dirty_json.default.parse(cleanJsonString);
5611
+ } catch (e) {
5612
+ }
5613
+ if (jsonContent)
5614
+ return { content: jsonContent, usage: response.usage };
5615
+ throw Error(`failed to parse json response: ${response.content}`);
5587
5616
  }
5588
5617
  function extractJSONFromCodeBlock(response) {
5589
5618
  const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);
@@ -8,10 +8,13 @@ declare const MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
8
8
  declare const OPENAI_API_KEY = "OPENAI_API_KEY";
9
9
  declare const OPENAI_BASE_URL = "OPENAI_BASE_URL";
10
10
  declare const MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
11
- declare const OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
12
11
  declare const MIDSCENE_CACHE = "MIDSCENE_CACHE";
13
12
  declare const MATCH_BY_POSITION = "MATCH_BY_POSITION";
14
13
  declare const MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
14
+ declare const MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
15
+ declare const MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
16
+ declare const MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
17
+ declare const OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
15
18
  declare const allConfigFromEnv: () => {
16
19
  MIDSCENE_OPENAI_INIT_CONFIG_JSON: string | undefined;
17
20
  MIDSCENE_MODEL_NAME: string | undefined;
@@ -27,6 +30,9 @@ declare const allConfigFromEnv: () => {
27
30
  MATCH_BY_POSITION: string | undefined;
28
31
  MIDSCENE_REPORT_TAG_NAME: string | undefined;
29
32
  MIDSCENE_OPENAI_SOCKS_PROXY: string | undefined;
33
+ MIDSCENE_USE_AZURE_OPENAI: string | undefined;
34
+ MIDSCENE_AZURE_OPENAI_SCOPE: string;
35
+ MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: string | undefined;
30
36
  };
31
37
  declare let userConfig: ReturnType<typeof allConfigFromEnv>;
32
38
  declare const getAIConfig: (configKey: keyof typeof userConfig) => string | undefined;
@@ -46,7 +52,10 @@ declare const allAIConfig: () => {
46
52
  MATCH_BY_POSITION: string | undefined;
47
53
  MIDSCENE_REPORT_TAG_NAME: string | undefined;
48
54
  MIDSCENE_OPENAI_SOCKS_PROXY: string | undefined;
55
+ MIDSCENE_USE_AZURE_OPENAI: string | undefined;
56
+ MIDSCENE_AZURE_OPENAI_SCOPE: string;
57
+ MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: string | undefined;
49
58
  };
50
59
  declare const overrideAIConfig: (newConfig: ReturnType<typeof allConfigFromEnv>, extendMode?: boolean) => void;
51
60
 
52
- export { MATCH_BY_POSITION, MIDSCENE_CACHE, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_TEXT_ONLY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_REPORT_TAG_NAME, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_USE_AZURE, allAIConfig, getAIConfig, getAIConfigInJson, overrideAIConfig };
61
+ export { MATCH_BY_POSITION, MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_AZURE_OPENAI_SCOPE, MIDSCENE_CACHE, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_TEXT_ONLY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_USE_AZURE_OPENAI, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_USE_AZURE, allAIConfig, getAIConfig, getAIConfigInJson, overrideAIConfig };
package/dist/lib/utils.js CHANGED
@@ -67,10 +67,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
67
67
  var OPENAI_API_KEY = "OPENAI_API_KEY";
68
68
  var OPENAI_BASE_URL = "OPENAI_BASE_URL";
69
69
  var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
70
- var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
71
70
  var MIDSCENE_CACHE = "MIDSCENE_CACHE";
72
71
  var MATCH_BY_POSITION = "MATCH_BY_POSITION";
73
72
  var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
73
+ var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
74
+ var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
75
+ var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
76
+ var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
74
77
  var allConfigFromEnv = () => {
75
78
  return {
76
79
  [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -86,7 +89,10 @@ var allConfigFromEnv = () => {
86
89
  [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
87
90
  [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
88
91
  [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
89
- [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
92
+ [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
93
+ [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
94
+ [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
95
+ [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
90
96
  };
91
97
  };
92
98
  var userConfig = {};
@@ -272,7 +278,7 @@ function stringifyDumpData(data, indents) {
272
278
  return JSON.stringify(data, replacerForPageObject, indents);
273
279
  }
274
280
  function getVersion() {
275
- return "0.8.7";
281
+ return "0.8.8-beta-20241223034944.0";
276
282
  }
277
283
  function debugLog(...message) {
278
284
  const debugMode = getAIConfig(MIDSCENE_DEBUG_MODE);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
4
- "version": "0.8.7",
4
+ "version": "0.8.8-beta-20241223034944.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "jsnext:source": "./src/index.ts",
@@ -36,10 +36,12 @@
36
36
  }
37
37
  },
38
38
  "dependencies": {
39
+ "@azure/identity": "4.5.0",
40
+ "dirty-json": "0.9.2",
39
41
  "openai": "4.57.1",
40
42
  "optional": "0.1.4",
41
43
  "socks-proxy-agent": "8.0.4",
42
- "@midscene/shared": "0.8.7"
44
+ "@midscene/shared": "0.8.8-beta-20241223034944.0"
43
45
  },
44
46
  "devDependencies": {
45
47
  "@modern-js/module-tools": "2.60.6",