npm - @midscene/core - Versions diffs - 0.8.7 → 0.8.8-beta-20241223034944.0 - Mend

@midscene/core 0.8.7 → 0.8.8-beta-20241223034944.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/lib/ai-model.js CHANGED Viewed

@@ -4292,7 +4292,9 @@ module.exports = __toCommonJS(ai_model_exports);
 // src/ai-model/openai/index.ts
 var import_node_assert2 = __toESM(require("assert"));
+var import_identity = require("@azure/identity");
 var import_utils = require("@midscene/shared/utils");
+var import_dirty_json = __toESM(require("dirty-json"));
 var import_openai2 = __toESM(require("openai"));
 var import_socks_proxy_agent = require("socks-proxy-agent");
@@ -4307,10 +4309,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
 var OPENAI_API_KEY = "OPENAI_API_KEY";
 var OPENAI_BASE_URL = "OPENAI_BASE_URL";
 var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
-var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var MIDSCENE_CACHE = "MIDSCENE_CACHE";
 var MATCH_BY_POSITION = "MATCH_BY_POSITION";
 var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
+var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
+var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
+var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
+var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var allConfigFromEnv = () => {
   return {
     [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -4326,7 +4331,10 @@ var allConfigFromEnv = () => {
     [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
     [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
     [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
-    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
+    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
+    [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
+    [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
+    [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
   };
 };
 var userConfig = {};
@@ -4841,7 +4849,7 @@ You are a versatile professional in software UI automation. Your outstanding con
 - All the actions you composed MUST be based on the page context information you get.
 - Trust the "What have been done" field about the task (if any), don't repeat actions in it.
-- Respond only with valid JSON. Do not write an introduction or summary.
+- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`.
 - If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
 ## About the \`actions\` field
@@ -4929,7 +4937,6 @@ By viewing the page screenshot and description, you should consider this and out
 * The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So the last action will have a \`null\` value in the \`locate\` field.
 * The task cannot be accomplished (because we cannot see the "English" option now), so a \`furtherPlan\` field is needed.
-\`\`\`json
 {
   "actions":[
     {
@@ -4960,8 +4967,6 @@ By viewing the page screenshot and description, you should consider this and out
     "whatHaveDone": "Click the language switch button and wait 1s"
   }
 }
-\`\`\`
 ## Example #2 : Tolerate the error situation only when the instruction is an "if" statement
@@ -4970,7 +4975,6 @@ If the user says "If there is a popup, close it", you should consider this and o
 * By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
 * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
-\`\`\`json
 {
   "actions": [{
       "thought": "There is no popup on the page",
@@ -4981,18 +4985,15 @@ If the user says "If there is a popup, close it", you should consider this and o
   "taskWillBeAccomplished": true,
   "furtherPlan": null
 }
-\`\`\`
 For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
-\`\`\`json
 {
   "actions": [],
   "error": "The instruction and page context are irrelevant, there is no popup on the page",
   "taskWillBeAccomplished": true,
   "furtherPlan": null
 }
-\`\`\`
 ## Example #3 : When task is accomplished, don't plan more actions
@@ -5013,6 +5014,7 @@ When the user ask to "Wait 4s", you should consider this:
 ## Bad case #1 : Missing \`prompt\` in the 'Locate' field; Missing \`furtherPlan\` field when the task won't be accomplished
 Wrong output:
 {
   "actions":[
     {
@@ -5129,6 +5131,8 @@ function preferOpenAIModel(preferVendor) {
     return false;
   if (getAIConfig(OPENAI_API_KEY))
     return true;
+  if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI))
+    return true;
   return Boolean(getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON));
 }
 var defaultModel = "gpt-4o-2024-08-06";
@@ -5153,6 +5157,23 @@ async function createOpenAI() {
       ...extraConfig,
       dangerouslyAllowBrowser: true
     });
+  } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
+    const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
+    (0, import_node_assert2.default)(
+      !import_utils.ifInBrowser,
+      "Azure OpenAI is not supported in browser with Midscene."
+    );
+    const credential = new import_identity.DefaultAzureCredential();
+    (0, import_node_assert2.default)(scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
+    const tokenProvider = (0, import_identity.getBearerTokenProvider)(credential, scope);
+    const extraAzureConfig = getAIConfigInJson(
+      MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
+    );
+    openai = new import_openai2.AzureOpenAI({
+      azureADTokenProvider: tokenProvider,
+      ...extraConfig,
+      ...extraAzureConfig
+    });
   } else {
     openai = new import_openai2.default({
       baseURL: getAIConfig(OPENAI_BASE_URL),
@@ -5231,12 +5252,20 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
   let jsonContent = safeJsonParse(response.content);
   if (jsonContent)
     return { content: jsonContent, usage: response.usage };
-  jsonContent = extractJSONFromCodeBlock(response.content);
+  const cleanJsonString = extractJSONFromCodeBlock(response.content);
   try {
-    return { content: JSON.parse(jsonContent), usage: response.usage };
+    jsonContent = JSON.parse(cleanJsonString);
   } catch (e) {
-    throw Error(`failed to parse json response: ${response.content}`);
   }
+  if (jsonContent)
+    return { content: jsonContent, usage: response.usage };
+  try {
+    jsonContent = import_dirty_json.default.parse(cleanJsonString);
+  } catch (e) {
+  }
+  if (jsonContent)
+    return { content: jsonContent, usage: response.usage };
+  throw Error(`failed to parse json response: ${response.content}`);
 }
 function extractJSONFromCodeBlock(response) {
   const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);

package/dist/lib/env.js CHANGED Viewed

@@ -21,6 +21,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 var env_exports = {};
 __export(env_exports, {
   MATCH_BY_POSITION: () => MATCH_BY_POSITION,
+  MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: () => MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_AZURE_OPENAI_SCOPE: () => MIDSCENE_AZURE_OPENAI_SCOPE,
   MIDSCENE_CACHE: () => MIDSCENE_CACHE,
   MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG: () => MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG,
   MIDSCENE_DEBUG_AI_PROFILE: () => MIDSCENE_DEBUG_AI_PROFILE,
@@ -31,6 +33,7 @@ __export(env_exports, {
   MIDSCENE_OPENAI_INIT_CONFIG_JSON: () => MIDSCENE_OPENAI_INIT_CONFIG_JSON,
   MIDSCENE_OPENAI_SOCKS_PROXY: () => MIDSCENE_OPENAI_SOCKS_PROXY,
   MIDSCENE_REPORT_TAG_NAME: () => MIDSCENE_REPORT_TAG_NAME,
+  MIDSCENE_USE_AZURE_OPENAI: () => MIDSCENE_USE_AZURE_OPENAI,
   OPENAI_API_KEY: () => OPENAI_API_KEY,
   OPENAI_BASE_URL: () => OPENAI_BASE_URL,
   OPENAI_USE_AZURE: () => OPENAI_USE_AZURE,
@@ -50,10 +53,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
 var OPENAI_API_KEY = "OPENAI_API_KEY";
 var OPENAI_BASE_URL = "OPENAI_BASE_URL";
 var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
-var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var MIDSCENE_CACHE = "MIDSCENE_CACHE";
 var MATCH_BY_POSITION = "MATCH_BY_POSITION";
 var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
+var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
+var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
+var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
+var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var allConfigFromEnv = () => {
   return {
     [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -69,7 +75,10 @@ var allConfigFromEnv = () => {
     [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
     [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
     [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
-    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
+    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
+    [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
+    [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
+    [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
   };
 };
 var userConfig = {};
@@ -101,6 +110,8 @@ var overrideAIConfig = (newConfig, extendMode) => {
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
   MATCH_BY_POSITION,
+  MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_AZURE_OPENAI_SCOPE,
   MIDSCENE_CACHE,
   MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG,
   MIDSCENE_DEBUG_AI_PROFILE,
@@ -111,6 +122,7 @@ var overrideAIConfig = (newConfig, extendMode) => {
   MIDSCENE_OPENAI_INIT_CONFIG_JSON,
   MIDSCENE_OPENAI_SOCKS_PROXY,
   MIDSCENE_REPORT_TAG_NAME,
+  MIDSCENE_USE_AZURE_OPENAI,
   OPENAI_API_KEY,
   OPENAI_BASE_URL,
   OPENAI_USE_AZURE,

package/dist/lib/index.js CHANGED Viewed

@@ -4316,10 +4316,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
 var OPENAI_API_KEY = "OPENAI_API_KEY";
 var OPENAI_BASE_URL = "OPENAI_BASE_URL";
 var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
-var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var MIDSCENE_CACHE = "MIDSCENE_CACHE";
 var MATCH_BY_POSITION = "MATCH_BY_POSITION";
 var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
+var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
+var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
+var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
+var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var allConfigFromEnv = () => {
   return {
     [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -4335,7 +4338,10 @@ var allConfigFromEnv = () => {
     [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
     [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
     [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
-    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
+    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
+    [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
+    [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
+    [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
   };
 };
 var userConfig = {};
@@ -4506,7 +4512,7 @@ function stringifyDumpData(data, indents) {
   return JSON.stringify(data, replacerForPageObject, indents);
 }
 function getVersion() {
-  return "0.8.7";
+  return "0.8.8-beta-20241223034944.0";
 }
 // src/action/executor.ts
@@ -4685,7 +4691,9 @@ var UIContext = class {
 };
 // src/ai-model/openai/index.ts
+var import_identity = require("@azure/identity");
 var import_utils3 = require("@midscene/shared/utils");
+var import_dirty_json = __toESM(require("dirty-json"));
 var import_openai = __toESM(require("openai"));
 var import_socks_proxy_agent = require("socks-proxy-agent");
@@ -5188,7 +5196,7 @@ You are a versatile professional in software UI automation. Your outstanding con
 - All the actions you composed MUST be based on the page context information you get.
 - Trust the "What have been done" field about the task (if any), don't repeat actions in it.
-- Respond only with valid JSON. Do not write an introduction or summary.
+- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`.
 - If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
 ## About the \`actions\` field
@@ -5276,7 +5284,6 @@ By viewing the page screenshot and description, you should consider this and out
 * The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So the last action will have a \`null\` value in the \`locate\` field.
 * The task cannot be accomplished (because we cannot see the "English" option now), so a \`furtherPlan\` field is needed.
-\`\`\`json
 {
   "actions":[
     {
@@ -5307,8 +5314,6 @@ By viewing the page screenshot and description, you should consider this and out
     "whatHaveDone": "Click the language switch button and wait 1s"
   }
 }
-\`\`\`
 ## Example #2 : Tolerate the error situation only when the instruction is an "if" statement
@@ -5317,7 +5322,6 @@ If the user says "If there is a popup, close it", you should consider this and o
 * By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
 * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
-\`\`\`json
 {
   "actions": [{
       "thought": "There is no popup on the page",
@@ -5328,18 +5332,15 @@ If the user says "If there is a popup, close it", you should consider this and o
   "taskWillBeAccomplished": true,
   "furtherPlan": null
 }
-\`\`\`
 For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
-\`\`\`json
 {
   "actions": [],
   "error": "The instruction and page context are irrelevant, there is no popup on the page",
   "taskWillBeAccomplished": true,
   "furtherPlan": null
 }
-\`\`\`
 ## Example #3 : When task is accomplished, don't plan more actions
@@ -5360,6 +5361,7 @@ When the user ask to "Wait 4s", you should consider this:
 ## Bad case #1 : Missing \`prompt\` in the 'Locate' field; Missing \`furtherPlan\` field when the task won't be accomplished
 Wrong output:
 {
   "actions":[
     {
@@ -5476,6 +5478,8 @@ function preferOpenAIModel(preferVendor) {
     return false;
   if (getAIConfig(OPENAI_API_KEY))
     return true;
+  if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI))
+    return true;
   return Boolean(getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON));
 }
 var defaultModel = "gpt-4o-2024-08-06";
@@ -5500,6 +5504,23 @@ async function createOpenAI() {
       ...extraConfig,
       dangerouslyAllowBrowser: true
     });
+  } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
+    const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
+    (0, import_node_assert4.default)(
+      !import_utils3.ifInBrowser,
+      "Azure OpenAI is not supported in browser with Midscene."
+    );
+    const credential = new import_identity.DefaultAzureCredential();
+    (0, import_node_assert4.default)(scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
+    const tokenProvider = (0, import_identity.getBearerTokenProvider)(credential, scope);
+    const extraAzureConfig = getAIConfigInJson(
+      MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
+    );
+    openai = new import_openai.AzureOpenAI({
+      azureADTokenProvider: tokenProvider,
+      ...extraConfig,
+      ...extraAzureConfig
+    });
   } else {
     openai = new import_openai.default({
       baseURL: getAIConfig(OPENAI_BASE_URL),
@@ -5578,12 +5599,20 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
   let jsonContent = safeJsonParse(response.content);
   if (jsonContent)
     return { content: jsonContent, usage: response.usage };
-  jsonContent = extractJSONFromCodeBlock(response.content);
+  const cleanJsonString = extractJSONFromCodeBlock(response.content);
   try {
-    return { content: JSON.parse(jsonContent), usage: response.usage };
+    jsonContent = JSON.parse(cleanJsonString);
   } catch (e) {
-    throw Error(`failed to parse json response: ${response.content}`);
   }
+  if (jsonContent)
+    return { content: jsonContent, usage: response.usage };
+  try {
+    jsonContent = import_dirty_json.default.parse(cleanJsonString);
+  } catch (e) {
+  }
+  if (jsonContent)
+    return { content: jsonContent, usage: response.usage };
+  throw Error(`failed to parse json response: ${response.content}`);
 }
 function extractJSONFromCodeBlock(response) {
   const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);

package/dist/lib/types/env.d.ts CHANGED Viewed

@@ -8,10 +8,13 @@ declare const MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
 declare const OPENAI_API_KEY = "OPENAI_API_KEY";
 declare const OPENAI_BASE_URL = "OPENAI_BASE_URL";
 declare const MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
-declare const OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 declare const MIDSCENE_CACHE = "MIDSCENE_CACHE";
 declare const MATCH_BY_POSITION = "MATCH_BY_POSITION";
 declare const MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
+declare const MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
+declare const MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
+declare const MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
+declare const OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 declare const allConfigFromEnv: () => {
     MIDSCENE_OPENAI_INIT_CONFIG_JSON: string | undefined;
     MIDSCENE_MODEL_NAME: string | undefined;
@@ -27,6 +30,9 @@ declare const allConfigFromEnv: () => {
     MATCH_BY_POSITION: string | undefined;
     MIDSCENE_REPORT_TAG_NAME: string | undefined;
     MIDSCENE_OPENAI_SOCKS_PROXY: string | undefined;
+    MIDSCENE_USE_AZURE_OPENAI: string | undefined;
+    MIDSCENE_AZURE_OPENAI_SCOPE: string;
+    MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: string | undefined;
 };
 declare let userConfig: ReturnType<typeof allConfigFromEnv>;
 declare const getAIConfig: (configKey: keyof typeof userConfig) => string | undefined;
@@ -46,7 +52,10 @@ declare const allAIConfig: () => {
     MATCH_BY_POSITION: string | undefined;
     MIDSCENE_REPORT_TAG_NAME: string | undefined;
     MIDSCENE_OPENAI_SOCKS_PROXY: string | undefined;
+    MIDSCENE_USE_AZURE_OPENAI: string | undefined;
+    MIDSCENE_AZURE_OPENAI_SCOPE: string;
+    MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON: string | undefined;
 };
 declare const overrideAIConfig: (newConfig: ReturnType<typeof allConfigFromEnv>, extendMode?: boolean) => void;
-export { MATCH_BY_POSITION, MIDSCENE_CACHE, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_TEXT_ONLY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_REPORT_TAG_NAME, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_USE_AZURE, allAIConfig, getAIConfig, getAIConfigInJson, overrideAIConfig };
+export { MATCH_BY_POSITION, MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_AZURE_OPENAI_SCOPE, MIDSCENE_CACHE, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_TEXT_ONLY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_USE_AZURE_OPENAI, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_USE_AZURE, allAIConfig, getAIConfig, getAIConfigInJson, overrideAIConfig };

package/dist/lib/utils.js CHANGED Viewed

@@ -67,10 +67,13 @@ var MIDSCENE_OPENAI_SOCKS_PROXY = "MIDSCENE_OPENAI_SOCKS_PROXY";
 var OPENAI_API_KEY = "OPENAI_API_KEY";
 var OPENAI_BASE_URL = "OPENAI_BASE_URL";
 var MIDSCENE_MODEL_TEXT_ONLY = "MIDSCENE_MODEL_TEXT_ONLY";
-var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var MIDSCENE_CACHE = "MIDSCENE_CACHE";
 var MATCH_BY_POSITION = "MATCH_BY_POSITION";
 var MIDSCENE_REPORT_TAG_NAME = "MIDSCENE_REPORT_TAG_NAME";
+var MIDSCENE_USE_AZURE_OPENAI = "MIDSCENE_USE_AZURE_OPENAI";
+var MIDSCENE_AZURE_OPENAI_SCOPE = "MIDSCENE_AZURE_OPENAI_SCOPE";
+var MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON";
+var OPENAI_USE_AZURE = "OPENAI_USE_AZURE";
 var allConfigFromEnv = () => {
   return {
     [MIDSCENE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_OPENAI_INIT_CONFIG_JSON] || void 0,
@@ -86,7 +89,10 @@ var allConfigFromEnv = () => {
     [MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || void 0,
     [MATCH_BY_POSITION]: process.env[MATCH_BY_POSITION] || void 0,
     [MIDSCENE_REPORT_TAG_NAME]: process.env[MIDSCENE_REPORT_TAG_NAME] || void 0,
-    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0
+    [MIDSCENE_OPENAI_SOCKS_PROXY]: process.env[MIDSCENE_OPENAI_SOCKS_PROXY] || void 0,
+    [MIDSCENE_USE_AZURE_OPENAI]: process.env[MIDSCENE_USE_AZURE_OPENAI] || void 0,
+    [MIDSCENE_AZURE_OPENAI_SCOPE]: process.env[MIDSCENE_AZURE_OPENAI_SCOPE] || "https://cognitiveservices.azure.com/.default",
+    [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]: process.env[MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON] || void 0
   };
 };
 var userConfig = {};
@@ -272,7 +278,7 @@ function stringifyDumpData(data, indents) {
   return JSON.stringify(data, replacerForPageObject, indents);
 }
 function getVersion() {
-  return "0.8.7";
+  return "0.8.8-beta-20241223034944.0";
 }
 function debugLog(...message) {
   const debugMode = getAIConfig(MIDSCENE_DEBUG_MODE);

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@midscene/core",
   "description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
-  "version": "0.8.7",
+  "version": "0.8.8-beta-20241223034944.0",
   "repository": "https://github.com/web-infra-dev/midscene",
   "homepage": "https://midscenejs.com/",
   "jsnext:source": "./src/index.ts",
@@ -36,10 +36,12 @@
     }
   },
   "dependencies": {
+    "@azure/identity": "4.5.0",
+    "dirty-json": "0.9.2",
     "openai": "4.57.1",
     "optional": "0.1.4",
     "socks-proxy-agent": "8.0.4",
-    "@midscene/shared": "0.8.7"
+    "@midscene/shared": "0.8.8-beta-20241223034944.0"
   },
   "devDependencies": {
     "@modern-js/module-tools": "2.60.6",