@midscene/core 0.26.7-beta-20250818081955.0 → 0.26.7-beta-20250820105545.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/es/ai-model/common.mjs +58 -16
  2. package/dist/es/ai-model/common.mjs.map +1 -1
  3. package/dist/es/ai-model/index.mjs +3 -3
  4. package/dist/es/ai-model/inspect.mjs +28 -16
  5. package/dist/es/ai-model/inspect.mjs.map +1 -1
  6. package/dist/es/ai-model/llm-planning.mjs +26 -23
  7. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  8. package/dist/es/ai-model/prompt/llm-planning.mjs +50 -23
  9. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  10. package/dist/es/ai-model/prompt/playwright-generator.mjs +9 -3
  11. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  12. package/dist/es/ai-model/prompt/util.mjs +2 -2
  13. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  14. package/dist/es/ai-model/prompt/yaml-generator.mjs +9 -3
  15. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  16. package/dist/es/ai-model/service-caller/index.mjs +72 -118
  17. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  18. package/dist/es/ai-model/ui-tars-planning.mjs +5 -5
  19. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  20. package/dist/es/index.mjs +3 -2
  21. package/dist/es/index.mjs.map +1 -1
  22. package/dist/es/insight/index.mjs +13 -61
  23. package/dist/es/insight/index.mjs.map +1 -1
  24. package/dist/es/types.mjs.map +1 -1
  25. package/dist/es/utils.mjs +5 -6
  26. package/dist/es/utils.mjs.map +1 -1
  27. package/dist/lib/ai-model/common.js +80 -20
  28. package/dist/lib/ai-model/common.js.map +1 -1
  29. package/dist/lib/ai-model/index.js +14 -5
  30. package/dist/lib/ai-model/inspect.js +27 -15
  31. package/dist/lib/ai-model/inspect.js.map +1 -1
  32. package/dist/lib/ai-model/llm-planning.js +25 -22
  33. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  34. package/dist/lib/ai-model/prompt/llm-planning.js +52 -25
  35. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  36. package/dist/lib/ai-model/prompt/playwright-generator.js +9 -3
  37. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  38. package/dist/lib/ai-model/prompt/util.js +2 -2
  39. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  40. package/dist/lib/ai-model/prompt/yaml-generator.js +9 -3
  41. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  42. package/dist/lib/ai-model/service-caller/index.js +75 -124
  43. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  44. package/dist/lib/ai-model/ui-tars-planning.js +5 -5
  45. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  46. package/dist/lib/index.js +20 -4
  47. package/dist/lib/index.js.map +1 -1
  48. package/dist/lib/insight/index.js +10 -58
  49. package/dist/lib/insight/index.js.map +1 -1
  50. package/dist/lib/types.js.map +1 -1
  51. package/dist/lib/utils.js +4 -5
  52. package/dist/lib/utils.js.map +1 -1
  53. package/dist/types/ai-model/common.d.ts +160 -7
  54. package/dist/types/ai-model/index.d.ts +2 -2
  55. package/dist/types/ai-model/inspect.d.ts +2 -0
  56. package/dist/types/ai-model/llm-planning.d.ts +1 -1
  57. package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -2
  58. package/dist/types/ai-model/prompt/util.d.ts +2 -1
  59. package/dist/types/ai-model/service-caller/index.d.ts +6 -6
  60. package/dist/types/ai-model/ui-tars-planning.d.ts +3 -1
  61. package/dist/types/index.d.ts +3 -1
  62. package/dist/types/insight/index.d.ts +1 -4
  63. package/dist/types/types.d.ts +8 -11
  64. package/dist/types/yaml.d.ts +1 -0
  65. package/package.json +4 -3
@@ -1,9 +1,9 @@
1
1
  import { AIResponseFormat } from "../../types.mjs";
2
2
  import { Anthropic } from "@anthropic-ai/sdk";
3
3
  import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
4
- import { ANTHROPIC_API_KEY, AZURE_OPENAI_API_VERSION, AZURE_OPENAI_DEPLOYMENT, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_KEY, MIDSCENE_API_TYPE, MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_AZURE_OPENAI_SCOPE, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_AI_RESPONSE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_NAME, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_USE_ANTHROPIC_SDK, MIDSCENE_USE_AZURE_OPENAI, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, OPENAI_USE_AZURE, getAIConfig, getAIConfigInBoolean, getAIConfigInJson, uiTarsModelVersion, vlLocateMode } from "@midscene/shared/env";
4
+ import { MIDSCENE_API_TYPE, MIDSCENE_LANGSMITH_DEBUG, OPENAI_MAX_TOKENS, decideModelConfig, getAIConfig, getAIConfigInBoolean, uiTarsModelVersion, vlLocateMode } from "@midscene/shared/env";
5
5
  import { parseBase64 } from "@midscene/shared/img";
6
- import { enableDebug, getDebug } from "@midscene/shared/logger";
6
+ import { getDebug } from "@midscene/shared/logger";
7
7
  import { assert, ifInBrowser } from "@midscene/shared/utils";
8
8
  import { HttpsProxyAgent } from "https-proxy-agent";
9
9
  import { jsonrepair } from "jsonrepair";
@@ -13,46 +13,9 @@ import { AIActionType } from "../common.mjs";
13
13
  import { assertSchema } from "../prompt/assertion.mjs";
14
14
  import { locatorSchema } from "../prompt/llm-locator.mjs";
15
15
  import { planSchema } from "../prompt/llm-planning.mjs";
16
- function checkAIConfig() {
17
- const openaiKey = getAIConfig(OPENAI_API_KEY);
18
- const azureConfig = getAIConfig(MIDSCENE_USE_AZURE_OPENAI);
19
- const anthropicKey = getAIConfig(ANTHROPIC_API_KEY);
20
- const initConfigJson = getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON);
21
- if (openaiKey) return true;
22
- if (azureConfig) return true;
23
- if (anthropicKey) return true;
24
- return Boolean(initConfigJson);
25
- }
26
- let debugConfigInitialized = false;
27
- function initDebugConfig() {
28
- if (debugConfigInitialized) return;
29
- const shouldPrintTiming = getAIConfigInBoolean(MIDSCENE_DEBUG_AI_PROFILE);
30
- let debugConfig = '';
31
- if (shouldPrintTiming) {
32
- console.warn('MIDSCENE_DEBUG_AI_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead');
33
- debugConfig = 'ai:profile';
34
- }
35
- const shouldPrintAIResponse = getAIConfigInBoolean(MIDSCENE_DEBUG_AI_RESPONSE);
36
- if (shouldPrintAIResponse) {
37
- console.warn('MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead');
38
- debugConfig = debugConfig ? 'ai:*' : 'ai:call';
39
- }
40
- if (debugConfig) enableDebug(debugConfig);
41
- debugConfigInitialized = true;
42
- }
43
- const defaultModel = 'gpt-4o';
44
- function getModelName() {
45
- let modelName = defaultModel;
46
- const nameInConfig = getAIConfig(MIDSCENE_MODEL_NAME);
47
- if (nameInConfig) modelName = nameInConfig;
48
- return modelName;
49
- }
50
- async function createChatClient({ AIActionTypeValue }) {
51
- initDebugConfig();
16
+ async function createChatClient({ AIActionTypeValue, modelPreferences }) {
17
+ const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, openaiUseAzureDeprecated, useAzureOpenai, azureOpenaiScope, azureOpenaiKey, azureOpenaiEndpoint, azureOpenaiApiVersion, azureOpenaiDeployment, azureExtraConfig, useAnthropicSdk, anthropicApiKey } = decideModelConfig(modelPreferences, true);
52
18
  let openai;
53
- const extraConfig = getAIConfigInJson(MIDSCENE_OPENAI_INIT_CONFIG_JSON);
54
- const socksProxy = getAIConfig(MIDSCENE_OPENAI_SOCKS_PROXY);
55
- const httpProxy = getAIConfig(MIDSCENE_OPENAI_HTTP_PROXY);
56
19
  let proxyAgent;
57
20
  const debugProxy = getDebug('ai:call:proxy');
58
21
  if (httpProxy) {
@@ -62,56 +25,47 @@ async function createChatClient({ AIActionTypeValue }) {
62
25
  debugProxy('using socks proxy', socksProxy);
63
26
  proxyAgent = new SocksProxyAgent(socksProxy);
64
27
  }
65
- if (getAIConfig(OPENAI_USE_AZURE)) openai = new AzureOpenAI({
66
- baseURL: getAIConfig(OPENAI_BASE_URL),
67
- apiKey: getAIConfig(OPENAI_API_KEY),
28
+ if (openaiUseAzureDeprecated) openai = new AzureOpenAI({
29
+ baseURL: openaiBaseURL,
30
+ apiKey: openaiApiKey,
68
31
  httpAgent: proxyAgent,
69
- ...extraConfig,
32
+ ...openaiExtraConfig,
70
33
  dangerouslyAllowBrowser: true
71
34
  });
72
- else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
73
- const extraAzureConfig = getAIConfigInJson(MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON);
74
- const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
35
+ else if (useAzureOpenai) {
75
36
  let tokenProvider;
76
- if (scope) {
37
+ if (azureOpenaiScope) {
77
38
  assert(!ifInBrowser, 'Azure OpenAI is not supported in browser with Midscene.');
78
39
  const credential = new DefaultAzureCredential();
79
- assert(scope, 'MIDSCENE_AZURE_OPENAI_SCOPE is required');
80
- tokenProvider = getBearerTokenProvider(credential, scope);
40
+ tokenProvider = getBearerTokenProvider(credential, azureOpenaiScope);
81
41
  openai = new AzureOpenAI({
82
42
  azureADTokenProvider: tokenProvider,
83
- endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),
84
- apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),
85
- deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),
86
- ...extraConfig,
87
- ...extraAzureConfig
43
+ endpoint: azureOpenaiEndpoint,
44
+ apiVersion: azureOpenaiApiVersion,
45
+ deployment: azureOpenaiDeployment,
46
+ ...openaiExtraConfig,
47
+ ...azureExtraConfig
88
48
  });
89
49
  } else openai = new AzureOpenAI({
90
- apiKey: getAIConfig(AZURE_OPENAI_KEY),
91
- endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),
92
- apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),
93
- deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),
50
+ apiKey: azureOpenaiKey,
51
+ endpoint: azureOpenaiEndpoint,
52
+ apiVersion: azureOpenaiApiVersion,
53
+ deployment: azureOpenaiDeployment,
94
54
  dangerouslyAllowBrowser: true,
95
- ...extraConfig,
96
- ...extraAzureConfig
97
- });
98
- } else if (!getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {
99
- const baseURL = getAIConfig(OPENAI_BASE_URL);
100
- if ('string' == typeof baseURL) {
101
- if (!/^https?:\/\//.test(baseURL)) throw new Error(`OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}\nPlease check your config.`);
102
- }
103
- openai = new openai_0({
104
- baseURL: getAIConfig(OPENAI_BASE_URL),
105
- apiKey: getAIConfig(OPENAI_API_KEY),
106
- httpAgent: proxyAgent,
107
- ...extraConfig,
108
- defaultHeaders: {
109
- ...(null == extraConfig ? void 0 : extraConfig.defaultHeaders) || {},
110
- [MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
111
- },
112
- dangerouslyAllowBrowser: true
55
+ ...openaiExtraConfig,
56
+ ...azureExtraConfig
113
57
  });
114
- }
58
+ } else if (!useAnthropicSdk) openai = new openai_0({
59
+ baseURL: openaiBaseURL,
60
+ apiKey: openaiApiKey,
61
+ httpAgent: proxyAgent,
62
+ ...openaiExtraConfig,
63
+ defaultHeaders: {
64
+ ...(null == openaiExtraConfig ? void 0 : openaiExtraConfig.defaultHeaders) || {},
65
+ [MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
66
+ },
67
+ dangerouslyAllowBrowser: true
68
+ });
115
69
  if (openai && getAIConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)) {
116
70
  if (ifInBrowser) throw new Error('langsmith is not supported in browser');
117
71
  console.log('DEBUGGING MODE: langsmith wrapper enabled');
@@ -120,53 +74,51 @@ async function createChatClient({ AIActionTypeValue }) {
120
74
  }
121
75
  if (void 0 !== openai) return {
122
76
  completion: openai.chat.completions,
123
- style: 'openai'
77
+ style: 'openai',
78
+ modelName
124
79
  };
125
- if (getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {
126
- const apiKey = getAIConfig(ANTHROPIC_API_KEY);
127
- assert(apiKey, 'ANTHROPIC_API_KEY is required');
128
- openai = new Anthropic({
129
- apiKey,
130
- httpAgent: proxyAgent,
131
- dangerouslyAllowBrowser: true
132
- });
133
- }
80
+ if (useAnthropicSdk) openai = new Anthropic({
81
+ apiKey: anthropicApiKey,
82
+ httpAgent: proxyAgent,
83
+ dangerouslyAllowBrowser: true
84
+ });
134
85
  if (void 0 !== openai && openai.messages) return {
135
86
  completion: openai.messages,
136
- style: 'anthropic'
87
+ style: 'anthropic',
88
+ modelName
137
89
  };
138
90
  throw new Error('Openai SDK or Anthropic SDK is not initialized');
139
91
  }
140
- async function call(messages, AIActionTypeValue, responseFormat, options) {
141
- assert(checkAIConfig(), 'Cannot find config for AI model service. If you are using a self-hosted model without validating the API key, please set `OPENAI_API_KEY` to any non-null value. https://midscenejs.com/model-provider.html');
142
- const { completion, style } = await createChatClient({
143
- AIActionTypeValue
92
+ async function call(messages, AIActionTypeValue, modelPreferences, options) {
93
+ const { completion, style, modelName } = await createChatClient({
94
+ AIActionTypeValue,
95
+ modelPreferences
144
96
  });
97
+ const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
145
98
  const maxTokens = getAIConfig(OPENAI_MAX_TOKENS);
146
99
  const debugCall = getDebug('ai:call');
147
100
  const debugProfileStats = getDebug('ai:profile:stats');
148
101
  const debugProfileDetail = getDebug('ai:profile:detail');
149
102
  const startTime = Date.now();
150
- const model = getModelName();
151
103
  const isStreaming = (null == options ? void 0 : options.stream) && (null == options ? void 0 : options.onChunk);
152
104
  let content;
153
105
  let accumulated = '';
154
106
  let usage;
155
107
  let timeCost;
156
108
  const commonConfig = {
157
- temperature: 'vlm-ui-tars' === vlLocateMode() ? 0.0 : 0.1,
109
+ temperature: 'vlm-ui-tars' === vlLocateMode(modelPreferences) ? 0.0 : 0.1,
158
110
  stream: !!isStreaming,
159
111
  max_tokens: 'number' == typeof maxTokens ? maxTokens : Number.parseInt(maxTokens || '2048', 10),
160
- ...'qwen-vl' === vlLocateMode() ? {
112
+ ...'qwen-vl' === vlLocateMode(modelPreferences) ? {
161
113
  vl_high_resolution_images: true
162
114
  } : {}
163
115
  };
164
116
  try {
165
117
  if ('openai' === style) {
166
- debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${model}`);
118
+ debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
167
119
  if (isStreaming) {
168
120
  const stream = await completion.create({
169
- model,
121
+ model: modelName,
170
122
  messages,
171
123
  response_format: responseFormat,
172
124
  ...commonConfig
@@ -209,7 +161,7 @@ async function call(messages, AIActionTypeValue, responseFormat, options) {
209
161
  completion_tokens: usage.completion_tokens ?? 0,
210
162
  total_tokens: usage.total_tokens ?? 0,
211
163
  time_cost: timeCost ?? 0,
212
- model_name: model
164
+ model_name: modelName
213
165
  }
214
166
  };
215
167
  options.onChunk(finalChunk);
@@ -217,17 +169,17 @@ async function call(messages, AIActionTypeValue, responseFormat, options) {
217
169
  }
218
170
  }
219
171
  content = accumulated;
220
- debugProfileStats(`streaming model, ${model}, mode, ${vlLocateMode() || 'default'}, cost-ms, ${timeCost}`);
172
+ debugProfileStats(`streaming model, ${modelName}, mode, ${vlLocateMode(modelPreferences) || 'default'}, cost-ms, ${timeCost}`);
221
173
  } else {
222
174
  var _result_usage, _result_usage1, _result_usage2;
223
175
  const result = await completion.create({
224
- model,
176
+ model: modelName,
225
177
  messages,
226
178
  response_format: responseFormat,
227
179
  ...commonConfig
228
180
  });
229
181
  timeCost = Date.now() - startTime;
230
- debugProfileStats(`model, ${model}, mode, ${vlLocateMode() || 'default'}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${(null == (_result_usage = result.usage) ? void 0 : _result_usage.prompt_tokens) || ''}, completion-tokens, ${(null == (_result_usage1 = result.usage) ? void 0 : _result_usage1.completion_tokens) || ''}, total-tokens, ${(null == (_result_usage2 = result.usage) ? void 0 : _result_usage2.total_tokens) || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
182
+ debugProfileStats(`model, ${modelName}, mode, ${vlLocateMode(modelPreferences) || 'default'}, ui-tars-version, ${uiTarsModelVersion(modelPreferences)}, prompt-tokens, ${(null == (_result_usage = result.usage) ? void 0 : _result_usage.prompt_tokens) || ''}, completion-tokens, ${(null == (_result_usage1 = result.usage) ? void 0 : _result_usage1.completion_tokens) || ''}, total-tokens, ${(null == (_result_usage2 = result.usage) ? void 0 : _result_usage2.total_tokens) || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
231
183
  debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
232
184
  assert(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
233
185
  content = result.choices[0].message.content;
@@ -254,7 +206,7 @@ async function call(messages, AIActionTypeValue, responseFormat, options) {
254
206
  };
255
207
  if (isStreaming) {
256
208
  const stream = await completion.create({
257
- model,
209
+ model: modelName,
258
210
  system: 'You are a versatile professional in software UI automation',
259
211
  messages: messages.map((m)=>({
260
212
  role: 'user',
@@ -290,7 +242,7 @@ async function call(messages, AIActionTypeValue, responseFormat, options) {
290
242
  completion_tokens: anthropicUsage.output_tokens ?? 0,
291
243
  total_tokens: (anthropicUsage.input_tokens ?? 0) + (anthropicUsage.output_tokens ?? 0),
292
244
  time_cost: timeCost ?? 0,
293
- model_name: model
245
+ model_name: modelName
294
246
  } : void 0
295
247
  };
296
248
  options.onChunk(finalChunk);
@@ -300,7 +252,7 @@ async function call(messages, AIActionTypeValue, responseFormat, options) {
300
252
  content = accumulated;
301
253
  } else {
302
254
  const result = await completion.create({
303
- model,
255
+ model: modelName,
304
256
  system: 'You are a versatile professional in software UI automation',
305
257
  messages: messages.map((m)=>({
306
258
  role: 'user',
@@ -330,7 +282,7 @@ async function call(messages, AIActionTypeValue, responseFormat, options) {
330
282
  completion_tokens: usage.completion_tokens ?? 0,
331
283
  total_tokens: usage.total_tokens ?? 0,
332
284
  time_cost: timeCost ?? 0,
333
- model_name: model
285
+ model_name: modelName
334
286
  } : void 0,
335
287
  isStreamed: !!isStreaming
336
288
  };
@@ -342,10 +294,9 @@ async function call(messages, AIActionTypeValue, responseFormat, options) {
342
294
  throw newError;
343
295
  }
344
296
  }
345
- async function callToGetJSONObject(messages, AIActionTypeValue) {
297
+ const getResponseFormat = (modelName, AIActionTypeValue)=>{
346
298
  let responseFormat;
347
- const model = getModelName();
348
- if (model.includes('gpt-4')) switch(AIActionTypeValue){
299
+ if (modelName.includes('gpt-4')) switch(AIActionTypeValue){
349
300
  case AIActionType.ASSERT:
350
301
  responseFormat = assertSchema;
351
302
  break;
@@ -362,19 +313,22 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
362
313
  };
363
314
  break;
364
315
  }
365
- if ('gpt-4o-2024-05-13' === model) responseFormat = {
316
+ if ('gpt-4o-2024-05-13' === modelName) responseFormat = {
366
317
  type: AIResponseFormat.JSON
367
318
  };
368
- const response = await call(messages, AIActionTypeValue, responseFormat);
319
+ return responseFormat;
320
+ };
321
+ async function callToGetJSONObject(messages, AIActionTypeValue, modelPreferences) {
322
+ const response = await call(messages, AIActionTypeValue, modelPreferences);
369
323
  assert(response, 'empty response');
370
- const jsonContent = safeParseJson(response.content);
324
+ const jsonContent = safeParseJson(response.content, modelPreferences);
371
325
  return {
372
326
  content: jsonContent,
373
327
  usage: response.usage
374
328
  };
375
329
  }
376
- async function callAiFnWithStringResponse(msgs, AIActionTypeValue) {
377
- const { content, usage } = await call(msgs, AIActionTypeValue);
330
+ async function callAiFnWithStringResponse(msgs, AIActionTypeValue, modelPreferences) {
331
+ const { content, usage } = await call(msgs, AIActionTypeValue, modelPreferences);
378
332
  return {
379
333
  content,
380
334
  usage
@@ -395,7 +349,7 @@ function preprocessDoubaoBboxJson(input) {
395
349
  if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
396
350
  return input;
397
351
  }
398
- function safeParseJson(input) {
352
+ function safeParseJson(input, modelPreferences) {
399
353
  const cleanJsonString = extractJSONFromCodeBlock(input);
400
354
  if (null == cleanJsonString ? void 0 : cleanJsonString.match(/\((\d+),(\d+)\)/)) {
401
355
  var _cleanJsonString_match;
@@ -407,12 +361,12 @@ function safeParseJson(input) {
407
361
  try {
408
362
  return JSON.parse(jsonrepair(cleanJsonString));
409
363
  } catch (e) {}
410
- if ('doubao-vision' === vlLocateMode() || 'vlm-ui-tars' === vlLocateMode()) {
364
+ if ('doubao-vision' === vlLocateMode(modelPreferences) || 'vlm-ui-tars' === vlLocateMode(modelPreferences)) {
411
365
  const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
412
366
  return JSON.parse(jsonrepair(jsonString));
413
367
  }
414
368
  throw Error(`failed to parse json response: ${input}`);
415
369
  }
416
- export { call, callAiFnWithStringResponse, callToGetJSONObject, checkAIConfig, extractJSONFromCodeBlock, getModelName, preprocessDoubaoBboxJson, safeParseJson };
370
+ export { call, callAiFnWithStringResponse, callToGetJSONObject, extractJSONFromCodeBlock, getResponseFormat, preprocessDoubaoBboxJson, safeParseJson };
417
371
 
418
372
  //# sourceMappingURL=index.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/service-caller/index.mjs","sources":["webpack://@midscene/core/./src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport { Anthropic } from '@anthropic-ai/sdk';\nimport {\n DefaultAzureCredential,\n getBearerTokenProvider,\n} from '@azure/identity';\nimport {\n ANTHROPIC_API_KEY,\n AZURE_OPENAI_API_VERSION,\n AZURE_OPENAI_DEPLOYMENT,\n AZURE_OPENAI_ENDPOINT,\n AZURE_OPENAI_KEY,\n MIDSCENE_API_TYPE,\n MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,\n MIDSCENE_AZURE_OPENAI_SCOPE,\n MIDSCENE_DEBUG_AI_PROFILE,\n MIDSCENE_DEBUG_AI_RESPONSE,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_NAME,\n MIDSCENE_OPENAI_HTTP_PROXY,\n MIDSCENE_OPENAI_INIT_CONFIG_JSON,\n MIDSCENE_OPENAI_SOCKS_PROXY,\n MIDSCENE_USE_ANTHROPIC_SDK,\n MIDSCENE_USE_AZURE_OPENAI,\n OPENAI_API_KEY,\n OPENAI_BASE_URL,\n OPENAI_MAX_TOKENS,\n OPENAI_USE_AZURE,\n getAIConfig,\n getAIConfigInBoolean,\n getAIConfigInJson,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { parseBase64 } from '@midscene/shared/img';\nimport { enableDebug, getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI, { AzureOpenAI } from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { locatorSchema } from '../prompt/llm-locator';\nimport { planSchema } from '../prompt/llm-planning';\n\nexport function checkAIConfig() {\n const openaiKey = getAIConfig(OPENAI_API_KEY);\n const azureConfig = getAIConfig(MIDSCENE_USE_AZURE_OPENAI);\n const anthropicKey = getAIConfig(ANTHROPIC_API_KEY);\n const initConfigJson = getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON);\n\n if (openaiKey) return true;\n if (azureConfig) return true;\n if (anthropicKey) return true;\n\n return Boolean(initConfigJson);\n}\n\n// if debug config is initialized\nlet debugConfigInitialized = false;\n\nfunction initDebugConfig() {\n // if debug config is initialized, return\n if (debugConfigInitialized) return;\n\n const shouldPrintTiming = getAIConfigInBoolean(MIDSCENE_DEBUG_AI_PROFILE);\n let debugConfig = '';\n if (shouldPrintTiming) {\n console.warn(\n 'MIDSCENE_DEBUG_AI_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',\n );\n debugConfig = 'ai:profile';\n }\n const shouldPrintAIResponse = getAIConfigInBoolean(\n MIDSCENE_DEBUG_AI_RESPONSE,\n );\n if (shouldPrintAIResponse) {\n console.warn(\n 'MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',\n );\n if (debugConfig) {\n debugConfig = 'ai:*';\n } else {\n debugConfig = 'ai:call';\n }\n }\n if (debugConfig) {\n enableDebug(debugConfig);\n }\n\n // mark as initialized\n debugConfigInitialized = true;\n}\n\n// default model\nconst defaultModel = 'gpt-4o';\nexport function getModelName() {\n let modelName = defaultModel;\n const nameInConfig = getAIConfig(MIDSCENE_MODEL_NAME);\n if (nameInConfig) {\n modelName = nameInConfig;\n }\n return modelName;\n}\n\nasync function createChatClient({\n AIActionTypeValue,\n}: {\n AIActionTypeValue: AIActionType;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n style: 'openai' | 'anthropic';\n}> {\n initDebugConfig();\n let openai: OpenAI | AzureOpenAI | undefined;\n const extraConfig = getAIConfigInJson(MIDSCENE_OPENAI_INIT_CONFIG_JSON);\n\n const socksProxy = getAIConfig(MIDSCENE_OPENAI_SOCKS_PROXY);\n const httpProxy = getAIConfig(MIDSCENE_OPENAI_HTTP_PROXY);\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n if (getAIConfig(OPENAI_USE_AZURE)) {\n // this is deprecated\n openai = new AzureOpenAI({\n baseURL: getAIConfig(OPENAI_BASE_URL),\n apiKey: getAIConfig(OPENAI_API_KEY),\n httpAgent: proxyAgent,\n ...extraConfig,\n dangerouslyAllowBrowser: true,\n }) as OpenAI;\n } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {\n const extraAzureConfig = getAIConfigInJson(\n MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,\n );\n\n // https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=bash%2Cjavascript-key%2Ctypescript-keyless%2Cpython&pivots=programming-language-javascript#rest-api\n // keyless authentication\n const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);\n let tokenProvider: any = undefined;\n if (scope) {\n assert(\n !ifInBrowser,\n 'Azure OpenAI is not supported in browser with Midscene.',\n );\n const credential = new DefaultAzureCredential();\n\n assert(scope, 'MIDSCENE_AZURE_OPENAI_SCOPE is required');\n tokenProvider = getBearerTokenProvider(credential, scope);\n\n openai = new AzureOpenAI({\n azureADTokenProvider: tokenProvider,\n endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),\n apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),\n deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),\n ...extraConfig,\n ...extraAzureConfig,\n });\n } else {\n // endpoint, apiKey, apiVersion, deployment\n openai = new AzureOpenAI({\n apiKey: getAIConfig(AZURE_OPENAI_KEY),\n endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),\n apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),\n deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),\n dangerouslyAllowBrowser: true,\n ...extraConfig,\n ...extraAzureConfig,\n });\n }\n } else if (!getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {\n const baseURL = getAIConfig(OPENAI_BASE_URL);\n if (typeof baseURL === 'string') {\n if (!/^https?:\\/\\//.test(baseURL)) {\n throw new Error(\n `OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}\\nPlease check your config.`,\n );\n }\n }\n\n openai = new OpenAI({\n baseURL: getAIConfig(OPENAI_BASE_URL),\n apiKey: getAIConfig(OPENAI_API_KEY),\n httpAgent: proxyAgent,\n ...extraConfig,\n defaultHeaders: {\n ...(extraConfig?.defaultHeaders || {}),\n [MIDSCENE_API_TYPE]: AIActionTypeValue.toString(),\n },\n dangerouslyAllowBrowser: true,\n });\n }\n\n if (openai && getAIConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n const { wrapOpenAI } = await import('langsmith/wrappers');\n openai = wrapOpenAI(openai);\n }\n\n if (typeof openai !== 'undefined') {\n return {\n completion: openai.chat.completions,\n style: 'openai',\n };\n }\n\n // Anthropic\n if (getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {\n const apiKey = getAIConfig(ANTHROPIC_API_KEY);\n assert(apiKey, 'ANTHROPIC_API_KEY is required');\n openai = new Anthropic({\n apiKey,\n httpAgent: proxyAgent,\n dangerouslyAllowBrowser: true,\n }) as any;\n }\n\n if (typeof openai !== 'undefined' && (openai as any).messages) {\n return {\n completion: (openai as any).messages,\n style: 'anthropic',\n };\n }\n\n throw new Error('Openai SDK or Anthropic SDK is not initialized');\n}\n\nexport async function call(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n responseFormat?:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n assert(\n checkAIConfig(),\n 'Cannot find config for AI model service. If you are using a self-hosted model without validating the API key, please set `OPENAI_API_KEY` to any non-null value. https://midscenejs.com/model-provider.html',\n );\n\n const { completion, style } = await createChatClient({\n AIActionTypeValue,\n });\n\n const maxTokens = getAIConfig(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const model = getModelName();\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const commonConfig = {\n temperature: vlLocateMode() === 'vlm-ui-tars' ? 0.0 : 0.1,\n stream: !!isStreaming,\n max_tokens:\n typeof maxTokens === 'number'\n ? maxTokens\n : Number.parseInt(maxTokens || '2048', 10),\n ...(vlLocateMode() === 'qwen-vl' // qwen specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n if (style === 'openai') {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${model}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: model,\n },\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${model}, mode, ${vlLocateMode() || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${model}, mode, ${vlLocateMode() || 'default'}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n } else if (style === 'anthropic') {\n const convertImageContent = (content: any) => {\n if (content.type === 'image_url') {\n const imgBase64 = content.image_url.url;\n assert(imgBase64, 'image_url is required');\n const { mimeType, body } = parseBase64(content.image_url.url);\n return {\n source: {\n type: 'base64',\n media_type: mimeType,\n data: body,\n },\n type: 'image',\n };\n }\n return content;\n };\n\n if (isStreaming) {\n const stream = (await completion.create({\n model,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any)) as any;\n\n for await (const chunk of stream) {\n const content = chunk.delta?.text || '';\n if (content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n accumulated,\n reasoning_content: '',\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.type === 'message_stop') {\n timeCost = Date.now() - startTime;\n const anthropicUsage = chunk.usage;\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: anthropicUsage\n ? {\n prompt_tokens: anthropicUsage.input_tokens ?? 0,\n completion_tokens: anthropicUsage.output_tokens ?? 0,\n total_tokens:\n (anthropicUsage.input_tokens ?? 0) +\n (anthropicUsage.output_tokens ?? 0),\n time_cost: timeCost ?? 0,\n model_name: model,\n }\n : undefined,\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n } else {\n const result = await completion.create({\n model,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n content = (result as any).content[0].text as string;\n usage = result.usage;\n }\n\n assert(content, 'empty content');\n }\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n return {\n content: content || '',\n usage: usage\n ? {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: model,\n }\n : undefined,\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callToGetJSONObject<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n let responseFormat:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject\n | undefined;\n\n const model = getModelName();\n\n if (model.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.INSPECT_ELEMENT:\n responseFormat = locatorSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n if (model === 'gpt-4o-2024-05-13') {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n const response = await call(messages, AIActionTypeValue, responseFormat);\n assert(response, 'empty response');\n const jsonContent = safeParseJson(response.content);\n return { content: jsonContent, usage: response.usage };\n}\n\nexport async function callAiFnWithStringResponse<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await call(msgs, AIActionTypeValue);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function safeParseJson(input: string) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n try {\n return JSON.parse(cleanJsonString);\n } catch {}\n try {\n return JSON.parse(jsonrepair(cleanJsonString));\n } catch (e) {}\n\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n return JSON.parse(jsonrepair(jsonString));\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["checkAIConfig","openaiKey","getAIConfig","OPENAI_API_KEY","azureConfig","MIDSCENE_USE_AZURE_OPENAI","anthropicKey","ANTHROPIC_API_KEY","initConfigJson","MIDSCENE_OPENAI_INIT_CONFIG_JSON","Boolean","debugConfigInitialized","initDebugConfig","shouldPrintTiming","getAIConfigInBoolean","MIDSCENE_DEBUG_AI_PROFILE","debugConfig","console","shouldPrintAIResponse","MIDSCENE_DEBUG_AI_RESPONSE","enableDebug","defaultModel","getModelName","modelName","nameInConfig","MIDSCENE_MODEL_NAME","createChatClient","AIActionTypeValue","openai","extraConfig","getAIConfigInJson","socksProxy","MIDSCENE_OPENAI_SOCKS_PROXY","httpProxy","MIDSCENE_OPENAI_HTTP_PROXY","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","OPENAI_USE_AZURE","AzureOpenAI","OPENAI_BASE_URL","extraAzureConfig","MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON","scope","MIDSCENE_AZURE_OPENAI_SCOPE","tokenProvider","assert","ifInBrowser","credential","DefaultAzureCredential","getBearerTokenProvider","AZURE_OPENAI_ENDPOINT","AZURE_OPENAI_API_VERSION","AZURE_OPENAI_DEPLOYMENT","AZURE_OPENAI_KEY","MIDSCENE_USE_ANTHROPIC_SDK","baseURL","Error","OpenAI","MIDSCENE_API_TYPE","MIDSCENE_LANGSMITH_DEBUG","wrapOpenAI","apiKey","Anthropic","call","messages","responseFormat","options","completion","style","maxTokens","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","model","isStreaming","content","accumulated","usage","timeCost","commonConfig","vlLocateMode","Number","stream","chunk","_chunk_choices__delta","_chunk_choices__delta1","_chunk_choices_2","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","_result_usage","_result_usage1","_result_usage2","result","uiTarsModelVersion","JSON","convertImageContent","imgBase64","mimeType","body","parseBase64","m","Array","_chunk_delta","anthropicUsage","e","newError","callToGetJSONObject","AIActionType","assertSchema","locatorSchema","planSchema","AIResponseFormat","response","jsonContent","safeParseJson","callAiFnWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","cleanJsonString","_cleanJsonString_match","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;;;;;AAkDO,SAASA;IACd,MAAMC,YAAYC,YAAYC;IAC9B,MAAMC,cAAcF,YAAYG;IAChC,MAAMC,eAAeJ,YAAYK;IACjC,MAAMC,iBAAiBN,YAAYO;IAEnC,IAAIR,WAAW,OAAO;IACtB,IAAIG,aAAa,OAAO;IACxB,IAAIE,cAAc,OAAO;IAEzB,OAAOI,QAAQF;AACjB;AAGA,IAAIG,yBAAyB;AAE7B,SAASC;IAEP,IAAID,wBAAwB;IAE5B,MAAME,oBAAoBC,qBAAqBC;IAC/C,IAAIC,cAAc;IAClB,IAAIH,mBAAmB;QACrBI,QAAQ,IAAI,CACV;QAEFD,cAAc;IAChB;IACA,MAAME,wBAAwBJ,qBAC5BK;IAEF,IAAID,uBAAuB;QACzBD,QAAQ,IAAI,CACV;QAGAD,cADEA,cACY,SAEA;IAElB;IACA,IAAIA,aACFI,YAAYJ;IAIdL,yBAAyB;AAC3B;AAGA,MAAMU,eAAe;AACd,SAASC;IACd,IAAIC,YAAYF;IAChB,MAAMG,eAAetB,YAAYuB;IACjC,IAAID,cACFD,YAAYC;IAEd,OAAOD;AACT;AAEA,eAAeG,iBAAiB,EAC9BC,iBAAiB,EAGlB;IAICf;IACA,IAAIgB;IACJ,MAAMC,cAAcC,kBAAkBrB;IAEtC,MAAMsB,aAAa7B,YAAY8B;IAC/B,MAAMC,YAAY/B,YAAYgC;IAE9B,IAAIC;IACJ,MAAMC,aAAaC,SAAS;IAC5B,IAAIJ,WAAW;QACbG,WAAW,oBAAoBH;QAC/BE,aAAa,IAAIG,gBAAgBL;IACnC,OAAO,IAAIF,YAAY;QACrBK,WAAW,qBAAqBL;QAChCI,aAAa,IAAII,gBAAgBR;IACnC;IAEA,IAAI7B,YAAYsC,mBAEdZ,SAAS,IAAIa,YAAY;QACvB,SAASvC,YAAYwC;QACrB,QAAQxC,YAAYC;QACpB,WAAWgC;QACX,GAAGN,WAAW;QACd,yBAAyB;IAC3B;SACK,IAAI3B,YAAYG,4BAA4B;QACjD,MAAMsC,mBAAmBb,kBACvBc;QAKF,MAAMC,QAAQ3C,YAAY4C;QAC1B,IAAIC;QACJ,IAAIF,OAAO;YACTG,OACE,CAACC,aACD;YAEF,MAAMC,aAAa,IAAIC;YAEvBH,OAAOH,OAAO;YACdE,gBAAgBK,uBAAuBF,YAAYL;YAEnDjB,SAAS,IAAIa,YAAY;gBACvB,sBAAsBM;gBACtB,UAAU7C,YAAYmD;gBACtB,YAAYnD,YAAYoD;gBACxB,YAAYpD,YAAYqD;gBACxB,GAAG1B,WAAW;gBACd,GAAGc,gBAAgB;YACrB;QACF,OAEEf,SAAS,IAAIa,YAAY;YACvB,QAAQvC,YAAYsD;YACpB,UAAUtD,YAAYmD;YACtB,YAAYnD,YAAYoD;YACxB,YAAYpD,YAAYqD;YACxB,yBAAyB;YACzB,GAAG1B,WAAW;YACd,GAAGc,gBAAgB;QACrB;IAEJ,OAAO,IAAI,CAACzC,YAAYuD,6BAA6B;QACnD,MAAMC,UAAUxD,YAAYwC;QAC5B,IAAI,AAAmB,YAAnB,OAAOgB,SACT;YAAA,IAAI,CAAC,eAAe,IAAI,CAACA,UACvB,MAAM,IAAIC,MACR,CAAC,gFAAgF,EAAED,QAAQ,2BAA2B,CAAC;QAE3H;QAGF9B,SAAS,IAAIgC,SAAO;YAClB,SAAS1D,YAAYwC;YACrB,QAAQxC,YAAYC;YACpB,WAAWgC;YACX,GAAGN,WAAW;YACd,gBAAgB;gBACd,GAAIA,AAAAA,CAAAA,QAAAA,cAAAA,KAAAA,IAAAA,YAAa,cAAc,AAAD,KAAK,CAAC,CAAC;gBACrC,CAACgC,kBAAkB,EAAElC,kBAAkB,QAAQ;YACjD;YACA,yBAAyB;QAC3B;IACF;IAEA,IAAIC,UAAUd,qBAAqBgD,2BAA2B;QAC5D,IAAIb,aACF,MAAM,IAAIU,MAAM;QAElB1C,QAAQ,GAAG,CAAC;QACZ,MAAM,EAAE8C,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC;QACpCnC,SAASmC,WAAWnC;IACtB;IAEA,IAAI,AAAkB,WAAXA,QACT,OAAO;QACL,YAAYA,OAAO,IAAI,CAAC,WAAW;QACnC,OAAO;IACT;IAIF,IAAI1B,YAAYuD,6BAA6B;QAC3C,MAAMO,SAAS9D,YAAYK;QAC3ByC,OAAOgB,QAAQ;QACfpC,SAAS,IAAIqC,UAAU;YACrBD;YACA,WAAW7B;YACX,yBAAyB;QAC3B;IACF;IAEA,IAAI,AAAkB,WAAXP,UAA2BA,OAAe,QAAQ,EAC3D,OAAO;QACL,YAAaA,OAAe,QAAQ;QACpC,OAAO;IACT;IAGF,MAAM,IAAI+B,MAAM;AAClB;AAEO,eAAeO,KACpBC,QAAsC,EACtCxC,iBAA+B,EAC/ByC,cAEmC,EACnCC,OAGC;IAEDrB,OACEhD,iBACA;IAGF,MAAM,EAAEsE,UAAU,EAAEC,KAAK,EAAE,GAAG,MAAM7C,iBAAiB;QACnDC;IACF;IAEA,MAAM6C,YAAYtE,YAAYuE;IAC9B,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,oBAAoBtC,SAAS;IACnC,MAAMuC,qBAAqBvC,SAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,QAAQzD;IACd,MAAM0D,cAAcX,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,MAAM,AAAD,KAAKA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;IACtD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,eAAe;QACnB,aAAaC,AAAmB,kBAAnBA,iBAAmC,MAAM;QACtD,QAAQ,CAAC,CAACN;QACV,YACE,AAAqB,YAArB,OAAOR,YACHA,YACAe,OAAO,QAAQ,CAACf,aAAa,QAAQ;QAC3C,GAAIc,AAAmB,cAAnBA,iBACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACF,IAAIf,AAAU,aAAVA,OAAoB;YACtBG,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAED,OAAO;YAGjE,IAAIC,aAAa;gBACf,MAAMQ,SAAU,MAAMlB,WAAW,MAAM,CACrC;oBACES;oBACAZ;oBACA,iBAAiBC;oBACjB,GAAGiB,YAAY;gBACjB,GACA;oBACE,QAAQ;gBACV;gBAKF,WAAW,MAAMI,SAASD,OAAQ;wBAChBE,uBAAAA,iBAAAA,gBAEbC,wBAAAA,kBAAAA,iBAoBCC,kBAAAA;oBAtBJ,MAAMX,UAAUS,AAAAA,SAAAA,CAAAA,iBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,cAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,wBAAAA,gBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,sBAA2B,OAAO,AAAD,KAAK;oBACtD,MAAMG,oBACJ,AAAC,SAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,yBAAAA,iBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,uBAAmC,iBAAiB,AAAD,KAAK;oBAG3D,IAAIF,MAAM,KAAK,EACbN,QAAQM,MAAM,KAAK;oBAGrB,IAAIR,WAAWY,mBAAmB;wBAChCX,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAY;4BACAX;4BACA,YAAY;4BACZ,OAAOa;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAI,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,iBAAoB,aAAa,EAAE;wBACrCR,WAAWN,KAAK,GAAG,KAAKD;wBAGxB,IAAI,CAACM,OAAO;4BAEV,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACf,YAAY,MAAM,GAAG;4BAElCC,QAAQ;gCACN,eAAea;gCACf,mBAAmBA;gCACnB,cAAcA,AAAkB,IAAlBA;4BAChB;wBACF;wBAGA,MAAME,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO;gCACL,eAAeC,MAAM,aAAa,IAAI;gCACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gCAC9C,cAAcA,MAAM,YAAY,IAAI;gCACpC,WAAWC,YAAY;gCACvB,YAAYL;4BACd;wBACF;wBACAV,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;gBACVP,kBACE,CAAC,iBAAiB,EAAEI,MAAM,QAAQ,EAAEO,kBAAkB,UAAU,WAAW,EAAEF,UAAU;YAE3F,OAAO;oBAUgHe,eAAyDC,gBAAwDC;gBATtO,MAAMC,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrCS;oBACAZ;oBACA,iBAAiBC;oBACjB,GAAGiB,YAAY;gBACjB;gBACAD,WAAWN,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAEI,MAAM,QAAQ,EAAEO,kBAAkB,UAAU,mBAAmB,EAAEiB,qBAAqB,iBAAiB,EAAEJ,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,aAAa,AAAD,KAAK,GAAG,qBAAqB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,iBAAiB,AAAD,KAAK,GAAG,gBAAgB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,YAAY,AAAD,KAAK,GAAG,WAAW,EAAEjB,SAAS,aAAa,EAAEkB,OAAO,WAAW,IAAI,IAAI;gBAGtU1B,mBACE,CAAC,oBAAoB,EAAE4B,KAAK,SAAS,CAACF,OAAO,KAAK,GAAG;gBAGvDtD,OACEsD,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEE,KAAK,SAAS,CAACF,SAAS;gBAEhErB,UAAUqB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3CnB,QAAQmB,OAAO,KAAK;YACtB;YAEA5B,UAAU,CAAC,UAAU,EAAEO,SAAS;YAChCjC,OAAOiC,SAAS;QAClB,OAAO,IAAIV,AAAU,gBAAVA,OAAuB;YAChC,MAAMkC,sBAAsB,CAACxB;gBAC3B,IAAIA,AAAiB,gBAAjBA,QAAQ,IAAI,EAAkB;oBAChC,MAAMyB,YAAYzB,QAAQ,SAAS,CAAC,GAAG;oBACvCjC,OAAO0D,WAAW;oBAClB,MAAM,EAAEC,QAAQ,EAAEC,IAAI,EAAE,GAAGC,YAAY5B,QAAQ,SAAS,CAAC,GAAG;oBAC5D,OAAO;wBACL,QAAQ;4BACN,MAAM;4BACN,YAAY0B;4BACZ,MAAMC;wBACR;wBACA,MAAM;oBACR;gBACF;gBACA,OAAO3B;YACT;YAEA,IAAID,aAAa;gBACf,MAAMQ,SAAU,MAAMlB,WAAW,MAAM,CAAC;oBACtCS;oBACA,QAAQ;oBACR,UAAUZ,SAAS,GAAG,CAAC,CAAC2C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiB1C;oBACjB,GAAGiB,YAAY;gBACjB;gBAEA,WAAW,MAAMI,SAASD,OAAQ;wBAChBwB;oBAAhB,MAAM/B,UAAU+B,AAAAA,SAAAA,CAAAA,eAAAA,MAAM,KAAK,AAAD,IAAVA,KAAAA,IAAAA,aAAa,IAAI,AAAD,KAAK;oBACrC,IAAI/B,SAAS;wBACXC,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAC;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAOa;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAIL,AAAe,mBAAfA,MAAM,IAAI,EAAqB;wBACjCL,WAAWN,KAAK,GAAG,KAAKD;wBACxB,MAAMoC,iBAAiBxB,MAAM,KAAK;wBAGlC,MAAMS,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO+B,iBACH;gCACE,eAAeA,eAAe,YAAY,IAAI;gCAC9C,mBAAmBA,eAAe,aAAa,IAAI;gCACnD,cACGA,AAAAA,CAAAA,eAAe,YAAY,IAAI,KAC/BA,CAAAA,eAAe,aAAa,IAAI;gCACnC,WAAW7B,YAAY;gCACvB,YAAYL;4BACd,IACAgB;wBACN;wBACA1B,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;YACZ,OAAO;gBACL,MAAMoB,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrCS;oBACA,QAAQ;oBACR,UAAUZ,SAAS,GAAG,CAAC,CAAC2C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiB1C;oBACjB,GAAGiB,YAAY;gBACjB;gBACAD,WAAWN,KAAK,GAAG,KAAKD;gBACxBI,UAAWqB,OAAe,OAAO,CAAC,EAAE,CAAC,IAAI;gBACzCnB,QAAQmB,OAAO,KAAK;YACtB;YAEAtD,OAAOiC,SAAS;QAClB;QAEA,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEhB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAea;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASf,WAAW;YACpB,OAAOE,QACH;gBACE,eAAeA,MAAM,aAAa,IAAI;gBACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gBAC9C,cAAcA,MAAM,YAAY,IAAI;gBACpC,WAAWC,YAAY;gBACvB,YAAYL;YACd,IACAgB;YACJ,YAAY,CAAC,CAACf;QAChB;IACF,EAAE,OAAOkC,GAAQ;QACfjG,QAAQ,KAAK,CAAC,kBAAkBiG;QAChC,MAAMC,WAAW,IAAIxD,MACnB,CAAC,eAAe,EAAEqB,cAAc,eAAe,GAAG,kBAAkB,EAAEkC,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,oBACpBjD,QAAsC,EACtCxC,iBAA+B;IAE/B,IAAIyC;IAKJ,MAAMW,QAAQzD;IAEd,IAAIyD,MAAM,QAAQ,CAAC,UACjB,OAAQpD;QACN,KAAK0F,aAAa,MAAM;YACtBjD,iBAAiBkD;YACjB;QACF,KAAKD,aAAa,eAAe;YAC/BjD,iBAAiBmD;YACjB;QACF,KAAKF,aAAa,IAAI;YACpBjD,iBAAiBoD;YACjB;QACF,KAAKH,aAAa,YAAY;QAC9B,KAAKA,aAAa,gBAAgB;YAChCjD,iBAAiB;gBAAE,MAAMqD,iBAAiB,IAAI;YAAC;YAC/C;IACJ;IAIF,IAAI1C,AAAU,wBAAVA,OACFX,iBAAiB;QAAE,MAAMqD,iBAAiB,IAAI;IAAC;IAGjD,MAAMC,WAAW,MAAMxD,KAAKC,UAAUxC,mBAAmByC;IACzDpB,OAAO0E,UAAU;IACjB,MAAMC,cAAcC,cAAcF,SAAS,OAAO;IAClD,OAAO;QAAE,SAASC;QAAa,OAAOD,SAAS,KAAK;IAAC;AACvD;AAEO,eAAeG,2BACpBC,IAAY,EACZnG,iBAA+B;IAE/B,MAAM,EAAEsD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMjB,KAAK4D,MAAMnG;IAC5C,OAAO;QAAEsD;QAASE;IAAM;AAC1B;AAEO,SAAS4C,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASR,cAAcQ,KAAa;IACzC,MAAMC,kBAAkBN,yBAAyBK;IAEjD,IAAIC,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,KAAK,CAAC,oBAAoB;YACtCC;QAAP,OAAO,QAAAA,CAAAA,yBAAAA,gBACJ,KAAK,CAAC,kBAAiB,IADnBA,KAAAA,IAAAA,uBAEH,KAAK,CAAC,GACP,GAAG,CAAC/C;IACT;IACA,IAAI;QACF,OAAOiB,KAAK,KAAK,CAAC6B;IACpB,EAAE,OAAM,CAAC;IACT,IAAI;QACF,OAAO7B,KAAK,KAAK,CAAC+B,WAAWF;IAC/B,EAAE,OAAOnB,GAAG,CAAC;IAEb,IAAI5B,AAAmB,oBAAnBA,kBAAsCA,AAAmB,kBAAnBA,gBAAkC;QAC1E,MAAMkD,aAAaL,yBAAyBE;QAC5C,OAAO7B,KAAK,KAAK,CAAC+B,WAAWC;IAC/B;IACA,MAAM7E,MAAM,CAAC,+BAA+B,EAAEyE,OAAO;AACvD"}
1
+ {"version":3,"file":"ai-model/service-caller/index.mjs","sources":["webpack://@midscene/core/./src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport { Anthropic } from '@anthropic-ai/sdk';\nimport {\n DefaultAzureCredential,\n getBearerTokenProvider,\n} from '@azure/identity';\nimport {\n type IModelPreferences,\n MIDSCENE_API_TYPE,\n MIDSCENE_LANGSMITH_DEBUG,\n OPENAI_MAX_TOKENS,\n decideModelConfig,\n getAIConfig,\n getAIConfigInBoolean,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { parseBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI, { AzureOpenAI } from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { locatorSchema } from '../prompt/llm-locator';\nimport { planSchema } from '../prompt/llm-planning';\n\nasync function createChatClient({\n AIActionTypeValue,\n modelPreferences,\n}: {\n AIActionTypeValue: AIActionType;\n modelPreferences: IModelPreferences;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n style: 'openai' | 'anthropic';\n modelName: string;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n openaiUseAzureDeprecated,\n useAzureOpenai,\n azureOpenaiScope,\n azureOpenaiKey,\n azureOpenaiEndpoint,\n azureOpenaiApiVersion,\n azureOpenaiDeployment,\n azureExtraConfig,\n useAnthropicSdk,\n anthropicApiKey,\n } = decideModelConfig(modelPreferences, true);\n\n let openai: OpenAI | AzureOpenAI | undefined;\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n if (openaiUseAzureDeprecated) {\n // this is deprecated\n openai = new AzureOpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n dangerouslyAllowBrowser: true,\n }) as OpenAI;\n } else if (useAzureOpenai) {\n // https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=bash%2Cjavascript-key%2Ctypescript-keyless%2Cpython&pivots=programming-language-javascript#rest-api\n // keyless authentication\n let tokenProvider: any = undefined;\n if (azureOpenaiScope) {\n assert(\n !ifInBrowser,\n 'Azure OpenAI is not supported in browser with Midscene.',\n );\n const credential = new DefaultAzureCredential();\n\n tokenProvider = getBearerTokenProvider(credential, azureOpenaiScope);\n\n openai = new AzureOpenAI({\n azureADTokenProvider: tokenProvider,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n } else {\n // endpoint, apiKey, apiVersion, deployment\n openai = new AzureOpenAI({\n apiKey: azureOpenaiKey,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n dangerouslyAllowBrowser: true,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n }\n } else if (!useAnthropicSdk) {\n openai = new OpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n defaultHeaders: {\n ...(openaiExtraConfig?.defaultHeaders || {}),\n [MIDSCENE_API_TYPE]: AIActionTypeValue.toString(),\n },\n dangerouslyAllowBrowser: true,\n });\n }\n\n if (openai && getAIConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n const { wrapOpenAI } = await import('langsmith/wrappers');\n openai = wrapOpenAI(openai);\n }\n\n if (typeof openai !== 'undefined') {\n return {\n completion: openai.chat.completions,\n style: 'openai',\n modelName,\n };\n }\n\n // Anthropic\n if (useAnthropicSdk) {\n openai = new Anthropic({\n apiKey: anthropicApiKey,\n httpAgent: proxyAgent,\n dangerouslyAllowBrowser: true,\n }) as any;\n }\n\n if (typeof openai !== 'undefined' && (openai as any).messages) {\n return {\n completion: (openai as any).messages,\n style: 'anthropic',\n modelName,\n };\n }\n\n throw new Error('Openai SDK or Anthropic SDK is not initialized');\n}\n\nexport async function call(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelPreferences: IModelPreferences,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n const { completion, style, modelName } = await createChatClient({\n AIActionTypeValue,\n modelPreferences,\n });\n\n const responseFormat = getResponseFormat(modelName, AIActionTypeValue);\n\n const maxTokens = getAIConfig(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const commonConfig = {\n temperature: vlLocateMode(modelPreferences) === 'vlm-ui-tars' ? 0.0 : 0.1,\n stream: !!isStreaming,\n max_tokens:\n typeof maxTokens === 'number'\n ? maxTokens\n : Number.parseInt(maxTokens || '2048', 10),\n ...(vlLocateMode(modelPreferences) === 'qwen-vl' // qwen specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n if (style === 'openai') {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n },\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlLocateMode(modelPreferences) || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlLocateMode(modelPreferences) || 'default'}, ui-tars-version, ${uiTarsModelVersion(modelPreferences)}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n } else if (style === 'anthropic') {\n const convertImageContent = (content: any) => {\n if (content.type === 'image_url') {\n const imgBase64 = content.image_url.url;\n assert(imgBase64, 'image_url is required');\n const { mimeType, body } = parseBase64(content.image_url.url);\n return {\n source: {\n type: 'base64',\n media_type: mimeType,\n data: body,\n },\n type: 'image',\n };\n }\n return content;\n };\n\n if (isStreaming) {\n const stream = (await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any)) as any;\n\n for await (const chunk of stream) {\n const content = chunk.delta?.text || '';\n if (content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n accumulated,\n reasoning_content: '',\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.type === 'message_stop') {\n timeCost = Date.now() - startTime;\n const anthropicUsage = chunk.usage;\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: anthropicUsage\n ? {\n prompt_tokens: anthropicUsage.input_tokens ?? 0,\n completion_tokens: anthropicUsage.output_tokens ?? 0,\n total_tokens:\n (anthropicUsage.input_tokens ?? 0) +\n (anthropicUsage.output_tokens ?? 0),\n time_cost: timeCost ?? 0,\n model_name: modelName,\n }\n : undefined,\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n } else {\n const result = await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n content = (result as any).content[0].text as string;\n usage = result.usage;\n }\n\n assert(content, 'empty content');\n }\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n return {\n content: content || '',\n usage: usage\n ? {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n }\n : undefined,\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport const getResponseFormat = (\n modelName: string,\n AIActionTypeValue: AIActionType,\n):\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject => {\n let responseFormat:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject\n | undefined;\n\n if (modelName.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.INSPECT_ELEMENT:\n responseFormat = locatorSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n if (modelName === 'gpt-4o-2024-05-13') {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n return responseFormat;\n};\n\nexport async function callToGetJSONObject<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelPreferences: IModelPreferences,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const response = await call(messages, AIActionTypeValue, modelPreferences);\n assert(response, 'empty response');\n const jsonContent = safeParseJson(response.content, modelPreferences);\n return { content: jsonContent, usage: response.usage };\n}\n\nexport async function callAiFnWithStringResponse<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelPreferences: IModelPreferences,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await call(\n msgs,\n AIActionTypeValue,\n modelPreferences,\n );\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function safeParseJson(\n input: string,\n modelPreferences: IModelPreferences,\n) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n try {\n return JSON.parse(cleanJsonString);\n } catch {}\n try {\n return JSON.parse(jsonrepair(cleanJsonString));\n } catch (e) {}\n\n if (\n vlLocateMode(modelPreferences) === 'doubao-vision' ||\n vlLocateMode(modelPreferences) === 'vlm-ui-tars'\n ) {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n return JSON.parse(jsonrepair(jsonString));\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["createChatClient","AIActionTypeValue","modelPreferences","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","openaiUseAzureDeprecated","useAzureOpenai","azureOpenaiScope","azureOpenaiKey","azureOpenaiEndpoint","azureOpenaiApiVersion","azureOpenaiDeployment","azureExtraConfig","useAnthropicSdk","anthropicApiKey","decideModelConfig","openai","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","AzureOpenAI","tokenProvider","assert","ifInBrowser","credential","DefaultAzureCredential","getBearerTokenProvider","OpenAI","MIDSCENE_API_TYPE","getAIConfigInBoolean","MIDSCENE_LANGSMITH_DEBUG","Error","console","wrapOpenAI","Anthropic","call","messages","options","completion","style","responseFormat","getResponseFormat","maxTokens","getAIConfig","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","isStreaming","content","accumulated","usage","timeCost","commonConfig","vlLocateMode","Number","stream","chunk","_chunk_choices__delta","_chunk_choices__delta1","_chunk_choices_2","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","_result_usage","_result_usage1","_result_usage2","result","uiTarsModelVersion","JSON","convertImageContent","imgBase64","mimeType","body","parseBase64","m","Array","_chunk_delta","anthropicUsage","e","newError","AIActionType","assertSchema","locatorSchema","planSchema","AIResponseFormat","callToGetJSONObject","response","jsonContent","safeParseJson","callAiFnWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","cleanJsonString","_cleanJsonString_match","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;;;;;AAiCA,eAAeA,iBAAiB,EAC9BC,iBAAiB,EACjBC,gBAAgB,EAIjB;IAKC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,wBAAwB,EACxBC,cAAc,EACdC,gBAAgB,EAChBC,cAAc,EACdC,mBAAmB,EACnBC,qBAAqB,EACrBC,qBAAqB,EACrBC,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EAChB,GAAGC,kBAAkBjB,kBAAkB;IAExC,IAAIkB;IAEJ,IAAIC;IACJ,MAAMC,aAAaC,SAAS;IAC5B,IAAInB,WAAW;QACbkB,WAAW,oBAAoBlB;QAC/BiB,aAAa,IAAIG,gBAAgBpB;IACnC,OAAO,IAAID,YAAY;QACrBmB,WAAW,qBAAqBnB;QAChCkB,aAAa,IAAII,gBAAgBtB;IACnC;IAEA,IAAIM,0BAEFW,SAAS,IAAIM,YAAY;QACvB,SAASpB;QACT,QAAQC;QACR,WAAWc;QACX,GAAGb,iBAAiB;QACpB,yBAAyB;IAC3B;SACK,IAAIE,gBAAgB;QAGzB,IAAIiB;QACJ,IAAIhB,kBAAkB;YACpBiB,OACE,CAACC,aACD;YAEF,MAAMC,aAAa,IAAIC;YAEvBJ,gBAAgBK,uBAAuBF,YAAYnB;YAEnDS,SAAS,IAAIM,YAAY;gBACvB,sBAAsBC;gBACtB,UAAUd;gBACV,YAAYC;gBACZ,YAAYC;gBACZ,GAAGP,iBAAiB;gBACpB,GAAGQ,gBAAgB;YACrB;QACF,OAEEI,SAAS,IAAIM,YAAY;YACvB,QAAQd;YACR,UAAUC;YACV,YAAYC;YACZ,YAAYC;YACZ,yBAAyB;YACzB,GAAGP,iBAAiB;YACpB,GAAGQ,gBAAgB;QACrB;IAEJ,OAAO,IAAI,CAACC,iBACVG,SAAS,IAAIa,SAAO;QAClB,SAAS3B;QACT,QAAQC;QACR,WAAWc;QACX,GAAGb,iBAAiB;QACpB,gBAAgB;YACd,GAAIA,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,cAAc,AAAD,KAAK,CAAC,CAAC;YAC3C,CAAC0B,kBAAkB,EAAEjC,kBAAkB,QAAQ;QACjD;QACA,yBAAyB;IAC3B;IAGF,IAAImB,UAAUe,qBAAqBC,2BAA2B;QAC5D,IAAIP,aACF,MAAM,IAAIQ,MAAM;QAElBC,QAAQ,GAAG,CAAC;QACZ,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC;QACpCnB,SAASmB,WAAWnB;IACtB;IAEA,IAAI,AAAkB,WAAXA,QACT,OAAO;QACL,YAAYA,OAAO,IAAI,CAAC,WAAW;QACnC,OAAO;QACPf;IACF;IAIF,IAAIY,iBACFG,SAAS,IAAIoB,UAAU;QACrB,QAAQtB;QACR,WAAWG;QACX,yBAAyB;IAC3B;IAGF,IAAI,AAAkB,WAAXD,UAA2BA,OAAe,QAAQ,EAC3D,OAAO;QACL,YAAaA,OAAe,QAAQ;QACpC,OAAO;QACPf;IACF;IAGF,MAAM,IAAIgC,MAAM;AAClB;AAEO,eAAeI,KACpBC,QAAsC,EACtCzC,iBAA+B,EAC/BC,gBAAmC,EACnCyC,OAGC;IAED,MAAM,EAAEC,UAAU,EAAEC,KAAK,EAAExC,SAAS,EAAE,GAAG,MAAML,iBAAiB;QAC9DC;QACAC;IACF;IAEA,MAAM4C,iBAAiBC,kBAAkB1C,WAAWJ;IAEpD,MAAM+C,YAAYC,YAAYC;IAC9B,MAAMC,YAAY5B,SAAS;IAC3B,MAAM6B,oBAAoB7B,SAAS;IACnC,MAAM8B,qBAAqB9B,SAAS;IAEpC,MAAM+B,YAAYC,KAAK,GAAG;IAE1B,MAAMC,cAAcb,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,MAAM,AAAD,KAAKA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;IACtD,IAAIc;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,eAAe;QACnB,aAAaC,AAAmC,kBAAnCA,aAAa5D,oBAAsC,MAAM;QACtE,QAAQ,CAAC,CAACsD;QACV,YACE,AAAqB,YAArB,OAAOR,YACHA,YACAe,OAAO,QAAQ,CAACf,aAAa,QAAQ;QAC3C,GAAIc,AAAmC,cAAnCA,aAAa5D,oBACb;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACF,IAAI2C,AAAU,aAAVA,OAAoB;YACtBM,UACE,CAAC,QAAQ,EAAEK,cAAc,eAAe,GAAG,WAAW,EAAEnD,WAAW;YAGrE,IAAImD,aAAa;gBACf,MAAMQ,SAAU,MAAMpB,WAAW,MAAM,CACrC;oBACE,OAAOvC;oBACPqC;oBACA,iBAAiBI;oBACjB,GAAGe,YAAY;gBACjB,GACA;oBACE,QAAQ;gBACV;gBAKF,WAAW,MAAMI,SAASD,OAAQ;wBAChBE,uBAAAA,iBAAAA,gBAEbC,wBAAAA,kBAAAA,iBAoBCC,kBAAAA;oBAtBJ,MAAMX,UAAUS,AAAAA,SAAAA,CAAAA,iBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,cAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,wBAAAA,gBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,sBAA2B,OAAO,AAAD,KAAK;oBACtD,MAAMG,oBACJ,AAAC,SAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,yBAAAA,iBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,uBAAmC,iBAAiB,AAAD,KAAK;oBAG3D,IAAIF,MAAM,KAAK,EACbN,QAAQM,MAAM,KAAK;oBAGrB,IAAIR,WAAWY,mBAAmB;wBAChCX,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAY;4BACAX;4BACA,YAAY;4BACZ,OAAOa;wBACT;wBACA5B,QAAQ,OAAO,CAAE2B;oBACnB;oBAGA,IAAI,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,iBAAoB,aAAa,EAAE;wBACrCR,WAAWL,KAAK,GAAG,KAAKD;wBAGxB,IAAI,CAACK,OAAO;4BAEV,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACf,YAAY,MAAM,GAAG;4BAElCC,QAAQ;gCACN,eAAea;gCACf,mBAAmBA;gCACnB,cAAcA,AAAkB,IAAlBA;4BAChB;wBACF;wBAGA,MAAME,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO;gCACL,eAAeC,MAAM,aAAa,IAAI;gCACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gCAC9C,cAAcA,MAAM,YAAY,IAAI;gCACpC,WAAWC,YAAY;gCACvB,YAAYvD;4BACd;wBACF;wBACAsC,QAAQ,OAAO,CAAE+B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;gBACVN,kBACE,CAAC,iBAAiB,EAAE/C,UAAU,QAAQ,EAAEyD,aAAa5D,qBAAqB,UAAU,WAAW,EAAE0D,UAAU;YAE/G,OAAO;oBAUoJe,eAAyDC,gBAAwDC;gBAT1Q,MAAMC,SAAS,MAAMlC,WAAW,MAAM,CAAC;oBACrC,OAAOvC;oBACPqC;oBACA,iBAAiBI;oBACjB,GAAGe,YAAY;gBACjB;gBACAD,WAAWL,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAE/C,UAAU,QAAQ,EAAEyD,aAAa5D,qBAAqB,UAAU,mBAAmB,EAAE6E,mBAAmB7E,kBAAkB,iBAAiB,EAAEyE,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,aAAa,AAAD,KAAK,GAAG,qBAAqB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,iBAAiB,AAAD,KAAK,GAAG,gBAAgB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,YAAY,AAAD,KAAK,GAAG,WAAW,EAAEjB,SAAS,aAAa,EAAEkB,OAAO,WAAW,IAAI,IAAI;gBAG1WzB,mBACE,CAAC,oBAAoB,EAAE2B,KAAK,SAAS,CAACF,OAAO,KAAK,GAAG;gBAGvDlD,OACEkD,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEE,KAAK,SAAS,CAACF,SAAS;gBAEhErB,UAAUqB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3CnB,QAAQmB,OAAO,KAAK;YACtB;YAEA3B,UAAU,CAAC,UAAU,EAAEM,SAAS;YAChC7B,OAAO6B,SAAS;QAClB,OAAO,IAAIZ,AAAU,gBAAVA,OAAuB;YAChC,MAAMoC,sBAAsB,CAACxB;gBAC3B,IAAIA,AAAiB,gBAAjBA,QAAQ,IAAI,EAAkB;oBAChC,MAAMyB,YAAYzB,QAAQ,SAAS,CAAC,GAAG;oBACvC7B,OAAOsD,WAAW;oBAClB,MAAM,EAAEC,QAAQ,EAAEC,IAAI,EAAE,GAAGC,YAAY5B,QAAQ,SAAS,CAAC,GAAG;oBAC5D,OAAO;wBACL,QAAQ;4BACN,MAAM;4BACN,YAAY0B;4BACZ,MAAMC;wBACR;wBACA,MAAM;oBACR;gBACF;gBACA,OAAO3B;YACT;YAEA,IAAID,aAAa;gBACf,MAAMQ,SAAU,MAAMpB,WAAW,MAAM,CAAC;oBACtC,OAAOvC;oBACP,QAAQ;oBACR,UAAUqC,SAAS,GAAG,CAAC,CAAC4C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiBxC;oBACjB,GAAGe,YAAY;gBACjB;gBAEA,WAAW,MAAMI,SAASD,OAAQ;wBAChBwB;oBAAhB,MAAM/B,UAAU+B,AAAAA,SAAAA,CAAAA,eAAAA,MAAM,KAAK,AAAD,IAAVA,KAAAA,IAAAA,aAAa,IAAI,AAAD,KAAK;oBACrC,IAAI/B,SAAS;wBACXC,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAC;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAOa;wBACT;wBACA5B,QAAQ,OAAO,CAAE2B;oBACnB;oBAGA,IAAIL,AAAe,mBAAfA,MAAM,IAAI,EAAqB;wBACjCL,WAAWL,KAAK,GAAG,KAAKD;wBACxB,MAAMmC,iBAAiBxB,MAAM,KAAK;wBAGlC,MAAMS,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO+B,iBACH;gCACE,eAAeA,eAAe,YAAY,IAAI;gCAC9C,mBAAmBA,eAAe,aAAa,IAAI;gCACnD,cACGA,AAAAA,CAAAA,eAAe,YAAY,IAAI,KAC/BA,CAAAA,eAAe,aAAa,IAAI;gCACnC,WAAW7B,YAAY;gCACvB,YAAYvD;4BACd,IACAkE;wBACN;wBACA5B,QAAQ,OAAO,CAAE+B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;YACZ,OAAO;gBACL,MAAMoB,SAAS,MAAMlC,WAAW,MAAM,CAAC;oBACrC,OAAOvC;oBACP,QAAQ;oBACR,UAAUqC,SAAS,GAAG,CAAC,CAAC4C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiBxC;oBACjB,GAAGe,YAAY;gBACjB;gBACAD,WAAWL,KAAK,GAAG,KAAKD;gBACxBG,UAAWqB,OAAe,OAAO,CAAC,EAAE,CAAC,IAAI;gBACzCnB,QAAQmB,OAAO,KAAK;YACtB;YAEAlD,OAAO6B,SAAS;QAClB;QAEA,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEhB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAea;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASf,WAAW;YACpB,OAAOE,QACH;gBACE,eAAeA,MAAM,aAAa,IAAI;gBACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gBAC9C,cAAcA,MAAM,YAAY,IAAI;gBACpC,WAAWC,YAAY;gBACvB,YAAYvD;YACd,IACAkE;YACJ,YAAY,CAAC,CAACf;QAChB;IACF,EAAE,OAAOkC,GAAQ;QACfpD,QAAQ,KAAK,CAAC,kBAAkBoD;QAChC,MAAMC,WAAW,IAAItD,MACnB,CAAC,eAAe,EAAEmB,cAAc,eAAe,GAAG,kBAAkB,EAAEkC,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,MAAM5C,oBAAoB,CAC/B1C,WACAJ;IAIA,IAAI6C;IAKJ,IAAIzC,UAAU,QAAQ,CAAC,UACrB,OAAQJ;QACN,KAAK2F,aAAa,MAAM;YACtB9C,iBAAiB+C;YACjB;QACF,KAAKD,aAAa,eAAe;YAC/B9C,iBAAiBgD;YACjB;QACF,KAAKF,aAAa,IAAI;YACpB9C,iBAAiBiD;YACjB;QACF,KAAKH,aAAa,YAAY;QAC9B,KAAKA,aAAa,gBAAgB;YAChC9C,iBAAiB;gBAAE,MAAMkD,iBAAiB,IAAI;YAAC;YAC/C;IACJ;IAIF,IAAI3F,AAAc,wBAAdA,WACFyC,iBAAiB;QAAE,MAAMkD,iBAAiB,IAAI;IAAC;IAGjD,OAAOlD;AACT;AAEO,eAAemD,oBACpBvD,QAAsC,EACtCzC,iBAA+B,EAC/BC,gBAAmC;IAEnC,MAAMgG,WAAW,MAAMzD,KAAKC,UAAUzC,mBAAmBC;IACzD0B,OAAOsE,UAAU;IACjB,MAAMC,cAAcC,cAAcF,SAAS,OAAO,EAAEhG;IACpD,OAAO;QAAE,SAASiG;QAAa,OAAOD,SAAS,KAAK;IAAC;AACvD;AAEO,eAAeG,2BACpBC,IAAY,EACZrG,iBAA+B,EAC/BC,gBAAmC;IAEnC,MAAM,EAAEuD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMlB,KAC/B6D,MACArG,mBACAC;IAEF,OAAO;QAAEuD;QAASE;IAAM;AAC1B;AAEO,SAAS4C,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASR,cACdQ,KAAa,EACb1G,gBAAmC;IAEnC,MAAM2G,kBAAkBN,yBAAyBK;IAEjD,IAAIC,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,KAAK,CAAC,oBAAoB;YACtCC;QAAP,OAAO,QAAAA,CAAAA,yBAAAA,gBACJ,KAAK,CAAC,kBAAiB,IADnBA,KAAAA,IAAAA,uBAEH,KAAK,CAAC,GACP,GAAG,CAAC/C;IACT;IACA,IAAI;QACF,OAAOiB,KAAK,KAAK,CAAC6B;IACpB,EAAE,OAAM,CAAC;IACT,IAAI;QACF,OAAO7B,KAAK,KAAK,CAAC+B,WAAWF;IAC/B,EAAE,OAAOnB,GAAG,CAAC;IAEb,IACE5B,AAAmC,oBAAnCA,aAAa5D,qBACb4D,AAAmC,kBAAnCA,aAAa5D,mBACb;QACA,MAAM8G,aAAaL,yBAAyBE;QAC5C,OAAO7B,KAAK,KAAK,CAAC+B,WAAWC;IAC/B;IACA,MAAM3E,MAAM,CAAC,+BAA+B,EAAEuE,OAAO;AACvD"}
@@ -16,7 +16,7 @@ const pointToBbox = (point, width, height)=>[
16
16
  Math.round(Math.min(point.y + bboxSize / 2, height))
17
17
  ];
18
18
  async function vlmPlanning(options) {
19
- const { conversationHistory, userInstruction, size } = options;
19
+ const { conversationHistory, userInstruction, size, modelPreferences } = options;
20
20
  const systemPrompt = getUiTarsPlanningPrompt() + userInstruction;
21
21
  const res = await call([
22
22
  {
@@ -24,9 +24,9 @@ async function vlmPlanning(options) {
24
24
  content: systemPrompt
25
25
  },
26
26
  ...conversationHistory
27
- ], AIActionType.INSPECT_ELEMENT);
27
+ ], AIActionType.INSPECT_ELEMENT, modelPreferences);
28
28
  const convertedText = convertBboxToCoordinates(res.content);
29
- const modelVer = uiTarsModelVersion();
29
+ const modelVer = uiTarsModelVersion(modelPreferences);
30
30
  const { parsed } = actionParser({
31
31
  prediction: convertedText,
32
32
  factor: [
@@ -211,8 +211,8 @@ function getPoint(startBox, size) {
211
211
  y * size.height
212
212
  ];
213
213
  }
214
- async function resizeImageForUiTars(imageBase64, size) {
215
- if ('vlm-ui-tars' === vlLocateMode() && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
214
+ async function resizeImageForUiTars(imageBase64, size, modelPreferences) {
215
+ if ('vlm-ui-tars' === vlLocateMode(modelPreferences) && uiTarsModelVersion(modelPreferences) === UITarsModelVersion.V1_5) {
216
216
  debug('ui-tars-v1.5, will check image size', size);
217
217
  const currentPixels = size.width * size.height;
218
218
  const maxPixels = 12845056;