@midscene/core 0.30.5-beta-20251020035347.0 → 0.30.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,21 @@
1
1
  "use strict";
2
- var __webpack_require__ = {};
2
+ var __webpack_modules__ = {
3
+ "langsmith/wrappers": function(module) {
4
+ module.exports = import("langsmith/wrappers").then(function(module) {
5
+ return module;
6
+ });
7
+ }
8
+ };
9
+ var __webpack_module_cache__ = {};
10
+ function __webpack_require__(moduleId) {
11
+ var cachedModule = __webpack_module_cache__[moduleId];
12
+ if (void 0 !== cachedModule) return cachedModule.exports;
13
+ var module = __webpack_module_cache__[moduleId] = {
14
+ exports: {}
15
+ };
16
+ __webpack_modules__[moduleId](module, module.exports, __webpack_require__);
17
+ return module.exports;
18
+ }
3
19
  (()=>{
4
20
  __webpack_require__.n = (module)=>{
5
21
  var getter = module && module.__esModule ? ()=>module['default'] : ()=>module;
@@ -31,266 +47,404 @@ var __webpack_require__ = {};
31
47
  };
32
48
  })();
33
49
  var __webpack_exports__ = {};
34
- __webpack_require__.r(__webpack_exports__);
35
- __webpack_require__.d(__webpack_exports__, {
36
- extractJSONFromCodeBlock: ()=>extractJSONFromCodeBlock,
37
- callAIWithStringResponse: ()=>callAIWithStringResponse,
38
- preprocessDoubaoBboxJson: ()=>preprocessDoubaoBboxJson,
39
- callAIWithObjectResponse: ()=>callAIWithObjectResponse,
40
- getResponseFormat: ()=>getResponseFormat,
41
- safeParseJson: ()=>safeParseJson,
42
- callAI: ()=>callAI
43
- });
44
- const external_types_js_namespaceObject = require("../../types.js");
45
- const env_namespaceObject = require("@midscene/shared/env");
46
- const logger_namespaceObject = require("@midscene/shared/logger");
47
- const utils_namespaceObject = require("@midscene/shared/utils");
48
- const external_https_proxy_agent_namespaceObject = require("https-proxy-agent");
49
- const external_jsonrepair_namespaceObject = require("jsonrepair");
50
- const external_openai_namespaceObject = require("openai");
51
- var external_openai_default = /*#__PURE__*/ __webpack_require__.n(external_openai_namespaceObject);
52
- const external_socks_proxy_agent_namespaceObject = require("socks-proxy-agent");
53
- const external_common_js_namespaceObject = require("../common.js");
54
- const assertion_js_namespaceObject = require("../prompt/assertion.js");
55
- const llm_locator_js_namespaceObject = require("../prompt/llm-locator.js");
56
- const llm_planning_js_namespaceObject = require("../prompt/llm-planning.js");
57
- async function createChatClient({ AIActionTypeValue, modelConfig }) {
58
- const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode } = modelConfig;
59
- let proxyAgent;
60
- const debugProxy = (0, logger_namespaceObject.getDebug)('ai:call:proxy');
61
- if (httpProxy) {
62
- debugProxy('using http proxy', httpProxy);
63
- proxyAgent = new external_https_proxy_agent_namespaceObject.HttpsProxyAgent(httpProxy);
64
- } else if (socksProxy) {
65
- debugProxy('using socks proxy', socksProxy);
66
- proxyAgent = new external_socks_proxy_agent_namespaceObject.SocksProxyAgent(socksProxy);
67
- }
68
- const openai = new (external_openai_default())({
69
- baseURL: openaiBaseURL,
70
- apiKey: openaiApiKey,
71
- ...proxyAgent ? {
72
- httpAgent: proxyAgent
73
- } : {},
74
- ...openaiExtraConfig,
75
- defaultHeaders: {
76
- ...(null == openaiExtraConfig ? void 0 : openaiExtraConfig.defaultHeaders) || {},
77
- [env_namespaceObject.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
78
- },
79
- dangerouslyAllowBrowser: true
80
- });
81
- return {
82
- completion: openai.chat.completions,
83
- modelName,
84
- modelDescription,
85
- uiTarsVersion,
86
- vlMode
87
- };
88
- }
89
- async function callAI(messages, AIActionTypeValue, modelConfig, options) {
90
- const { completion, modelName, modelDescription, uiTarsVersion, vlMode } = await createChatClient({
91
- AIActionTypeValue,
92
- modelConfig
50
+ (()=>{
51
+ __webpack_require__.r(__webpack_exports__);
52
+ __webpack_require__.d(__webpack_exports__, {
53
+ extractJSONFromCodeBlock: ()=>extractJSONFromCodeBlock,
54
+ callAIWithStringResponse: ()=>callAIWithStringResponse,
55
+ preprocessDoubaoBboxJson: ()=>preprocessDoubaoBboxJson,
56
+ callAIWithObjectResponse: ()=>callAIWithObjectResponse,
57
+ getResponseFormat: ()=>getResponseFormat,
58
+ safeParseJson: ()=>safeParseJson,
59
+ callAI: ()=>callAI
93
60
  });
94
- const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
95
- const maxTokens = env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.OPENAI_MAX_TOKENS);
96
- const debugCall = (0, logger_namespaceObject.getDebug)('ai:call');
97
- const debugProfileStats = (0, logger_namespaceObject.getDebug)('ai:profile:stats');
98
- const debugProfileDetail = (0, logger_namespaceObject.getDebug)('ai:profile:detail');
99
- const startTime = Date.now();
100
- const isStreaming = (null == options ? void 0 : options.stream) && (null == options ? void 0 : options.onChunk);
101
- let content;
102
- let accumulated = '';
103
- let usage;
104
- let timeCost;
105
- const commonConfig = {
106
- temperature: 'vlm-ui-tars' === vlMode ? 0.0 : 0.1,
107
- stream: !!isStreaming,
108
- max_tokens: 'number' == typeof maxTokens ? maxTokens : Number.parseInt(maxTokens || '2048', 10),
109
- ...'qwen-vl' === vlMode ? {
110
- vl_high_resolution_images: true
111
- } : {}
112
- };
113
- try {
114
- debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
115
- if (isStreaming) {
116
- const stream = await completion.create({
117
- model: modelName,
118
- messages,
119
- response_format: responseFormat,
120
- ...commonConfig
121
- }, {
122
- stream: true
61
+ const external_types_js_namespaceObject = require("../../types.js");
62
+ const sdk_namespaceObject = require("@anthropic-ai/sdk");
63
+ const identity_namespaceObject = require("@azure/identity");
64
+ const env_namespaceObject = require("@midscene/shared/env");
65
+ const img_namespaceObject = require("@midscene/shared/img");
66
+ const logger_namespaceObject = require("@midscene/shared/logger");
67
+ const utils_namespaceObject = require("@midscene/shared/utils");
68
+ const external_https_proxy_agent_namespaceObject = require("https-proxy-agent");
69
+ const external_jsonrepair_namespaceObject = require("jsonrepair");
70
+ const external_openai_namespaceObject = require("openai");
71
+ var external_openai_default = /*#__PURE__*/ __webpack_require__.n(external_openai_namespaceObject);
72
+ const external_socks_proxy_agent_namespaceObject = require("socks-proxy-agent");
73
+ const external_common_js_namespaceObject = require("../common.js");
74
+ const assertion_js_namespaceObject = require("../prompt/assertion.js");
75
+ const llm_locator_js_namespaceObject = require("../prompt/llm-locator.js");
76
+ const llm_planning_js_namespaceObject = require("../prompt/llm-planning.js");
77
+ async function createChatClient({ AIActionTypeValue, modelConfig }) {
78
+ const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, openaiUseAzureDeprecated, useAzureOpenai, azureOpenaiScope, azureOpenaiKey, azureOpenaiEndpoint, azureOpenaiApiVersion, azureOpenaiDeployment, azureExtraConfig, useAnthropicSdk, anthropicApiKey, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode } = modelConfig;
79
+ let openai;
80
+ let proxyAgent;
81
+ const debugProxy = (0, logger_namespaceObject.getDebug)('ai:call:proxy');
82
+ if (httpProxy) {
83
+ debugProxy('using http proxy', httpProxy);
84
+ proxyAgent = new external_https_proxy_agent_namespaceObject.HttpsProxyAgent(httpProxy);
85
+ } else if (socksProxy) {
86
+ debugProxy('using socks proxy', socksProxy);
87
+ proxyAgent = new external_socks_proxy_agent_namespaceObject.SocksProxyAgent(socksProxy);
88
+ }
89
+ if (openaiUseAzureDeprecated) openai = new external_openai_namespaceObject.AzureOpenAI({
90
+ baseURL: openaiBaseURL,
91
+ apiKey: openaiApiKey,
92
+ httpAgent: proxyAgent,
93
+ ...openaiExtraConfig,
94
+ dangerouslyAllowBrowser: true
95
+ });
96
+ else if (useAzureOpenai) {
97
+ let tokenProvider;
98
+ if (azureOpenaiScope) {
99
+ (0, utils_namespaceObject.assert)(!utils_namespaceObject.ifInBrowser, 'Azure OpenAI is not supported in browser with Midscene.');
100
+ const credential = new identity_namespaceObject.DefaultAzureCredential();
101
+ tokenProvider = (0, identity_namespaceObject.getBearerTokenProvider)(credential, azureOpenaiScope);
102
+ openai = new external_openai_namespaceObject.AzureOpenAI({
103
+ azureADTokenProvider: tokenProvider,
104
+ endpoint: azureOpenaiEndpoint,
105
+ apiVersion: azureOpenaiApiVersion,
106
+ deployment: azureOpenaiDeployment,
107
+ ...openaiExtraConfig,
108
+ ...azureExtraConfig
109
+ });
110
+ } else openai = new external_openai_namespaceObject.AzureOpenAI({
111
+ apiKey: azureOpenaiKey,
112
+ endpoint: azureOpenaiEndpoint,
113
+ apiVersion: azureOpenaiApiVersion,
114
+ deployment: azureOpenaiDeployment,
115
+ dangerouslyAllowBrowser: true,
116
+ ...openaiExtraConfig,
117
+ ...azureExtraConfig
123
118
  });
124
- for await (const chunk of stream){
125
- var _chunk_choices__delta, _chunk_choices_, _chunk_choices, _chunk_choices__delta1, _chunk_choices_1, _chunk_choices1, _chunk_choices_2, _chunk_choices2;
126
- const content = (null == (_chunk_choices = chunk.choices) ? void 0 : null == (_chunk_choices_ = _chunk_choices[0]) ? void 0 : null == (_chunk_choices__delta = _chunk_choices_.delta) ? void 0 : _chunk_choices__delta.content) || '';
127
- const reasoning_content = (null == (_chunk_choices1 = chunk.choices) ? void 0 : null == (_chunk_choices_1 = _chunk_choices1[0]) ? void 0 : null == (_chunk_choices__delta1 = _chunk_choices_1.delta) ? void 0 : _chunk_choices__delta1.reasoning_content) || '';
128
- if (chunk.usage) usage = chunk.usage;
129
- if (content || reasoning_content) {
130
- accumulated += content;
131
- const chunkData = {
132
- content,
133
- reasoning_content,
134
- accumulated,
135
- isComplete: false,
136
- usage: void 0
137
- };
138
- options.onChunk(chunkData);
139
- }
140
- if (null == (_chunk_choices2 = chunk.choices) ? void 0 : null == (_chunk_choices_2 = _chunk_choices2[0]) ? void 0 : _chunk_choices_2.finish_reason) {
119
+ } else if (!useAnthropicSdk) openai = new (external_openai_default())({
120
+ baseURL: openaiBaseURL,
121
+ apiKey: openaiApiKey,
122
+ httpAgent: proxyAgent,
123
+ ...openaiExtraConfig,
124
+ defaultHeaders: {
125
+ ...(null == openaiExtraConfig ? void 0 : openaiExtraConfig.defaultHeaders) || {},
126
+ [env_namespaceObject.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
127
+ },
128
+ dangerouslyAllowBrowser: true
129
+ });
130
+ if (openai && env_namespaceObject.globalConfigManager.getEnvConfigInBoolean(env_namespaceObject.MIDSCENE_LANGSMITH_DEBUG)) {
131
+ if (utils_namespaceObject.ifInBrowser) throw new Error('langsmith is not supported in browser');
132
+ console.log('DEBUGGING MODE: langsmith wrapper enabled');
133
+ const { wrapOpenAI } = await Promise.resolve().then(__webpack_require__.bind(__webpack_require__, "langsmith/wrappers"));
134
+ openai = wrapOpenAI(openai);
135
+ }
136
+ if (void 0 !== openai) return {
137
+ completion: openai.chat.completions,
138
+ style: 'openai',
139
+ modelName,
140
+ modelDescription,
141
+ uiTarsVersion,
142
+ vlMode
143
+ };
144
+ if (useAnthropicSdk) openai = new sdk_namespaceObject.Anthropic({
145
+ apiKey: anthropicApiKey,
146
+ httpAgent: proxyAgent,
147
+ dangerouslyAllowBrowser: true
148
+ });
149
+ if (void 0 !== openai && openai.messages) return {
150
+ completion: openai.messages,
151
+ style: 'anthropic',
152
+ modelName,
153
+ modelDescription,
154
+ uiTarsVersion,
155
+ vlMode
156
+ };
157
+ throw new Error('Openai SDK or Anthropic SDK is not initialized');
158
+ }
159
+ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
160
+ const { completion, style, modelName, modelDescription, uiTarsVersion, vlMode } = await createChatClient({
161
+ AIActionTypeValue,
162
+ modelConfig
163
+ });
164
+ const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
165
+ const maxTokens = env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.OPENAI_MAX_TOKENS);
166
+ const debugCall = (0, logger_namespaceObject.getDebug)('ai:call');
167
+ const debugProfileStats = (0, logger_namespaceObject.getDebug)('ai:profile:stats');
168
+ const debugProfileDetail = (0, logger_namespaceObject.getDebug)('ai:profile:detail');
169
+ const startTime = Date.now();
170
+ const isStreaming = (null == options ? void 0 : options.stream) && (null == options ? void 0 : options.onChunk);
171
+ let content;
172
+ let accumulated = '';
173
+ let usage;
174
+ let timeCost;
175
+ const commonConfig = {
176
+ temperature: 'vlm-ui-tars' === vlMode ? 0.0 : 0.1,
177
+ stream: !!isStreaming,
178
+ max_tokens: 'number' == typeof maxTokens ? maxTokens : Number.parseInt(maxTokens || '2048', 10),
179
+ ...'qwen-vl' === vlMode || 'qwen3-vl' === vlMode ? {
180
+ vl_high_resolution_images: true
181
+ } : {}
182
+ };
183
+ try {
184
+ if ('openai' === style) {
185
+ debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
186
+ if (isStreaming) {
187
+ const stream = await completion.create({
188
+ model: modelName,
189
+ messages,
190
+ response_format: responseFormat,
191
+ ...commonConfig
192
+ }, {
193
+ stream: true
194
+ });
195
+ for await (const chunk of stream){
196
+ var _chunk_choices__delta, _chunk_choices_, _chunk_choices, _chunk_choices__delta1, _chunk_choices_1, _chunk_choices1, _chunk_choices_2, _chunk_choices2;
197
+ const content = (null == (_chunk_choices = chunk.choices) ? void 0 : null == (_chunk_choices_ = _chunk_choices[0]) ? void 0 : null == (_chunk_choices__delta = _chunk_choices_.delta) ? void 0 : _chunk_choices__delta.content) || '';
198
+ const reasoning_content = (null == (_chunk_choices1 = chunk.choices) ? void 0 : null == (_chunk_choices_1 = _chunk_choices1[0]) ? void 0 : null == (_chunk_choices__delta1 = _chunk_choices_1.delta) ? void 0 : _chunk_choices__delta1.reasoning_content) || '';
199
+ if (chunk.usage) usage = chunk.usage;
200
+ if (content || reasoning_content) {
201
+ accumulated += content;
202
+ const chunkData = {
203
+ content,
204
+ reasoning_content,
205
+ accumulated,
206
+ isComplete: false,
207
+ usage: void 0
208
+ };
209
+ options.onChunk(chunkData);
210
+ }
211
+ if (null == (_chunk_choices2 = chunk.choices) ? void 0 : null == (_chunk_choices_2 = _chunk_choices2[0]) ? void 0 : _chunk_choices_2.finish_reason) {
212
+ timeCost = Date.now() - startTime;
213
+ if (!usage) {
214
+ const estimatedTokens = Math.max(1, Math.floor(accumulated.length / 4));
215
+ usage = {
216
+ prompt_tokens: estimatedTokens,
217
+ completion_tokens: estimatedTokens,
218
+ total_tokens: 2 * estimatedTokens
219
+ };
220
+ }
221
+ const finalChunk = {
222
+ content: '',
223
+ accumulated,
224
+ reasoning_content: '',
225
+ isComplete: true,
226
+ usage: {
227
+ prompt_tokens: usage.prompt_tokens ?? 0,
228
+ completion_tokens: usage.completion_tokens ?? 0,
229
+ total_tokens: usage.total_tokens ?? 0,
230
+ time_cost: timeCost ?? 0,
231
+ model_name: modelName,
232
+ model_description: modelDescription,
233
+ intent: modelConfig.intent
234
+ }
235
+ };
236
+ options.onChunk(finalChunk);
237
+ break;
238
+ }
239
+ }
240
+ content = accumulated;
241
+ debugProfileStats(`streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`);
242
+ } else {
243
+ var _result_usage, _result_usage1, _result_usage2;
244
+ const result = await completion.create({
245
+ model: modelName,
246
+ messages,
247
+ response_format: responseFormat,
248
+ ...commonConfig
249
+ });
141
250
  timeCost = Date.now() - startTime;
142
- if (!usage) {
143
- const estimatedTokens = Math.max(1, Math.floor(accumulated.length / 4));
144
- usage = {
145
- prompt_tokens: estimatedTokens,
146
- completion_tokens: estimatedTokens,
147
- total_tokens: 2 * estimatedTokens
251
+ debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${(null == (_result_usage = result.usage) ? void 0 : _result_usage.prompt_tokens) || ''}, completion-tokens, ${(null == (_result_usage1 = result.usage) ? void 0 : _result_usage1.completion_tokens) || ''}, total-tokens, ${(null == (_result_usage2 = result.usage) ? void 0 : _result_usage2.total_tokens) || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
252
+ debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
253
+ (0, utils_namespaceObject.assert)(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
254
+ content = result.choices[0].message.content;
255
+ usage = result.usage;
256
+ }
257
+ debugCall(`response: ${content}`);
258
+ (0, utils_namespaceObject.assert)(content, 'empty content');
259
+ } else if ('anthropic' === style) {
260
+ const convertImageContent = (content)=>{
261
+ if ('image_url' === content.type) {
262
+ const imgBase64 = content.image_url.url;
263
+ (0, utils_namespaceObject.assert)(imgBase64, 'image_url is required');
264
+ const { mimeType, body } = (0, img_namespaceObject.parseBase64)(content.image_url.url);
265
+ return {
266
+ source: {
267
+ type: 'base64',
268
+ media_type: mimeType,
269
+ data: body
270
+ },
271
+ type: 'image'
148
272
  };
149
273
  }
150
- const finalChunk = {
151
- content: '',
152
- accumulated,
153
- reasoning_content: '',
154
- isComplete: true,
155
- usage: {
156
- prompt_tokens: usage.prompt_tokens ?? 0,
157
- completion_tokens: usage.completion_tokens ?? 0,
158
- total_tokens: usage.total_tokens ?? 0,
159
- time_cost: timeCost ?? 0,
160
- model_name: modelName,
161
- model_description: modelDescription,
162
- intent: modelConfig.intent
274
+ return content;
275
+ };
276
+ if (isStreaming) {
277
+ const stream = await completion.create({
278
+ model: modelName,
279
+ system: 'You are a versatile professional in software UI automation',
280
+ messages: messages.map((m)=>({
281
+ role: 'user',
282
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
283
+ })),
284
+ response_format: responseFormat,
285
+ ...commonConfig
286
+ });
287
+ for await (const chunk of stream){
288
+ var _chunk_delta;
289
+ const content = (null == (_chunk_delta = chunk.delta) ? void 0 : _chunk_delta.text) || '';
290
+ if (content) {
291
+ accumulated += content;
292
+ const chunkData = {
293
+ content,
294
+ accumulated,
295
+ reasoning_content: '',
296
+ isComplete: false,
297
+ usage: void 0
298
+ };
299
+ options.onChunk(chunkData);
163
300
  }
164
- };
165
- options.onChunk(finalChunk);
166
- break;
301
+ if ('message_stop' === chunk.type) {
302
+ timeCost = Date.now() - startTime;
303
+ const anthropicUsage = chunk.usage;
304
+ const finalChunk = {
305
+ content: '',
306
+ accumulated,
307
+ reasoning_content: '',
308
+ isComplete: true,
309
+ usage: anthropicUsage ? {
310
+ prompt_tokens: anthropicUsage.input_tokens ?? 0,
311
+ completion_tokens: anthropicUsage.output_tokens ?? 0,
312
+ total_tokens: (anthropicUsage.input_tokens ?? 0) + (anthropicUsage.output_tokens ?? 0),
313
+ time_cost: timeCost ?? 0,
314
+ model_name: modelName,
315
+ model_description: modelDescription,
316
+ intent: modelConfig.intent
317
+ } : void 0
318
+ };
319
+ options.onChunk(finalChunk);
320
+ break;
321
+ }
322
+ }
323
+ content = accumulated;
324
+ } else {
325
+ const result = await completion.create({
326
+ model: modelName,
327
+ system: 'You are a versatile professional in software UI automation',
328
+ messages: messages.map((m)=>({
329
+ role: 'user',
330
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
331
+ })),
332
+ response_format: responseFormat,
333
+ ...commonConfig
334
+ });
335
+ timeCost = Date.now() - startTime;
336
+ content = result.content[0].text;
337
+ usage = result.usage;
167
338
  }
339
+ (0, utils_namespaceObject.assert)(content, 'empty content');
340
+ }
341
+ if (isStreaming && !usage) {
342
+ const estimatedTokens = Math.max(1, Math.floor((content || '').length / 4));
343
+ usage = {
344
+ prompt_tokens: estimatedTokens,
345
+ completion_tokens: estimatedTokens,
346
+ total_tokens: 2 * estimatedTokens
347
+ };
168
348
  }
169
- content = accumulated;
170
- debugProfileStats(`streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`);
171
- } else {
172
- var _result_usage, _result_usage1, _result_usage2;
173
- const result = await completion.create({
174
- model: modelName,
175
- messages,
176
- response_format: responseFormat,
177
- ...commonConfig
349
+ return {
350
+ content: content || '',
351
+ usage: usage ? {
352
+ prompt_tokens: usage.prompt_tokens ?? 0,
353
+ completion_tokens: usage.completion_tokens ?? 0,
354
+ total_tokens: usage.total_tokens ?? 0,
355
+ time_cost: timeCost ?? 0,
356
+ model_name: modelName,
357
+ model_description: modelDescription,
358
+ intent: modelConfig.intent
359
+ } : void 0,
360
+ isStreamed: !!isStreaming
361
+ };
362
+ } catch (e) {
363
+ console.error(' call AI error', e);
364
+ const newError = new Error(`failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`, {
365
+ cause: e
178
366
  });
179
- timeCost = Date.now() - startTime;
180
- debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${(null == (_result_usage = result.usage) ? void 0 : _result_usage.prompt_tokens) || ''}, completion-tokens, ${(null == (_result_usage1 = result.usage) ? void 0 : _result_usage1.completion_tokens) || ''}, total-tokens, ${(null == (_result_usage2 = result.usage) ? void 0 : _result_usage2.total_tokens) || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
181
- debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
182
- (0, utils_namespaceObject.assert)(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
183
- content = result.choices[0].message.content;
184
- usage = result.usage;
367
+ throw newError;
185
368
  }
186
- debugCall(`response: ${content}`);
187
- (0, utils_namespaceObject.assert)(content, 'empty content');
188
- if (isStreaming && !usage) {
189
- const estimatedTokens = Math.max(1, Math.floor((content || '').length / 4));
190
- usage = {
191
- prompt_tokens: estimatedTokens,
192
- completion_tokens: estimatedTokens,
193
- total_tokens: 2 * estimatedTokens
194
- };
369
+ }
370
+ const getResponseFormat = (modelName, AIActionTypeValue)=>{
371
+ let responseFormat;
372
+ if (modelName.includes('gpt-4')) switch(AIActionTypeValue){
373
+ case external_common_js_namespaceObject.AIActionType.ASSERT:
374
+ responseFormat = assertion_js_namespaceObject.assertSchema;
375
+ break;
376
+ case external_common_js_namespaceObject.AIActionType.INSPECT_ELEMENT:
377
+ responseFormat = llm_locator_js_namespaceObject.locatorSchema;
378
+ break;
379
+ case external_common_js_namespaceObject.AIActionType.PLAN:
380
+ responseFormat = llm_planning_js_namespaceObject.planSchema;
381
+ break;
382
+ case external_common_js_namespaceObject.AIActionType.EXTRACT_DATA:
383
+ case external_common_js_namespaceObject.AIActionType.DESCRIBE_ELEMENT:
384
+ responseFormat = {
385
+ type: external_types_js_namespaceObject.AIResponseFormat.JSON
386
+ };
387
+ break;
388
+ case external_common_js_namespaceObject.AIActionType.TEXT:
389
+ responseFormat = void 0;
390
+ break;
195
391
  }
392
+ if ('gpt-4o-2024-05-13' === modelName && AIActionTypeValue !== external_common_js_namespaceObject.AIActionType.TEXT) responseFormat = {
393
+ type: external_types_js_namespaceObject.AIResponseFormat.JSON
394
+ };
395
+ return responseFormat;
396
+ };
397
+ async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
398
+ const response = await callAI(messages, AIActionTypeValue, modelConfig);
399
+ (0, utils_namespaceObject.assert)(response, 'empty response');
400
+ const vlMode = modelConfig.vlMode;
401
+ const jsonContent = safeParseJson(response.content, vlMode);
196
402
  return {
197
- content: content || '',
198
- usage: usage ? {
199
- prompt_tokens: usage.prompt_tokens ?? 0,
200
- completion_tokens: usage.completion_tokens ?? 0,
201
- total_tokens: usage.total_tokens ?? 0,
202
- time_cost: timeCost ?? 0,
203
- model_name: modelName,
204
- model_description: modelDescription,
205
- intent: modelConfig.intent
206
- } : void 0,
207
- isStreamed: !!isStreaming
403
+ content: jsonContent,
404
+ usage: response.usage
208
405
  };
209
- } catch (e) {
210
- console.error(' call AI error', e);
211
- const newError = new Error(`failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`, {
212
- cause: e
213
- });
214
- throw newError;
215
406
  }
216
- }
217
- const getResponseFormat = (modelName, AIActionTypeValue)=>{
218
- let responseFormat;
219
- if (modelName.includes('gpt-4')) switch(AIActionTypeValue){
220
- case external_common_js_namespaceObject.AIActionType.ASSERT:
221
- responseFormat = assertion_js_namespaceObject.assertSchema;
222
- break;
223
- case external_common_js_namespaceObject.AIActionType.INSPECT_ELEMENT:
224
- responseFormat = llm_locator_js_namespaceObject.locatorSchema;
225
- break;
226
- case external_common_js_namespaceObject.AIActionType.PLAN:
227
- responseFormat = llm_planning_js_namespaceObject.planSchema;
228
- break;
229
- case external_common_js_namespaceObject.AIActionType.EXTRACT_DATA:
230
- case external_common_js_namespaceObject.AIActionType.DESCRIBE_ELEMENT:
231
- responseFormat = {
232
- type: external_types_js_namespaceObject.AIResponseFormat.JSON
233
- };
234
- break;
235
- case external_common_js_namespaceObject.AIActionType.TEXT:
236
- responseFormat = void 0;
237
- break;
407
+ async function callAIWithStringResponse(msgs, AIActionTypeValue, modelConfig) {
408
+ const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);
409
+ return {
410
+ content,
411
+ usage
412
+ };
238
413
  }
239
- if ('gpt-4o-2024-05-13' === modelName && AIActionTypeValue !== external_common_js_namespaceObject.AIActionType.TEXT) responseFormat = {
240
- type: external_types_js_namespaceObject.AIResponseFormat.JSON
241
- };
242
- return responseFormat;
243
- };
244
- async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
245
- const response = await callAI(messages, AIActionTypeValue, modelConfig);
246
- (0, utils_namespaceObject.assert)(response, 'empty response');
247
- const vlMode = modelConfig.vlMode;
248
- const jsonContent = safeParseJson(response.content, vlMode);
249
- return {
250
- content: jsonContent,
251
- usage: response.usage
252
- };
253
- }
254
- async function callAIWithStringResponse(msgs, AIActionTypeValue, modelConfig) {
255
- const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);
256
- return {
257
- content,
258
- usage
259
- };
260
- }
261
- function extractJSONFromCodeBlock(response) {
262
- try {
263
- const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);
264
- if (jsonMatch) return jsonMatch[1];
265
- const codeBlockMatch = response.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
266
- if (codeBlockMatch) return codeBlockMatch[1];
267
- const jsonLikeMatch = response.match(/\{[\s\S]*\}/);
268
- if (jsonLikeMatch) return jsonLikeMatch[0];
269
- } catch {}
270
- return response;
271
- }
272
- function preprocessDoubaoBboxJson(input) {
273
- if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
274
- return input;
275
- }
276
- function safeParseJson(input, vlMode) {
277
- const cleanJsonString = extractJSONFromCodeBlock(input);
278
- if (null == cleanJsonString ? void 0 : cleanJsonString.match(/\((\d+),(\d+)\)/)) {
279
- var _cleanJsonString_match;
280
- return null == (_cleanJsonString_match = cleanJsonString.match(/\((\d+),(\d+)\)/)) ? void 0 : _cleanJsonString_match.slice(1).map(Number);
414
+ function extractJSONFromCodeBlock(response) {
415
+ try {
416
+ const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);
417
+ if (jsonMatch) return jsonMatch[1];
418
+ const codeBlockMatch = response.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
419
+ if (codeBlockMatch) return codeBlockMatch[1];
420
+ const jsonLikeMatch = response.match(/\{[\s\S]*\}/);
421
+ if (jsonLikeMatch) return jsonLikeMatch[0];
422
+ } catch {}
423
+ return response;
281
424
  }
282
- try {
283
- return JSON.parse(cleanJsonString);
284
- } catch {}
285
- try {
286
- return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(cleanJsonString));
287
- } catch (e) {}
288
- if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) {
289
- const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
290
- return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(jsonString));
425
+ function preprocessDoubaoBboxJson(input) {
426
+ if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
427
+ return input;
291
428
  }
292
- throw Error(`failed to parse json response: ${input}`);
293
- }
429
+ function safeParseJson(input, vlMode) {
430
+ const cleanJsonString = extractJSONFromCodeBlock(input);
431
+ if (null == cleanJsonString ? void 0 : cleanJsonString.match(/\((\d+),(\d+)\)/)) {
432
+ var _cleanJsonString_match;
433
+ return null == (_cleanJsonString_match = cleanJsonString.match(/\((\d+),(\d+)\)/)) ? void 0 : _cleanJsonString_match.slice(1).map(Number);
434
+ }
435
+ try {
436
+ return JSON.parse(cleanJsonString);
437
+ } catch {}
438
+ try {
439
+ return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(cleanJsonString));
440
+ } catch (e) {}
441
+ if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) {
442
+ const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
443
+ return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(jsonString));
444
+ }
445
+ throw Error(`failed to parse json response: ${input}`);
446
+ }
447
+ })();
294
448
  exports.callAI = __webpack_exports__.callAI;
295
449
  exports.callAIWithObjectResponse = __webpack_exports__.callAIWithObjectResponse;
296
450
  exports.callAIWithStringResponse = __webpack_exports__.callAIWithStringResponse;