@midscene/core 0.30.5-beta-20251017073249.0 → 0.30.5-beta-20251020035347.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +9 -3
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs +5 -22
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +78 -214
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml/player.mjs +10 -8
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/lib/agent/agent.js +9 -3
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/llm-planning.js +4 -21
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +244 -398
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml/player.js +10 -8
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/types/agent/agent.d.ts +16 -0
- package/dist/types/yaml.d.ts +3 -1
- package/package.json +4 -7
|
@@ -1,21 +1,5 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var
|
|
3
|
-
"langsmith/wrappers": function(module) {
|
|
4
|
-
module.exports = import("langsmith/wrappers").then(function(module) {
|
|
5
|
-
return module;
|
|
6
|
-
});
|
|
7
|
-
}
|
|
8
|
-
};
|
|
9
|
-
var __webpack_module_cache__ = {};
|
|
10
|
-
function __webpack_require__(moduleId) {
|
|
11
|
-
var cachedModule = __webpack_module_cache__[moduleId];
|
|
12
|
-
if (void 0 !== cachedModule) return cachedModule.exports;
|
|
13
|
-
var module = __webpack_module_cache__[moduleId] = {
|
|
14
|
-
exports: {}
|
|
15
|
-
};
|
|
16
|
-
__webpack_modules__[moduleId](module, module.exports, __webpack_require__);
|
|
17
|
-
return module.exports;
|
|
18
|
-
}
|
|
2
|
+
var __webpack_require__ = {};
|
|
19
3
|
(()=>{
|
|
20
4
|
__webpack_require__.n = (module)=>{
|
|
21
5
|
var getter = module && module.__esModule ? ()=>module['default'] : ()=>module;
|
|
@@ -47,404 +31,266 @@ function __webpack_require__(moduleId) {
|
|
|
47
31
|
};
|
|
48
32
|
})();
|
|
49
33
|
var __webpack_exports__ = {};
|
|
50
|
-
(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
34
|
+
__webpack_require__.r(__webpack_exports__);
|
|
35
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
36
|
+
extractJSONFromCodeBlock: ()=>extractJSONFromCodeBlock,
|
|
37
|
+
callAIWithStringResponse: ()=>callAIWithStringResponse,
|
|
38
|
+
preprocessDoubaoBboxJson: ()=>preprocessDoubaoBboxJson,
|
|
39
|
+
callAIWithObjectResponse: ()=>callAIWithObjectResponse,
|
|
40
|
+
getResponseFormat: ()=>getResponseFormat,
|
|
41
|
+
safeParseJson: ()=>safeParseJson,
|
|
42
|
+
callAI: ()=>callAI
|
|
43
|
+
});
|
|
44
|
+
const external_types_js_namespaceObject = require("../../types.js");
|
|
45
|
+
const env_namespaceObject = require("@midscene/shared/env");
|
|
46
|
+
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
47
|
+
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
48
|
+
const external_https_proxy_agent_namespaceObject = require("https-proxy-agent");
|
|
49
|
+
const external_jsonrepair_namespaceObject = require("jsonrepair");
|
|
50
|
+
const external_openai_namespaceObject = require("openai");
|
|
51
|
+
var external_openai_default = /*#__PURE__*/ __webpack_require__.n(external_openai_namespaceObject);
|
|
52
|
+
const external_socks_proxy_agent_namespaceObject = require("socks-proxy-agent");
|
|
53
|
+
const external_common_js_namespaceObject = require("../common.js");
|
|
54
|
+
const assertion_js_namespaceObject = require("../prompt/assertion.js");
|
|
55
|
+
const llm_locator_js_namespaceObject = require("../prompt/llm-locator.js");
|
|
56
|
+
const llm_planning_js_namespaceObject = require("../prompt/llm-planning.js");
|
|
57
|
+
async function createChatClient({ AIActionTypeValue, modelConfig }) {
|
|
58
|
+
const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode } = modelConfig;
|
|
59
|
+
let proxyAgent;
|
|
60
|
+
const debugProxy = (0, logger_namespaceObject.getDebug)('ai:call:proxy');
|
|
61
|
+
if (httpProxy) {
|
|
62
|
+
debugProxy('using http proxy', httpProxy);
|
|
63
|
+
proxyAgent = new external_https_proxy_agent_namespaceObject.HttpsProxyAgent(httpProxy);
|
|
64
|
+
} else if (socksProxy) {
|
|
65
|
+
debugProxy('using socks proxy', socksProxy);
|
|
66
|
+
proxyAgent = new external_socks_proxy_agent_namespaceObject.SocksProxyAgent(socksProxy);
|
|
67
|
+
}
|
|
68
|
+
const openai = new (external_openai_default())({
|
|
69
|
+
baseURL: openaiBaseURL,
|
|
70
|
+
apiKey: openaiApiKey,
|
|
71
|
+
...proxyAgent ? {
|
|
72
|
+
httpAgent: proxyAgent
|
|
73
|
+
} : {},
|
|
74
|
+
...openaiExtraConfig,
|
|
75
|
+
defaultHeaders: {
|
|
76
|
+
...(null == openaiExtraConfig ? void 0 : openaiExtraConfig.defaultHeaders) || {},
|
|
77
|
+
[env_namespaceObject.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
|
|
78
|
+
},
|
|
79
|
+
dangerouslyAllowBrowser: true
|
|
60
80
|
});
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
const
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
const
|
|
75
|
-
const
|
|
76
|
-
const
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
azureADTokenProvider: tokenProvider,
|
|
104
|
-
endpoint: azureOpenaiEndpoint,
|
|
105
|
-
apiVersion: azureOpenaiApiVersion,
|
|
106
|
-
deployment: azureOpenaiDeployment,
|
|
107
|
-
...openaiExtraConfig,
|
|
108
|
-
...azureExtraConfig
|
|
109
|
-
});
|
|
110
|
-
} else openai = new external_openai_namespaceObject.AzureOpenAI({
|
|
111
|
-
apiKey: azureOpenaiKey,
|
|
112
|
-
endpoint: azureOpenaiEndpoint,
|
|
113
|
-
apiVersion: azureOpenaiApiVersion,
|
|
114
|
-
deployment: azureOpenaiDeployment,
|
|
115
|
-
dangerouslyAllowBrowser: true,
|
|
116
|
-
...openaiExtraConfig,
|
|
117
|
-
...azureExtraConfig
|
|
81
|
+
return {
|
|
82
|
+
completion: openai.chat.completions,
|
|
83
|
+
modelName,
|
|
84
|
+
modelDescription,
|
|
85
|
+
uiTarsVersion,
|
|
86
|
+
vlMode
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
async function callAI(messages, AIActionTypeValue, modelConfig, options) {
|
|
90
|
+
const { completion, modelName, modelDescription, uiTarsVersion, vlMode } = await createChatClient({
|
|
91
|
+
AIActionTypeValue,
|
|
92
|
+
modelConfig
|
|
93
|
+
});
|
|
94
|
+
const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
|
|
95
|
+
const maxTokens = env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.OPENAI_MAX_TOKENS);
|
|
96
|
+
const debugCall = (0, logger_namespaceObject.getDebug)('ai:call');
|
|
97
|
+
const debugProfileStats = (0, logger_namespaceObject.getDebug)('ai:profile:stats');
|
|
98
|
+
const debugProfileDetail = (0, logger_namespaceObject.getDebug)('ai:profile:detail');
|
|
99
|
+
const startTime = Date.now();
|
|
100
|
+
const isStreaming = (null == options ? void 0 : options.stream) && (null == options ? void 0 : options.onChunk);
|
|
101
|
+
let content;
|
|
102
|
+
let accumulated = '';
|
|
103
|
+
let usage;
|
|
104
|
+
let timeCost;
|
|
105
|
+
const commonConfig = {
|
|
106
|
+
temperature: 'vlm-ui-tars' === vlMode ? 0.0 : 0.1,
|
|
107
|
+
stream: !!isStreaming,
|
|
108
|
+
max_tokens: 'number' == typeof maxTokens ? maxTokens : Number.parseInt(maxTokens || '2048', 10),
|
|
109
|
+
...'qwen-vl' === vlMode ? {
|
|
110
|
+
vl_high_resolution_images: true
|
|
111
|
+
} : {}
|
|
112
|
+
};
|
|
113
|
+
try {
|
|
114
|
+
debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
|
|
115
|
+
if (isStreaming) {
|
|
116
|
+
const stream = await completion.create({
|
|
117
|
+
model: modelName,
|
|
118
|
+
messages,
|
|
119
|
+
response_format: responseFormat,
|
|
120
|
+
...commonConfig
|
|
121
|
+
}, {
|
|
122
|
+
stream: true
|
|
118
123
|
});
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
openai = wrapOpenAI(openai);
|
|
135
|
-
}
|
|
136
|
-
if (void 0 !== openai) return {
|
|
137
|
-
completion: openai.chat.completions,
|
|
138
|
-
style: 'openai',
|
|
139
|
-
modelName,
|
|
140
|
-
modelDescription,
|
|
141
|
-
uiTarsVersion,
|
|
142
|
-
vlMode
|
|
143
|
-
};
|
|
144
|
-
if (useAnthropicSdk) openai = new sdk_namespaceObject.Anthropic({
|
|
145
|
-
apiKey: anthropicApiKey,
|
|
146
|
-
httpAgent: proxyAgent,
|
|
147
|
-
dangerouslyAllowBrowser: true
|
|
148
|
-
});
|
|
149
|
-
if (void 0 !== openai && openai.messages) return {
|
|
150
|
-
completion: openai.messages,
|
|
151
|
-
style: 'anthropic',
|
|
152
|
-
modelName,
|
|
153
|
-
modelDescription,
|
|
154
|
-
uiTarsVersion,
|
|
155
|
-
vlMode
|
|
156
|
-
};
|
|
157
|
-
throw new Error('Openai SDK or Anthropic SDK is not initialized');
|
|
158
|
-
}
|
|
159
|
-
async function callAI(messages, AIActionTypeValue, modelConfig, options) {
|
|
160
|
-
const { completion, style, modelName, modelDescription, uiTarsVersion, vlMode } = await createChatClient({
|
|
161
|
-
AIActionTypeValue,
|
|
162
|
-
modelConfig
|
|
163
|
-
});
|
|
164
|
-
const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
|
|
165
|
-
const maxTokens = env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.OPENAI_MAX_TOKENS);
|
|
166
|
-
const debugCall = (0, logger_namespaceObject.getDebug)('ai:call');
|
|
167
|
-
const debugProfileStats = (0, logger_namespaceObject.getDebug)('ai:profile:stats');
|
|
168
|
-
const debugProfileDetail = (0, logger_namespaceObject.getDebug)('ai:profile:detail');
|
|
169
|
-
const startTime = Date.now();
|
|
170
|
-
const isStreaming = (null == options ? void 0 : options.stream) && (null == options ? void 0 : options.onChunk);
|
|
171
|
-
let content;
|
|
172
|
-
let accumulated = '';
|
|
173
|
-
let usage;
|
|
174
|
-
let timeCost;
|
|
175
|
-
const commonConfig = {
|
|
176
|
-
temperature: 'vlm-ui-tars' === vlMode ? 0.0 : 0.1,
|
|
177
|
-
stream: !!isStreaming,
|
|
178
|
-
max_tokens: 'number' == typeof maxTokens ? maxTokens : Number.parseInt(maxTokens || '2048', 10),
|
|
179
|
-
...'qwen-vl' === vlMode || 'qwen3-vl' === vlMode ? {
|
|
180
|
-
vl_high_resolution_images: true
|
|
181
|
-
} : {}
|
|
182
|
-
};
|
|
183
|
-
try {
|
|
184
|
-
if ('openai' === style) {
|
|
185
|
-
debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
|
|
186
|
-
if (isStreaming) {
|
|
187
|
-
const stream = await completion.create({
|
|
188
|
-
model: modelName,
|
|
189
|
-
messages,
|
|
190
|
-
response_format: responseFormat,
|
|
191
|
-
...commonConfig
|
|
192
|
-
}, {
|
|
193
|
-
stream: true
|
|
194
|
-
});
|
|
195
|
-
for await (const chunk of stream){
|
|
196
|
-
var _chunk_choices__delta, _chunk_choices_, _chunk_choices, _chunk_choices__delta1, _chunk_choices_1, _chunk_choices1, _chunk_choices_2, _chunk_choices2;
|
|
197
|
-
const content = (null == (_chunk_choices = chunk.choices) ? void 0 : null == (_chunk_choices_ = _chunk_choices[0]) ? void 0 : null == (_chunk_choices__delta = _chunk_choices_.delta) ? void 0 : _chunk_choices__delta.content) || '';
|
|
198
|
-
const reasoning_content = (null == (_chunk_choices1 = chunk.choices) ? void 0 : null == (_chunk_choices_1 = _chunk_choices1[0]) ? void 0 : null == (_chunk_choices__delta1 = _chunk_choices_1.delta) ? void 0 : _chunk_choices__delta1.reasoning_content) || '';
|
|
199
|
-
if (chunk.usage) usage = chunk.usage;
|
|
200
|
-
if (content || reasoning_content) {
|
|
201
|
-
accumulated += content;
|
|
202
|
-
const chunkData = {
|
|
203
|
-
content,
|
|
204
|
-
reasoning_content,
|
|
205
|
-
accumulated,
|
|
206
|
-
isComplete: false,
|
|
207
|
-
usage: void 0
|
|
208
|
-
};
|
|
209
|
-
options.onChunk(chunkData);
|
|
210
|
-
}
|
|
211
|
-
if (null == (_chunk_choices2 = chunk.choices) ? void 0 : null == (_chunk_choices_2 = _chunk_choices2[0]) ? void 0 : _chunk_choices_2.finish_reason) {
|
|
212
|
-
timeCost = Date.now() - startTime;
|
|
213
|
-
if (!usage) {
|
|
214
|
-
const estimatedTokens = Math.max(1, Math.floor(accumulated.length / 4));
|
|
215
|
-
usage = {
|
|
216
|
-
prompt_tokens: estimatedTokens,
|
|
217
|
-
completion_tokens: estimatedTokens,
|
|
218
|
-
total_tokens: 2 * estimatedTokens
|
|
219
|
-
};
|
|
220
|
-
}
|
|
221
|
-
const finalChunk = {
|
|
222
|
-
content: '',
|
|
223
|
-
accumulated,
|
|
224
|
-
reasoning_content: '',
|
|
225
|
-
isComplete: true,
|
|
226
|
-
usage: {
|
|
227
|
-
prompt_tokens: usage.prompt_tokens ?? 0,
|
|
228
|
-
completion_tokens: usage.completion_tokens ?? 0,
|
|
229
|
-
total_tokens: usage.total_tokens ?? 0,
|
|
230
|
-
time_cost: timeCost ?? 0,
|
|
231
|
-
model_name: modelName,
|
|
232
|
-
model_description: modelDescription,
|
|
233
|
-
intent: modelConfig.intent
|
|
234
|
-
}
|
|
235
|
-
};
|
|
236
|
-
options.onChunk(finalChunk);
|
|
237
|
-
break;
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
content = accumulated;
|
|
241
|
-
debugProfileStats(`streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`);
|
|
242
|
-
} else {
|
|
243
|
-
var _result_usage, _result_usage1, _result_usage2;
|
|
244
|
-
const result = await completion.create({
|
|
245
|
-
model: modelName,
|
|
246
|
-
messages,
|
|
247
|
-
response_format: responseFormat,
|
|
248
|
-
...commonConfig
|
|
249
|
-
});
|
|
250
|
-
timeCost = Date.now() - startTime;
|
|
251
|
-
debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${(null == (_result_usage = result.usage) ? void 0 : _result_usage.prompt_tokens) || ''}, completion-tokens, ${(null == (_result_usage1 = result.usage) ? void 0 : _result_usage1.completion_tokens) || ''}, total-tokens, ${(null == (_result_usage2 = result.usage) ? void 0 : _result_usage2.total_tokens) || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
|
|
252
|
-
debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
|
|
253
|
-
(0, utils_namespaceObject.assert)(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
|
|
254
|
-
content = result.choices[0].message.content;
|
|
255
|
-
usage = result.usage;
|
|
124
|
+
for await (const chunk of stream){
|
|
125
|
+
var _chunk_choices__delta, _chunk_choices_, _chunk_choices, _chunk_choices__delta1, _chunk_choices_1, _chunk_choices1, _chunk_choices_2, _chunk_choices2;
|
|
126
|
+
const content = (null == (_chunk_choices = chunk.choices) ? void 0 : null == (_chunk_choices_ = _chunk_choices[0]) ? void 0 : null == (_chunk_choices__delta = _chunk_choices_.delta) ? void 0 : _chunk_choices__delta.content) || '';
|
|
127
|
+
const reasoning_content = (null == (_chunk_choices1 = chunk.choices) ? void 0 : null == (_chunk_choices_1 = _chunk_choices1[0]) ? void 0 : null == (_chunk_choices__delta1 = _chunk_choices_1.delta) ? void 0 : _chunk_choices__delta1.reasoning_content) || '';
|
|
128
|
+
if (chunk.usage) usage = chunk.usage;
|
|
129
|
+
if (content || reasoning_content) {
|
|
130
|
+
accumulated += content;
|
|
131
|
+
const chunkData = {
|
|
132
|
+
content,
|
|
133
|
+
reasoning_content,
|
|
134
|
+
accumulated,
|
|
135
|
+
isComplete: false,
|
|
136
|
+
usage: void 0
|
|
137
|
+
};
|
|
138
|
+
options.onChunk(chunkData);
|
|
256
139
|
}
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
return {
|
|
266
|
-
source: {
|
|
267
|
-
type: 'base64',
|
|
268
|
-
media_type: mimeType,
|
|
269
|
-
data: body
|
|
270
|
-
},
|
|
271
|
-
type: 'image'
|
|
140
|
+
if (null == (_chunk_choices2 = chunk.choices) ? void 0 : null == (_chunk_choices_2 = _chunk_choices2[0]) ? void 0 : _chunk_choices_2.finish_reason) {
|
|
141
|
+
timeCost = Date.now() - startTime;
|
|
142
|
+
if (!usage) {
|
|
143
|
+
const estimatedTokens = Math.max(1, Math.floor(accumulated.length / 4));
|
|
144
|
+
usage = {
|
|
145
|
+
prompt_tokens: estimatedTokens,
|
|
146
|
+
completion_tokens: estimatedTokens,
|
|
147
|
+
total_tokens: 2 * estimatedTokens
|
|
272
148
|
};
|
|
273
149
|
}
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
for await (const chunk of stream){
|
|
288
|
-
var _chunk_delta;
|
|
289
|
-
const content = (null == (_chunk_delta = chunk.delta) ? void 0 : _chunk_delta.text) || '';
|
|
290
|
-
if (content) {
|
|
291
|
-
accumulated += content;
|
|
292
|
-
const chunkData = {
|
|
293
|
-
content,
|
|
294
|
-
accumulated,
|
|
295
|
-
reasoning_content: '',
|
|
296
|
-
isComplete: false,
|
|
297
|
-
usage: void 0
|
|
298
|
-
};
|
|
299
|
-
options.onChunk(chunkData);
|
|
150
|
+
const finalChunk = {
|
|
151
|
+
content: '',
|
|
152
|
+
accumulated,
|
|
153
|
+
reasoning_content: '',
|
|
154
|
+
isComplete: true,
|
|
155
|
+
usage: {
|
|
156
|
+
prompt_tokens: usage.prompt_tokens ?? 0,
|
|
157
|
+
completion_tokens: usage.completion_tokens ?? 0,
|
|
158
|
+
total_tokens: usage.total_tokens ?? 0,
|
|
159
|
+
time_cost: timeCost ?? 0,
|
|
160
|
+
model_name: modelName,
|
|
161
|
+
model_description: modelDescription,
|
|
162
|
+
intent: modelConfig.intent
|
|
300
163
|
}
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
const finalChunk = {
|
|
305
|
-
content: '',
|
|
306
|
-
accumulated,
|
|
307
|
-
reasoning_content: '',
|
|
308
|
-
isComplete: true,
|
|
309
|
-
usage: anthropicUsage ? {
|
|
310
|
-
prompt_tokens: anthropicUsage.input_tokens ?? 0,
|
|
311
|
-
completion_tokens: anthropicUsage.output_tokens ?? 0,
|
|
312
|
-
total_tokens: (anthropicUsage.input_tokens ?? 0) + (anthropicUsage.output_tokens ?? 0),
|
|
313
|
-
time_cost: timeCost ?? 0,
|
|
314
|
-
model_name: modelName,
|
|
315
|
-
model_description: modelDescription,
|
|
316
|
-
intent: modelConfig.intent
|
|
317
|
-
} : void 0
|
|
318
|
-
};
|
|
319
|
-
options.onChunk(finalChunk);
|
|
320
|
-
break;
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
content = accumulated;
|
|
324
|
-
} else {
|
|
325
|
-
const result = await completion.create({
|
|
326
|
-
model: modelName,
|
|
327
|
-
system: 'You are a versatile professional in software UI automation',
|
|
328
|
-
messages: messages.map((m)=>({
|
|
329
|
-
role: 'user',
|
|
330
|
-
content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
|
|
331
|
-
})),
|
|
332
|
-
response_format: responseFormat,
|
|
333
|
-
...commonConfig
|
|
334
|
-
});
|
|
335
|
-
timeCost = Date.now() - startTime;
|
|
336
|
-
content = result.content[0].text;
|
|
337
|
-
usage = result.usage;
|
|
164
|
+
};
|
|
165
|
+
options.onChunk(finalChunk);
|
|
166
|
+
break;
|
|
338
167
|
}
|
|
339
|
-
(0, utils_namespaceObject.assert)(content, 'empty content');
|
|
340
|
-
}
|
|
341
|
-
if (isStreaming && !usage) {
|
|
342
|
-
const estimatedTokens = Math.max(1, Math.floor((content || '').length / 4));
|
|
343
|
-
usage = {
|
|
344
|
-
prompt_tokens: estimatedTokens,
|
|
345
|
-
completion_tokens: estimatedTokens,
|
|
346
|
-
total_tokens: 2 * estimatedTokens
|
|
347
|
-
};
|
|
348
168
|
}
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
intent: modelConfig.intent
|
|
359
|
-
} : void 0,
|
|
360
|
-
isStreamed: !!isStreaming
|
|
361
|
-
};
|
|
362
|
-
} catch (e) {
|
|
363
|
-
console.error(' call AI error', e);
|
|
364
|
-
const newError = new Error(`failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`, {
|
|
365
|
-
cause: e
|
|
169
|
+
content = accumulated;
|
|
170
|
+
debugProfileStats(`streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`);
|
|
171
|
+
} else {
|
|
172
|
+
var _result_usage, _result_usage1, _result_usage2;
|
|
173
|
+
const result = await completion.create({
|
|
174
|
+
model: modelName,
|
|
175
|
+
messages,
|
|
176
|
+
response_format: responseFormat,
|
|
177
|
+
...commonConfig
|
|
366
178
|
});
|
|
367
|
-
|
|
179
|
+
timeCost = Date.now() - startTime;
|
|
180
|
+
debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${(null == (_result_usage = result.usage) ? void 0 : _result_usage.prompt_tokens) || ''}, completion-tokens, ${(null == (_result_usage1 = result.usage) ? void 0 : _result_usage1.completion_tokens) || ''}, total-tokens, ${(null == (_result_usage2 = result.usage) ? void 0 : _result_usage2.total_tokens) || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
|
|
181
|
+
debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
|
|
182
|
+
(0, utils_namespaceObject.assert)(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
|
|
183
|
+
content = result.choices[0].message.content;
|
|
184
|
+
usage = result.usage;
|
|
368
185
|
}
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
break;
|
|
379
|
-
case external_common_js_namespaceObject.AIActionType.PLAN:
|
|
380
|
-
responseFormat = llm_planning_js_namespaceObject.planSchema;
|
|
381
|
-
break;
|
|
382
|
-
case external_common_js_namespaceObject.AIActionType.EXTRACT_DATA:
|
|
383
|
-
case external_common_js_namespaceObject.AIActionType.DESCRIBE_ELEMENT:
|
|
384
|
-
responseFormat = {
|
|
385
|
-
type: external_types_js_namespaceObject.AIResponseFormat.JSON
|
|
386
|
-
};
|
|
387
|
-
break;
|
|
388
|
-
case external_common_js_namespaceObject.AIActionType.TEXT:
|
|
389
|
-
responseFormat = void 0;
|
|
390
|
-
break;
|
|
186
|
+
debugCall(`response: ${content}`);
|
|
187
|
+
(0, utils_namespaceObject.assert)(content, 'empty content');
|
|
188
|
+
if (isStreaming && !usage) {
|
|
189
|
+
const estimatedTokens = Math.max(1, Math.floor((content || '').length / 4));
|
|
190
|
+
usage = {
|
|
191
|
+
prompt_tokens: estimatedTokens,
|
|
192
|
+
completion_tokens: estimatedTokens,
|
|
193
|
+
total_tokens: 2 * estimatedTokens
|
|
194
|
+
};
|
|
391
195
|
}
|
|
392
|
-
if ('gpt-4o-2024-05-13' === modelName && AIActionTypeValue !== external_common_js_namespaceObject.AIActionType.TEXT) responseFormat = {
|
|
393
|
-
type: external_types_js_namespaceObject.AIResponseFormat.JSON
|
|
394
|
-
};
|
|
395
|
-
return responseFormat;
|
|
396
|
-
};
|
|
397
|
-
async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
|
|
398
|
-
const response = await callAI(messages, AIActionTypeValue, modelConfig);
|
|
399
|
-
(0, utils_namespaceObject.assert)(response, 'empty response');
|
|
400
|
-
const vlMode = modelConfig.vlMode;
|
|
401
|
-
const jsonContent = safeParseJson(response.content, vlMode);
|
|
402
196
|
return {
|
|
403
|
-
content:
|
|
404
|
-
usage:
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
197
|
+
content: content || '',
|
|
198
|
+
usage: usage ? {
|
|
199
|
+
prompt_tokens: usage.prompt_tokens ?? 0,
|
|
200
|
+
completion_tokens: usage.completion_tokens ?? 0,
|
|
201
|
+
total_tokens: usage.total_tokens ?? 0,
|
|
202
|
+
time_cost: timeCost ?? 0,
|
|
203
|
+
model_name: modelName,
|
|
204
|
+
model_description: modelDescription,
|
|
205
|
+
intent: modelConfig.intent
|
|
206
|
+
} : void 0,
|
|
207
|
+
isStreamed: !!isStreaming
|
|
412
208
|
};
|
|
209
|
+
} catch (e) {
|
|
210
|
+
console.error(' call AI error', e);
|
|
211
|
+
const newError = new Error(`failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`, {
|
|
212
|
+
cause: e
|
|
213
|
+
});
|
|
214
|
+
throw newError;
|
|
413
215
|
}
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
216
|
+
}
|
|
217
|
+
const getResponseFormat = (modelName, AIActionTypeValue)=>{
|
|
218
|
+
let responseFormat;
|
|
219
|
+
if (modelName.includes('gpt-4')) switch(AIActionTypeValue){
|
|
220
|
+
case external_common_js_namespaceObject.AIActionType.ASSERT:
|
|
221
|
+
responseFormat = assertion_js_namespaceObject.assertSchema;
|
|
222
|
+
break;
|
|
223
|
+
case external_common_js_namespaceObject.AIActionType.INSPECT_ELEMENT:
|
|
224
|
+
responseFormat = llm_locator_js_namespaceObject.locatorSchema;
|
|
225
|
+
break;
|
|
226
|
+
case external_common_js_namespaceObject.AIActionType.PLAN:
|
|
227
|
+
responseFormat = llm_planning_js_namespaceObject.planSchema;
|
|
228
|
+
break;
|
|
229
|
+
case external_common_js_namespaceObject.AIActionType.EXTRACT_DATA:
|
|
230
|
+
case external_common_js_namespaceObject.AIActionType.DESCRIBE_ELEMENT:
|
|
231
|
+
responseFormat = {
|
|
232
|
+
type: external_types_js_namespaceObject.AIResponseFormat.JSON
|
|
233
|
+
};
|
|
234
|
+
break;
|
|
235
|
+
case external_common_js_namespaceObject.AIActionType.TEXT:
|
|
236
|
+
responseFormat = void 0;
|
|
237
|
+
break;
|
|
424
238
|
}
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
239
|
+
if ('gpt-4o-2024-05-13' === modelName && AIActionTypeValue !== external_common_js_namespaceObject.AIActionType.TEXT) responseFormat = {
|
|
240
|
+
type: external_types_js_namespaceObject.AIResponseFormat.JSON
|
|
241
|
+
};
|
|
242
|
+
return responseFormat;
|
|
243
|
+
};
|
|
244
|
+
async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
|
|
245
|
+
const response = await callAI(messages, AIActionTypeValue, modelConfig);
|
|
246
|
+
(0, utils_namespaceObject.assert)(response, 'empty response');
|
|
247
|
+
const vlMode = modelConfig.vlMode;
|
|
248
|
+
const jsonContent = safeParseJson(response.content, vlMode);
|
|
249
|
+
return {
|
|
250
|
+
content: jsonContent,
|
|
251
|
+
usage: response.usage
|
|
252
|
+
};
|
|
253
|
+
}
|
|
254
|
+
async function callAIWithStringResponse(msgs, AIActionTypeValue, modelConfig) {
|
|
255
|
+
const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);
|
|
256
|
+
return {
|
|
257
|
+
content,
|
|
258
|
+
usage
|
|
259
|
+
};
|
|
260
|
+
}
|
|
261
|
+
function extractJSONFromCodeBlock(response) {
|
|
262
|
+
try {
|
|
263
|
+
const jsonMatch = response.match(/^\s*(\{[\s\S]*\})\s*$/);
|
|
264
|
+
if (jsonMatch) return jsonMatch[1];
|
|
265
|
+
const codeBlockMatch = response.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
|
|
266
|
+
if (codeBlockMatch) return codeBlockMatch[1];
|
|
267
|
+
const jsonLikeMatch = response.match(/\{[\s\S]*\}/);
|
|
268
|
+
if (jsonLikeMatch) return jsonLikeMatch[0];
|
|
269
|
+
} catch {}
|
|
270
|
+
return response;
|
|
271
|
+
}
|
|
272
|
+
function preprocessDoubaoBboxJson(input) {
|
|
273
|
+
if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
|
|
274
|
+
return input;
|
|
275
|
+
}
|
|
276
|
+
function safeParseJson(input, vlMode) {
|
|
277
|
+
const cleanJsonString = extractJSONFromCodeBlock(input);
|
|
278
|
+
if (null == cleanJsonString ? void 0 : cleanJsonString.match(/\((\d+),(\d+)\)/)) {
|
|
279
|
+
var _cleanJsonString_match;
|
|
280
|
+
return null == (_cleanJsonString_match = cleanJsonString.match(/\((\d+),(\d+)\)/)) ? void 0 : _cleanJsonString_match.slice(1).map(Number);
|
|
428
281
|
}
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
try {
|
|
439
|
-
return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(cleanJsonString));
|
|
440
|
-
} catch (e) {}
|
|
441
|
-
if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) {
|
|
442
|
-
const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
|
|
443
|
-
return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(jsonString));
|
|
444
|
-
}
|
|
445
|
-
throw Error(`failed to parse json response: ${input}`);
|
|
282
|
+
try {
|
|
283
|
+
return JSON.parse(cleanJsonString);
|
|
284
|
+
} catch {}
|
|
285
|
+
try {
|
|
286
|
+
return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(cleanJsonString));
|
|
287
|
+
} catch (e) {}
|
|
288
|
+
if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) {
|
|
289
|
+
const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
|
|
290
|
+
return JSON.parse((0, external_jsonrepair_namespaceObject.jsonrepair)(jsonString));
|
|
446
291
|
}
|
|
447
|
-
})
|
|
292
|
+
throw Error(`failed to parse json response: ${input}`);
|
|
293
|
+
}
|
|
448
294
|
exports.callAI = __webpack_exports__.callAI;
|
|
449
295
|
exports.callAIWithObjectResponse = __webpack_exports__.callAIWithObjectResponse;
|
|
450
296
|
exports.callAIWithStringResponse = __webpack_exports__.callAIWithStringResponse;
|