@midscene/core 0.30.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +233 -144
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/execution-session.mjs +41 -0
- package/dist/es/agent/execution-session.mjs.map +1 -0
- package/dist/es/agent/index.mjs +3 -3
- package/dist/es/agent/task-builder.mjs +319 -0
- package/dist/es/agent/task-builder.mjs.map +1 -0
- package/dist/es/agent/task-cache.mjs +4 -4
- package/dist/es/agent/task-cache.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +197 -504
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/ui-utils.mjs +54 -35
- package/dist/es/agent/ui-utils.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +16 -58
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/conversation-history.mjs +25 -13
- package/dist/es/ai-model/conversation-history.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +4 -4
- package/dist/es/ai-model/inspect.mjs +45 -54
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +47 -65
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/prompt/common.mjs.map +1 -1
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +3 -88
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +182 -274
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
- package/dist/es/common.mjs.map +1 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +29 -12
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/index.mjs +5 -4
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/report.mjs.map +1 -1
- package/dist/es/{insight → service}/index.mjs +38 -51
- package/dist/es/service/index.mjs.map +1 -0
- package/dist/es/{insight → service}/utils.mjs +3 -3
- package/dist/es/service/utils.mjs.map +1 -0
- package/dist/es/task-runner.mjs +264 -0
- package/dist/es/task-runner.mjs.map +1 -0
- package/dist/es/tree.mjs +13 -2
- package/dist/es/tree.mjs.map +1 -0
- package/dist/es/types.mjs +18 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +6 -7
- package/dist/es/utils.mjs.map +1 -1
- package/dist/es/yaml/builder.mjs.map +1 -1
- package/dist/es/yaml/player.mjs +121 -98
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/es/yaml/utils.mjs +1 -1
- package/dist/es/yaml/utils.mjs.map +1 -1
- package/dist/lib/agent/agent.js +231 -142
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/common.js +1 -1
- package/dist/lib/agent/execution-session.js +75 -0
- package/dist/lib/agent/execution-session.js.map +1 -0
- package/dist/lib/agent/index.js +14 -14
- package/dist/lib/agent/index.js.map +1 -1
- package/dist/lib/agent/task-builder.js +356 -0
- package/dist/lib/agent/task-builder.js.map +1 -0
- package/dist/lib/agent/task-cache.js +8 -8
- package/dist/lib/agent/task-cache.js.map +1 -1
- package/dist/lib/agent/tasks.js +202 -506
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/ui-utils.js +58 -36
- package/dist/lib/agent/ui-utils.js.map +1 -1
- package/dist/lib/agent/utils.js +26 -68
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/conversation-history.js +27 -15
- package/dist/lib/ai-model/conversation-history.js.map +1 -1
- package/dist/lib/ai-model/index.js +27 -27
- package/dist/lib/ai-model/index.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +51 -57
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +49 -67
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -2
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/prompt/common.js +2 -2
- package/dist/lib/ai-model/prompt/common.js.map +1 -1
- package/dist/lib/ai-model/prompt/describe.js +2 -2
- package/dist/lib/ai-model/prompt/describe.js.map +1 -1
- package/dist/lib/ai-model/prompt/extraction.js +2 -2
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +7 -95
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +288 -401
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +71 -10
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/{ai-model/common.js → common.js} +40 -55
- package/dist/lib/common.js.map +1 -0
- package/dist/lib/device/device-options.js +20 -0
- package/dist/lib/device/device-options.js.map +1 -0
- package/dist/lib/device/index.js +63 -40
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/image/index.js +5 -5
- package/dist/lib/image/index.js.map +1 -1
- package/dist/lib/index.js +24 -20
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/report.js +2 -2
- package/dist/lib/report.js.map +1 -1
- package/dist/lib/{insight → service}/index.js +41 -54
- package/dist/lib/service/index.js.map +1 -0
- package/dist/lib/{insight → service}/utils.js +7 -7
- package/dist/lib/service/utils.js.map +1 -0
- package/dist/lib/task-runner.js +301 -0
- package/dist/lib/task-runner.js.map +1 -0
- package/dist/lib/tree.js +13 -4
- package/dist/lib/tree.js.map +1 -1
- package/dist/lib/types.js +31 -12
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +16 -17
- package/dist/lib/utils.js.map +1 -1
- package/dist/lib/yaml/builder.js +2 -2
- package/dist/lib/yaml/builder.js.map +1 -1
- package/dist/lib/yaml/index.js +16 -22
- package/dist/lib/yaml/index.js.map +1 -1
- package/dist/lib/yaml/player.js +123 -100
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/lib/yaml/utils.js +6 -6
- package/dist/lib/yaml/utils.js.map +1 -1
- package/dist/lib/yaml.js +1 -1
- package/dist/lib/yaml.js.map +1 -1
- package/dist/types/agent/agent.d.ts +62 -17
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +3 -2
- package/dist/types/agent/task-builder.d.ts +35 -0
- package/dist/types/agent/tasks.d.ts +32 -23
- package/dist/types/agent/ui-utils.d.ts +9 -2
- package/dist/types/agent/utils.d.ts +9 -35
- package/dist/types/ai-model/conversation-history.d.ts +8 -4
- package/dist/types/ai-model/index.d.ts +5 -5
- package/dist/types/ai-model/inspect.d.ts +20 -12
- package/dist/types/ai-model/llm-planning.d.ts +3 -1
- package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
- package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +2 -34
- package/dist/types/ai-model/service-caller/index.d.ts +2 -3
- package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
- package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
- package/dist/types/device/device-options.d.ts +57 -0
- package/dist/types/device/index.d.ts +55 -39
- package/dist/types/index.d.ts +7 -6
- package/dist/types/service/index.d.ts +26 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/task-runner.d.ts +49 -0
- package/dist/types/tree.d.ts +4 -1
- package/dist/types/types.d.ts +103 -66
- package/dist/types/yaml/utils.d.ts +1 -1
- package/dist/types/yaml.d.ts +68 -43
- package/package.json +9 -12
- package/dist/es/ai-model/action-executor.mjs +0 -129
- package/dist/es/ai-model/action-executor.mjs.map +0 -1
- package/dist/es/ai-model/common.mjs.map +0 -1
- package/dist/es/insight/index.mjs.map +0 -1
- package/dist/es/insight/utils.mjs.map +0 -1
- package/dist/lib/ai-model/action-executor.js +0 -163
- package/dist/lib/ai-model/action-executor.js.map +0 -1
- package/dist/lib/ai-model/common.js.map +0 -1
- package/dist/lib/insight/index.js.map +0 -1
- package/dist/lib/insight/utils.js.map +0 -1
- package/dist/types/ai-model/action-executor.d.ts +0 -19
- package/dist/types/insight/index.d.ts +0 -31
- package/dist/types/insight/utils.d.ts +0 -2
package/dist/es/ai-model/service-caller/index.mjs

@@ -1,282 +1,207 @@
-import {
-import { Anthropic } from "@anthropic-ai/sdk";
-import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
-import { MIDSCENE_API_TYPE, MIDSCENE_LANGSMITH_DEBUG, OPENAI_MAX_TOKENS, globalConfigManager } from "@midscene/shared/env";
-import { parseBase64 } from "@midscene/shared/img";
+import { MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_MAX_TOKENS, OPENAI_MAX_TOKENS, globalConfigManager } from "@midscene/shared/env";
 import { getDebug } from "@midscene/shared/logger";
 import { assert, ifInBrowser } from "@midscene/shared/utils";
-import { HttpsProxyAgent } from "https-proxy-agent";
 import { jsonrepair } from "jsonrepair";
-import openai_0
-import { SocksProxyAgent } from "socks-proxy-agent";
-import { AIActionType } from "../common.mjs";
-import { assertSchema } from "../prompt/assertion.mjs";
-import { locatorSchema } from "../prompt/llm-locator.mjs";
-import { planSchema } from "../prompt/llm-planning.mjs";
+import openai_0 from "openai";
 async function createChatClient({ AIActionTypeValue, modelConfig }) {
-    const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig,
-    let openai;
+    const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, createOpenAIClient, timeout } = modelConfig;
     let proxyAgent;
     const debugProxy = getDebug('ai:call:proxy');
+    const sanitizeProxyUrl = (url)=>{
+        try {
+            const parsed = new URL(url);
+            if (parsed.username) {
+                parsed.password = '****';
+                return parsed.href;
+            }
+            return url;
+        } catch {
+            return url;
+        }
+    };
     if (httpProxy) {
-        debugProxy('using http proxy', httpProxy);
-        … (1 removed line not captured)
+        debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));
+        if (ifInBrowser) console.warn('HTTP proxy is configured but not supported in browser environment');
+        else {
+            const moduleName = 'undici';
+            const { ProxyAgent } = await import(moduleName);
+            proxyAgent = new ProxyAgent({
+                uri: httpProxy
+            });
+        }
     } else if (socksProxy) {
-        debugProxy('using socks proxy', socksProxy);
-        … (18 removed lines not captured)
-            apiVersion: azureOpenaiApiVersion,
-            deployment: azureOpenaiDeployment,
-            ...openaiExtraConfig,
-            ...azureExtraConfig
+        debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));
+        if (ifInBrowser) console.warn('SOCKS proxy is configured but not supported in browser environment');
+        else try {
+            const moduleName = 'fetch-socks';
+            const { socksDispatcher } = await import(moduleName);
+            const proxyUrl = new URL(socksProxy);
+            if (!proxyUrl.hostname) throw new Error('SOCKS proxy URL must include a valid hostname');
+            const port = Number.parseInt(proxyUrl.port, 10);
+            if (!proxyUrl.port || Number.isNaN(port)) throw new Error('SOCKS proxy URL must include a valid port');
+            const protocol = proxyUrl.protocol.replace(':', '');
+            const socksType = 'socks4' === protocol ? 4 : 'socks5' === protocol ? 5 : 5;
+            proxyAgent = socksDispatcher({
+                type: socksType,
+                host: proxyUrl.hostname,
+                port,
+                ...proxyUrl.username ? {
+                    userId: decodeURIComponent(proxyUrl.username),
+                    password: decodeURIComponent(proxyUrl.password || '')
+                } : {}
             });
-        … (8 removed lines not captured)
-        }
-    }
+            debugProxy('socks proxy configured successfully', {
+                type: socksType,
+                host: proxyUrl.hostname,
+                port: port
+            });
+        } catch (error) {
+            console.error('Failed to configure SOCKS proxy:', error);
+            throw new Error(`Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`);
+        }
+    }
+    const openAIOptions = {
         baseURL: openaiBaseURL,
         apiKey: openaiApiKey,
-        … (1 removed line not captured)
+        ...proxyAgent ? {
+            fetchOptions: {
+                dispatcher: proxyAgent
+            }
+        } : {},
         ...openaiExtraConfig,
-        … (3 removed lines not captured)
-        },
+        ...'number' == typeof timeout ? {
+            timeout
+        } : {},
         dangerouslyAllowBrowser: true
-    }
+    };
+    const baseOpenAI = new openai_0(openAIOptions);
+    let openai = baseOpenAI;
     if (openai && globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)) {
         if (ifInBrowser) throw new Error('langsmith is not supported in browser');
         console.log('DEBUGGING MODE: langsmith wrapper enabled');
-        const
+        const langsmithModule = 'langsmith/wrappers';
+        const { wrapOpenAI } = await import(langsmithModule);
         openai = wrapOpenAI(openai);
     }
-    if (
+    if (openai && globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)) {
+        if (ifInBrowser) throw new Error('langfuse is not supported in browser');
+        console.log('DEBUGGING MODE: langfuse wrapper enabled');
+        const langfuseModule = 'langfuse';
+        const { observeOpenAI } = await import(langfuseModule);
+        openai = observeOpenAI(openai);
+    }
+    if (createOpenAIClient) {
+        const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);
+        if (wrappedClient) openai = wrappedClient;
+    }
+    return {
         completion: openai.chat.completions,
-        style: 'openai',
-        modelName,
-        modelDescription,
-        uiTarsVersion,
-        vlMode
-    };
-    if (useAnthropicSdk) openai = new Anthropic({
-        apiKey: anthropicApiKey,
-        httpAgent: proxyAgent,
-        dangerouslyAllowBrowser: true
-    });
-    if (void 0 !== openai && openai.messages) return {
-        completion: openai.messages,
-        style: 'anthropic',
         modelName,
         modelDescription,
         uiTarsVersion,
         vlMode
     };
-    throw new Error('Openai SDK or Anthropic SDK is not initialized');
 }
 async function callAI(messages, AIActionTypeValue, modelConfig, options) {
-    const { completion,
+    const { completion, modelName, modelDescription, uiTarsVersion, vlMode } = await createChatClient({
         AIActionTypeValue,
         modelConfig
     });
-    const
-    const maxTokens = globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);
+    const maxTokens = globalConfigManager.getEnvConfigValue(MIDSCENE_MODEL_MAX_TOKENS) ?? globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);
     const debugCall = getDebug('ai:call');
     const debugProfileStats = getDebug('ai:profile:stats');
     const debugProfileDetail = getDebug('ai:profile:detail');
     const startTime = Date.now();
-    const isStreaming =
+    const isStreaming = options?.stream && options?.onChunk;
     let content;
     let accumulated = '';
     let usage;
     let timeCost;
+    const buildUsageInfo = (usageData)=>{
+        if (!usageData) return;
+        const cachedInputTokens = usageData?.prompt_tokens_details?.cached_tokens;
+        return {
+            prompt_tokens: usageData.prompt_tokens ?? 0,
+            completion_tokens: usageData.completion_tokens ?? 0,
+            total_tokens: usageData.total_tokens ?? 0,
+            cached_input: cachedInputTokens ?? 0,
+            time_cost: timeCost ?? 0,
+            model_name: modelName,
+            model_description: modelDescription,
+            intent: modelConfig.intent
+        };
+    };
     const commonConfig = {
-        temperature: 'vlm-ui-tars' === vlMode ? 0.0 : 0
+        temperature: 'vlm-ui-tars' === vlMode ? 0.0 : void 0,
         stream: !!isStreaming,
-        max_tokens: 'number' == typeof maxTokens ? maxTokens :
-        ...'
+        max_tokens: 'number' == typeof maxTokens ? maxTokens : void 0,
+        ...'qwen2.5-vl' === vlMode ? {
             vl_high_resolution_images: true
         } : {}
     };
     try {
-        … (21 removed lines not captured)
-                    accumulated,
-                    isComplete: false,
-                    usage: void 0
-                };
-                options.onChunk(chunkData);
-            }
-            if (null == (_chunk_choices2 = chunk.choices) ? void 0 : null == (_chunk_choices_2 = _chunk_choices2[0]) ? void 0 : _chunk_choices_2.finish_reason) {
-                timeCost = Date.now() - startTime;
-                if (!usage) {
-                    const estimatedTokens = Math.max(1, Math.floor(accumulated.length / 4));
-                    usage = {
-                        prompt_tokens: estimatedTokens,
-                        completion_tokens: estimatedTokens,
-                        total_tokens: 2 * estimatedTokens
-                    };
-                }
-                const finalChunk = {
-                    content: '',
-                    accumulated,
-                    reasoning_content: '',
-                    isComplete: true,
-                    usage: {
-                        prompt_tokens: usage.prompt_tokens ?? 0,
-                        completion_tokens: usage.completion_tokens ?? 0,
-                        total_tokens: usage.total_tokens ?? 0,
-                        time_cost: timeCost ?? 0,
-                        model_name: modelName,
-                        model_description: modelDescription,
-                        intent: modelConfig.intent
-                    }
-                };
-                options.onChunk(finalChunk);
-                break;
-            }
-        }
-        content = accumulated;
-        debugProfileStats(`streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`);
-    } else {
-        var _result_usage, _result_usage1, _result_usage2;
-        const result = await completion.create({
-            model: modelName,
-            messages,
-            response_format: responseFormat,
-            ...commonConfig
-        });
-        timeCost = Date.now() - startTime;
-        debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${(null == (_result_usage = result.usage) ? void 0 : _result_usage.prompt_tokens) || ''}, completion-tokens, ${(null == (_result_usage1 = result.usage) ? void 0 : _result_usage1.completion_tokens) || ''}, total-tokens, ${(null == (_result_usage2 = result.usage) ? void 0 : _result_usage2.total_tokens) || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
-        debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
-        assert(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
-        content = result.choices[0].message.content;
-        usage = result.usage;
-    }
-    debugCall(`response: ${content}`);
-    assert(content, 'empty content');
-    } else if ('anthropic' === style) {
-        const convertImageContent = (content)=>{
-            if ('image_url' === content.type) {
-                const imgBase64 = content.image_url.url;
-                assert(imgBase64, 'image_url is required');
-                const { mimeType, body } = parseBase64(content.image_url.url);
-                return {
-                    source: {
-                        type: 'base64',
-                        media_type: mimeType,
-                        data: body
-                    },
-                    type: 'image'
+        debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
+        if (isStreaming) {
+            const stream = await completion.create({
+                model: modelName,
+                messages,
+                ...commonConfig
+            }, {
+                stream: true
+            });
+            for await (const chunk of stream){
+                const content = chunk.choices?.[0]?.delta?.content || '';
+                const reasoning_content = chunk.choices?.[0]?.delta?.reasoning_content || '';
+                if (chunk.usage) usage = chunk.usage;
+                if (content || reasoning_content) {
+                    accumulated += content;
+                    const chunkData = {
+                        content,
+                        reasoning_content,
+                        accumulated,
+                        isComplete: false,
+                        usage: void 0
                     };
+                    options.onChunk(chunkData);
                 }
-        … (8 removed lines not captured)
-                content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
-            })),
-            response_format: responseFormat,
-            ...commonConfig
-        });
-        for await (const chunk of stream){
-            var _chunk_delta;
-            const content = (null == (_chunk_delta = chunk.delta) ? void 0 : _chunk_delta.text) || '';
-            if (content) {
-                accumulated += content;
-                const chunkData = {
-                    content,
-                    accumulated,
-                    reasoning_content: '',
-                    isComplete: false,
-                    usage: void 0
-                };
-                options.onChunk(chunkData);
-            }
-            if ('message_stop' === chunk.type) {
-                timeCost = Date.now() - startTime;
-                const anthropicUsage = chunk.usage;
-                const finalChunk = {
-                    content: '',
-                    accumulated,
-                    reasoning_content: '',
-                    isComplete: true,
-                    usage: anthropicUsage ? {
-                        prompt_tokens: anthropicUsage.input_tokens ?? 0,
-                        completion_tokens: anthropicUsage.output_tokens ?? 0,
-                        total_tokens: (anthropicUsage.input_tokens ?? 0) + (anthropicUsage.output_tokens ?? 0),
-                        time_cost: timeCost ?? 0,
-                        model_name: modelName,
-                        model_description: modelDescription,
-                        intent: modelConfig.intent
-                    } : void 0
+                if (chunk.choices?.[0]?.finish_reason) {
+                    timeCost = Date.now() - startTime;
+                    if (!usage) {
+                        const estimatedTokens = Math.max(1, Math.floor(accumulated.length / 4));
+                        usage = {
+                            prompt_tokens: estimatedTokens,
+                            completion_tokens: estimatedTokens,
+                            total_tokens: 2 * estimatedTokens
                         };
-                options.onChunk(finalChunk);
-                break;
                     }
+                    const finalChunk = {
+                        content: '',
+                        accumulated,
+                        reasoning_content: '',
+                        isComplete: true,
+                        usage: buildUsageInfo(usage)
+                    };
+                    options.onChunk(finalChunk);
+                    break;
                 }
-        content = accumulated;
-    } else {
-        const result = await completion.create({
-            model: modelName,
-            system: 'You are a versatile professional in software UI automation',
-            messages: messages.map((m)=>({
-                role: 'user',
-                content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
-            })),
-            response_format: responseFormat,
-            ...commonConfig
-        });
-        timeCost = Date.now() - startTime;
-        content = result.content[0].text;
-        usage = result.usage;
             }
-        … (1 removed line not captured)
+            content = accumulated;
+            debugProfileStats(`streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`);
+        } else {
+            const result = await completion.create({
+                model: modelName,
+                messages,
+                ...commonConfig
+            });
+            timeCost = Date.now() - startTime;
+            debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`);
+            debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
+            assert(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
+            content = result.choices[0].message.content;
+            usage = result.usage;
         }
+        debugCall(`response: ${content}`);
+        assert(content, 'empty content');
         if (isStreaming && !usage) {
             const estimatedTokens = Math.max(1, Math.floor((content || '').length / 4));
             usage = {
@@ -287,52 +212,17 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
         }
         return {
             content: content || '',
-            usage: usage
-                prompt_tokens: usage.prompt_tokens ?? 0,
-                completion_tokens: usage.completion_tokens ?? 0,
-                total_tokens: usage.total_tokens ?? 0,
-                time_cost: timeCost ?? 0,
-                model_name: modelName,
-                model_description: modelDescription,
-                intent: modelConfig.intent
-            } : void 0,
+            usage: buildUsageInfo(usage),
             isStreamed: !!isStreaming
         };
     } catch (e) {
         console.error(' call AI error', e);
-        const newError = new Error(`failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`, {
+        const newError = new Error(`failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`, {
             cause: e
         });
         throw newError;
     }
 }
-const getResponseFormat = (modelName, AIActionTypeValue)=>{
-    let responseFormat;
-    if (modelName.includes('gpt-4')) switch(AIActionTypeValue){
-        case AIActionType.ASSERT:
-            responseFormat = assertSchema;
-            break;
-        case AIActionType.INSPECT_ELEMENT:
-            responseFormat = locatorSchema;
-            break;
-        case AIActionType.PLAN:
-            responseFormat = planSchema;
-            break;
-        case AIActionType.EXTRACT_DATA:
-        case AIActionType.DESCRIBE_ELEMENT:
-            responseFormat = {
-                type: AIResponseFormat.JSON
-            };
-            break;
-        case AIActionType.TEXT:
-            responseFormat = void 0;
-            break;
-    }
-    if ('gpt-4o-2024-05-13' === modelName && AIActionTypeValue !== AIActionType.TEXT) responseFormat = {
-        type: AIResponseFormat.JSON
-    };
-    return responseFormat;
-};
 async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
     const response = await callAI(messages, AIActionTypeValue, modelConfig);
     assert(response, 'empty response');
@@ -340,6 +230,7 @@ async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig
     const jsonContent = safeParseJson(response.content, vlMode);
     return {
         content: jsonContent,
+        contentString: response.content,
         usage: response.usage
     };
 }
@@ -365,24 +256,41 @@ function preprocessDoubaoBboxJson(input) {
     if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
     return input;
 }
+function normalizeJsonObject(obj) {
+    if (null == obj) return obj;
+    if (Array.isArray(obj)) return obj.map((item)=>normalizeJsonObject(item));
+    if ('object' == typeof obj) {
+        const normalized = {};
+        for (const [key, value] of Object.entries(obj)){
+            const trimmedKey = key.trim();
+            let normalizedValue = normalizeJsonObject(value);
+            if ('string' == typeof normalizedValue) normalizedValue = normalizedValue.trim();
+            normalized[trimmedKey] = normalizedValue;
+        }
+        return normalized;
+    }
+    if ('string' == typeof obj) return obj.trim();
+    return obj;
+}
 function safeParseJson(input, vlMode) {
     const cleanJsonString = extractJSONFromCodeBlock(input);
-    if (
-    … (1 removed line not captured)
-        return null == (_cleanJsonString_match = cleanJsonString.match(/\((\d+),(\d+)\)/)) ? void 0 : _cleanJsonString_match.slice(1).map(Number);
-    }
+    if (cleanJsonString?.match(/\((\d+),(\d+)\)/)) return cleanJsonString.match(/\((\d+),(\d+)\)/)?.slice(1).map(Number);
+    let parsed;
     try {
-        … (1 removed line not captured)
+        parsed = JSON.parse(cleanJsonString);
+        return normalizeJsonObject(parsed);
     } catch {}
     try {
-        … (1 removed line not captured)
+        parsed = JSON.parse(jsonrepair(cleanJsonString));
+        return normalizeJsonObject(parsed);
     } catch (e) {}
     if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) {
         const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
-        … (1 removed line not captured)
+        parsed = JSON.parse(jsonrepair(jsonString));
+        return normalizeJsonObject(parsed);
     }
     throw Error(`failed to parse json response: ${input}`);
 }
-export { callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock,
+export { callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock, preprocessDoubaoBboxJson, safeParseJson };
 
 //# sourceMappingURL=index.mjs.map