@midscene/core 0.30.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +233 -144
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/execution-session.mjs +41 -0
- package/dist/es/agent/execution-session.mjs.map +1 -0
- package/dist/es/agent/index.mjs +3 -3
- package/dist/es/agent/task-builder.mjs +319 -0
- package/dist/es/agent/task-builder.mjs.map +1 -0
- package/dist/es/agent/task-cache.mjs +4 -4
- package/dist/es/agent/task-cache.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +197 -504
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/ui-utils.mjs +54 -35
- package/dist/es/agent/ui-utils.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +16 -58
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/conversation-history.mjs +25 -13
- package/dist/es/ai-model/conversation-history.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +4 -4
- package/dist/es/ai-model/inspect.mjs +45 -54
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +47 -65
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/prompt/common.mjs.map +1 -1
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +3 -88
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +182 -274
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
- package/dist/es/common.mjs.map +1 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +29 -12
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/index.mjs +5 -4
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/report.mjs.map +1 -1
- package/dist/es/{insight → service}/index.mjs +38 -51
- package/dist/es/service/index.mjs.map +1 -0
- package/dist/es/{insight → service}/utils.mjs +3 -3
- package/dist/es/service/utils.mjs.map +1 -0
- package/dist/es/task-runner.mjs +264 -0
- package/dist/es/task-runner.mjs.map +1 -0
- package/dist/es/tree.mjs +13 -2
- package/dist/es/tree.mjs.map +1 -0
- package/dist/es/types.mjs +18 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +6 -7
- package/dist/es/utils.mjs.map +1 -1
- package/dist/es/yaml/builder.mjs.map +1 -1
- package/dist/es/yaml/player.mjs +121 -98
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/es/yaml/utils.mjs +1 -1
- package/dist/es/yaml/utils.mjs.map +1 -1
- package/dist/lib/agent/agent.js +231 -142
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/common.js +1 -1
- package/dist/lib/agent/execution-session.js +75 -0
- package/dist/lib/agent/execution-session.js.map +1 -0
- package/dist/lib/agent/index.js +14 -14
- package/dist/lib/agent/index.js.map +1 -1
- package/dist/lib/agent/task-builder.js +356 -0
- package/dist/lib/agent/task-builder.js.map +1 -0
- package/dist/lib/agent/task-cache.js +8 -8
- package/dist/lib/agent/task-cache.js.map +1 -1
- package/dist/lib/agent/tasks.js +202 -506
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/ui-utils.js +58 -36
- package/dist/lib/agent/ui-utils.js.map +1 -1
- package/dist/lib/agent/utils.js +26 -68
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/conversation-history.js +27 -15
- package/dist/lib/ai-model/conversation-history.js.map +1 -1
- package/dist/lib/ai-model/index.js +27 -27
- package/dist/lib/ai-model/index.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +51 -57
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +49 -67
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -2
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/prompt/common.js +2 -2
- package/dist/lib/ai-model/prompt/common.js.map +1 -1
- package/dist/lib/ai-model/prompt/describe.js +2 -2
- package/dist/lib/ai-model/prompt/describe.js.map +1 -1
- package/dist/lib/ai-model/prompt/extraction.js +2 -2
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +7 -95
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +288 -401
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +71 -10
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/{ai-model/common.js → common.js} +40 -55
- package/dist/lib/common.js.map +1 -0
- package/dist/lib/device/device-options.js +20 -0
- package/dist/lib/device/device-options.js.map +1 -0
- package/dist/lib/device/index.js +63 -40
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/image/index.js +5 -5
- package/dist/lib/image/index.js.map +1 -1
- package/dist/lib/index.js +24 -20
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/report.js +2 -2
- package/dist/lib/report.js.map +1 -1
- package/dist/lib/{insight → service}/index.js +41 -54
- package/dist/lib/service/index.js.map +1 -0
- package/dist/lib/{insight → service}/utils.js +7 -7
- package/dist/lib/service/utils.js.map +1 -0
- package/dist/lib/task-runner.js +301 -0
- package/dist/lib/task-runner.js.map +1 -0
- package/dist/lib/tree.js +13 -4
- package/dist/lib/tree.js.map +1 -1
- package/dist/lib/types.js +31 -12
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +16 -17
- package/dist/lib/utils.js.map +1 -1
- package/dist/lib/yaml/builder.js +2 -2
- package/dist/lib/yaml/builder.js.map +1 -1
- package/dist/lib/yaml/index.js +16 -22
- package/dist/lib/yaml/index.js.map +1 -1
- package/dist/lib/yaml/player.js +123 -100
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/lib/yaml/utils.js +6 -6
- package/dist/lib/yaml/utils.js.map +1 -1
- package/dist/lib/yaml.js +1 -1
- package/dist/lib/yaml.js.map +1 -1
- package/dist/types/agent/agent.d.ts +62 -17
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +3 -2
- package/dist/types/agent/task-builder.d.ts +35 -0
- package/dist/types/agent/tasks.d.ts +32 -23
- package/dist/types/agent/ui-utils.d.ts +9 -2
- package/dist/types/agent/utils.d.ts +9 -35
- package/dist/types/ai-model/conversation-history.d.ts +8 -4
- package/dist/types/ai-model/index.d.ts +5 -5
- package/dist/types/ai-model/inspect.d.ts +20 -12
- package/dist/types/ai-model/llm-planning.d.ts +3 -1
- package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
- package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +2 -34
- package/dist/types/ai-model/service-caller/index.d.ts +2 -3
- package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
- package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
- package/dist/types/device/device-options.d.ts +57 -0
- package/dist/types/device/index.d.ts +55 -39
- package/dist/types/index.d.ts +7 -6
- package/dist/types/service/index.d.ts +26 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/task-runner.d.ts +49 -0
- package/dist/types/tree.d.ts +4 -1
- package/dist/types/types.d.ts +103 -66
- package/dist/types/yaml/utils.d.ts +1 -1
- package/dist/types/yaml.d.ts +68 -43
- package/package.json +9 -12
- package/dist/es/ai-model/action-executor.mjs +0 -129
- package/dist/es/ai-model/action-executor.mjs.map +0 -1
- package/dist/es/ai-model/common.mjs.map +0 -1
- package/dist/es/insight/index.mjs.map +0 -1
- package/dist/es/insight/utils.mjs.map +0 -1
- package/dist/lib/ai-model/action-executor.js +0 -163
- package/dist/lib/ai-model/action-executor.js.map +0 -1
- package/dist/lib/ai-model/common.js.map +0 -1
- package/dist/lib/insight/index.js.map +0 -1
- package/dist/lib/insight/utils.js.map +0 -1
- package/dist/types/ai-model/action-executor.d.ts +0 -19
- package/dist/types/insight/index.d.ts +0 -31
- package/dist/types/insight/utils.d.ts +0 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["webpack://@midscene/core/./src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport { Anthropic } from '@anthropic-ai/sdk';\nimport {\n DefaultAzureCredential,\n getBearerTokenProvider,\n} from '@azure/identity';\nimport {\n type IModelConfig,\n MIDSCENE_API_TYPE,\n MIDSCENE_LANGSMITH_DEBUG,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { parseBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI, { AzureOpenAI } from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { locatorSchema } from '../prompt/llm-locator';\nimport { planSchema } from '../prompt/llm-planning';\n\nasync function createChatClient({\n AIActionTypeValue,\n modelConfig,\n}: {\n AIActionTypeValue: AIActionType;\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n style: 'openai' | 'anthropic';\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n openaiUseAzureDeprecated,\n useAzureOpenai,\n azureOpenaiScope,\n azureOpenaiKey,\n azureOpenaiEndpoint,\n azureOpenaiApiVersion,\n azureOpenaiDeployment,\n azureExtraConfig,\n useAnthropicSdk,\n anthropicApiKey,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n } = modelConfig;\n\n let openai: OpenAI | AzureOpenAI | undefined;\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n if (openaiUseAzureDeprecated) {\n // this is deprecated\n openai = new AzureOpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n dangerouslyAllowBrowser: true,\n }) as OpenAI;\n } else if (useAzureOpenai) {\n // https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=bash%2Cjavascript-key%2Ctypescript-keyless%2Cpython&pivots=programming-language-javascript#rest-api\n // keyless authentication\n let tokenProvider: any = undefined;\n if (azureOpenaiScope) {\n assert(\n !ifInBrowser,\n 'Azure OpenAI is not supported in browser with Midscene.',\n );\n const credential = new DefaultAzureCredential();\n\n tokenProvider = getBearerTokenProvider(credential, azureOpenaiScope);\n\n openai = new AzureOpenAI({\n azureADTokenProvider: tokenProvider,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n } else {\n // endpoint, apiKey, apiVersion, deployment\n openai = new AzureOpenAI({\n apiKey: azureOpenaiKey,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n dangerouslyAllowBrowser: true,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n }\n } else if (!useAnthropicSdk) {\n openai = new OpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n defaultHeaders: {\n ...(openaiExtraConfig?.defaultHeaders || {}),\n [MIDSCENE_API_TYPE]: AIActionTypeValue.toString(),\n },\n dangerouslyAllowBrowser: true,\n });\n }\n\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n const { wrapOpenAI } = await import('langsmith/wrappers');\n openai = wrapOpenAI(openai);\n }\n\n if (typeof openai !== 'undefined') {\n return {\n completion: openai.chat.completions,\n style: 'openai',\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n }\n\n // Anthropic\n if (useAnthropicSdk) {\n openai = new Anthropic({\n apiKey: anthropicApiKey,\n httpAgent: proxyAgent,\n dangerouslyAllowBrowser: true,\n }) as any;\n }\n\n if (typeof openai !== 'undefined' && (openai as any).messages) {\n return {\n completion: (openai as any).messages,\n style: 'anthropic',\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n }\n\n throw new Error('Openai SDK or Anthropic SDK is not initialized');\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n const {\n completion,\n style,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n } = await createChatClient({\n AIActionTypeValue,\n modelConfig,\n });\n\n const responseFormat = getResponseFormat(modelName, AIActionTypeValue);\n\n const maxTokens = globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const commonConfig = {\n temperature: vlMode === 'vlm-ui-tars' ? 0.0 : 0.1,\n stream: !!isStreaming,\n max_tokens:\n typeof maxTokens === 'number'\n ? maxTokens\n : Number.parseInt(maxTokens || '2048', 10),\n ...(vlMode === 'qwen-vl' || vlMode === 'qwen3-vl' // qwen specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n if (style === 'openai') {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n },\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n } else if (style === 'anthropic') {\n const convertImageContent = (content: any) => {\n if (content.type === 'image_url') {\n const imgBase64 = content.image_url.url;\n assert(imgBase64, 'image_url is required');\n const { mimeType, body } = parseBase64(content.image_url.url);\n return {\n source: {\n type: 'base64',\n media_type: mimeType,\n data: body,\n },\n type: 'image',\n };\n }\n return content;\n };\n\n if (isStreaming) {\n const stream = (await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any)) as any;\n\n for await (const chunk of stream) {\n const content = chunk.delta?.text || '';\n if (content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n accumulated,\n reasoning_content: '',\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.type === 'message_stop') {\n timeCost = Date.now() - startTime;\n const anthropicUsage = chunk.usage;\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: anthropicUsage\n ? {\n prompt_tokens: anthropicUsage.input_tokens ?? 0,\n completion_tokens: anthropicUsage.output_tokens ?? 0,\n total_tokens:\n (anthropicUsage.input_tokens ?? 0) +\n (anthropicUsage.output_tokens ?? 0),\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n }\n : undefined,\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n } else {\n const result = await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n content = (result as any).content[0].text as string;\n usage = result.usage;\n }\n\n assert(content, 'empty content');\n }\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n return {\n content: content || '',\n usage: usage\n ? {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n }\n : undefined,\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport const getResponseFormat = (\n modelName: string,\n AIActionTypeValue: AIActionType,\n):\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject => {\n let responseFormat:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject\n | undefined;\n\n if (modelName.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.INSPECT_ELEMENT:\n responseFormat = locatorSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n case AIActionType.TEXT:\n // No response format for plain text - return as-is\n responseFormat = undefined;\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n // Skip for plain text to allow string output\n if (\n modelName === 'gpt-4o-2024-05-13' &&\n AIActionTypeValue !== AIActionType.TEXT\n ) {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n return responseFormat;\n};\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const response = await callAI(messages, AIActionTypeValue, modelConfig);\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n return { content: jsonContent, usage: response.usage };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n try {\n return JSON.parse(cleanJsonString);\n } catch {}\n try {\n return JSON.parse(jsonrepair(cleanJsonString));\n } catch (e) {}\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n return JSON.parse(jsonrepair(jsonString));\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["createChatClient","AIActionTypeValue","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","openaiUseAzureDeprecated","useAzureOpenai","azureOpenaiScope","azureOpenaiKey","azureOpenaiEndpoint","azureOpenaiApiVersion","azureOpenaiDeployment","azureExtraConfig","useAnthropicSdk","anthropicApiKey","modelDescription","uiTarsVersion","vlMode","openai","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","AzureOpenAI","tokenProvider","assert","ifInBrowser","credential","DefaultAzureCredential","getBearerTokenProvider","OpenAI","MIDSCENE_API_TYPE","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","Error","console","wrapOpenAI","Anthropic","callAI","messages","options","completion","style","responseFormat","getResponseFormat","maxTokens","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","isStreaming","content","accumulated","usage","timeCost","commonConfig","Number","stream","chunk","_chunk_choices__delta","_chunk_choices__delta1","_chunk_choices_2","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","_result_usage","_result_usage1","_result_usage2","result","JSON","convertImageContent","imgBase64","mimeType","body","parseBase64","m","Array","_chunk_delta","anthropicUsage","e","newError","AIActionType","assertSchema","locatorSchema","planSchema","AIResponseFormat","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","cleanJsonString","_cleanJsonString_match","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;;;;;AAgCA,eAAeA,iBAAiB,EAC9BC,iBAAiB,EACjBC,WAAW,EAIZ;IAQC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,wBAAwB,EACxBC,cAAc,EACdC,gBAAgB,EAChBC,cAAc,EACdC,mBAAmB,EACnBC,qBAAqB,EACrBC,qBAAqB,EACrBC,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EACfC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACP,GAAGnB;IAEJ,IAAIoB;IAEJ,IAAIC;IACJ,MAAMC,aAAaC,SAAS;IAC5B,IAAIrB,WAAW;QACboB,WAAW,oBAAoBpB;QAC/BmB,aAAa,IAAIG,gBAAgBtB;IACnC,OAAO,IAAID,YAAY;QACrBqB,WAAW,qBAAqBrB;QAChCoB,aAAa,IAAII,gBAAgBxB;IACnC;IAEA,IAAIM,0BAEFa,SAAS,IAAIM,YAAY;QACvB,SAAStB;QACT,QAAQC;QACR,WAAWgB;QACX,GAAGf,iBAAiB;QACpB,yBAAyB;IAC3B;SACK,IAAIE,gBAAgB;QAGzB,IAAImB;QACJ,IAAIlB,kBAAkB;YACpBmB,OACE,CAACC,aACD;YAEF,MAAMC,aAAa,IAAIC;YAEvBJ,gBAAgBK,uBAAuBF,YAAYrB;YAEnDW,SAAS,IAAIM,YAAY;gBACvB,sBAAsBC;gBACtB,UAAUhB;gBACV,YAAYC;gBACZ,YAAYC;gBACZ,GAAGP,iBAAiB;gBACpB,GAAGQ,gBAAgB;YACrB;QACF,OAEEM,SAAS,IAAIM,YAAY;YACvB,QAAQhB;YACR,UAAUC;YACV,YAAYC;YACZ,YAAYC;YACZ,yBAAyB;YACzB,GAAGP,iBAAiB;YACpB,GAAGQ,gBAAgB;QACrB;IAEJ,OAAO,IAAI,CAACC,iBACVK,SAAS,IAAIa,SAAO;QAClB,SAAS7B;QACT,QAAQC;QACR,WAAWgB;QACX,GAAGf,iBAAiB;QACpB,gBAAgB;YACd,GAAIA,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,cAAc,AAAD,KAAK,CAAC,CAAC;YAC3C,CAAC4B,kBAAkB,EAAEnC,kBAAkB,QAAQ;QACjD;QACA,yBAAyB;IAC3B;IAGF,IACEqB,UACAe,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIP,aACF,MAAM,IAAIQ,MAAM;QAElBC,QAAQ,GAAG,CAAC;QACZ,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC;QACpCnB,SAASmB,WAAWnB;IACtB;IAEA,IAAI,AAAkB,WAAXA,QACT,OAAO;QACL,YAAYA,OAAO,IAAI,CAAC,WAAW;QACnC,OAAO;QACPjB;QACAc;QACAC;QACAC;IACF;IAIF,IAAIJ,iBACFK,SAAS,IAAIoB,UAAU;QACrB,QAAQxB;QACR,WAAWK;QACX,yBAAyB;IAC3B;IAGF,IAAI,AAAkB,WAAXD,UAA2BA,OAAe,QAAQ,EAC3D,OAAO;QACL,YAAaA,OAAe,QAAQ;QACpC,OAAO;QACPjB;QACAc;QACAC;QACAC;IACF;IAGF,MAAM,IAAIkB,MAAM;AAClB;AAEO,eAAeI,OACpBC,QAAsC,EACtC3C,iBAA+B,EAC/BC,WAAyB,EACzB2C,OAGC;IAED,MAAM,EACJC,UAAU,EACVC,KAAK,EACL1C,SAAS,EACTc,gBAAgB,EAChBC,aAAa,EACbC,MAAM,EACP,GAAG,MAAMrB,iBAAiB;QACzBC;QACAC;IACF;IAEA,MAAM8C,iBAAiBC,kBAAkB5C,WAAWJ;IAEpD,MAAMiD,YAAYb,oBAAoB,iBAAiB,CAACc;IACxD,MAAMC,YAAY3B,SAAS;IAC3B,MAAM4B,oBAAoB5B,SAAS;IACnC,MAAM6B,qBAAqB7B,SAAS;IAEpC,MAAM8B,YAAYC,KAAK,GAAG;IAE1B,MAAMC,cAAcZ,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,MAAM,AAAD,KAAKA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;IACtD,IAAIa;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,eAAe;QACnB,aAAazC,AAAW,kBAAXA,SAA2B,MAAM;QAC9C,QAAQ,CAAC,CAACoC;QACV,YACE,AAAqB,YAArB,OAAOP,YACHA,YACAa,OAAO,QAAQ,CAACb,aAAa,QAAQ;QAC3C,GAAI7B,AAAW,cAAXA,UAAwBA,AAAW,eAAXA,SACxB;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACF,IAAI0B,AAAU,aAAVA,OAAoB;YACtBK,UACE,CAAC,QAAQ,EAAEK,cAAc,eAAe,GAAG,WAAW,EAAEpD,WAAW;YAGrE,IAAIoD,aAAa;gBACf,MAAMO,SAAU,MAAMlB,WAAW,MAAM,CACrC;oBACE,OAAOzC;oBACPuC;oBACA,iBAAiBI;oBACjB,GAAGc,YAAY;gBACjB,GACA;oBACE,QAAQ;gBACV;gBAKF,WAAW,MAAMG,SAASD,OAAQ;wBAChBE,uBAAAA,iBAAAA,gBAEbC,wBAAAA,kBAAAA,iBAoBCC,kBAAAA;oBAtBJ,MAAMV,UAAUQ,AAAAA,SAAAA,CAAAA,iBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,cAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,wBAAAA,gBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,sBAA2B,OAAO,AAAD,KAAK;oBACtD,MAAMG,oBACJ,AAAC,SAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,yBAAAA,iBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,uBAAmC,iBAAiB,AAAD,KAAK;oBAG3D,IAAIF,MAAM,KAAK,EACbL,QAAQK,MAAM,KAAK;oBAGrB,IAAIP,WAAWW,mBAAmB;wBAChCV,eAAeD;wBACf,MAAMY,YAAiC;4BACrCZ;4BACAW;4BACAV;4BACA,YAAY;4BACZ,OAAOY;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAI,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,iBAAoB,aAAa,EAAE;wBACrCP,WAAWL,KAAK,GAAG,KAAKD;wBAGxB,IAAI,CAACK,OAAO;4BAEV,MAAMY,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACd,YAAY,MAAM,GAAG;4BAElCC,QAAQ;gCACN,eAAeY;gCACf,mBAAmBA;gCACnB,cAAcA,AAAkB,IAAlBA;4BAChB;wBACF;wBAGA,MAAME,aAAkC;4BACtC,SAAS;4BACTf;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO;gCACL,eAAeC,MAAM,aAAa,IAAI;gCACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gCAC9C,cAAcA,MAAM,YAAY,IAAI;gCACpC,WAAWC,YAAY;gCACvB,YAAYxD;gCACZ,mBAAmBc;gCACnB,QAAQjB,YAAY,MAAM;4BAC5B;wBACF;wBACA2C,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAhB,UAAUC;gBACVN,kBACE,CAAC,iBAAiB,EAAEhD,UAAU,QAAQ,EAAEgB,UAAU,UAAU,WAAW,EAAEwC,UAAU;YAEvF,OAAO;oBAUqGc,eAAyDC,gBAAwDC;gBAT3N,MAAMC,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrC,OAAOzC;oBACPuC;oBACA,iBAAiBI;oBACjB,GAAGc,YAAY;gBACjB;gBACAD,WAAWL,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAEhD,UAAU,QAAQ,EAAEgB,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEuD,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,aAAa,AAAD,KAAK,GAAG,qBAAqB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,iBAAiB,AAAD,KAAK,GAAG,gBAAgB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,YAAY,AAAD,KAAK,GAAG,WAAW,EAAEhB,SAAS,aAAa,EAAEiB,OAAO,WAAW,IAAI,IAAI;gBAG3TxB,mBACE,CAAC,oBAAoB,EAAEyB,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;gBAGvDhD,OACEgD,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;gBAEhEpB,UAAUoB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3ClB,QAAQkB,OAAO,KAAK;YACtB;YAEA1B,UAAU,CAAC,UAAU,EAAEM,SAAS;YAChC5B,OAAO4B,SAAS;QAClB,OAAO,IAAIX,AAAU,gBAAVA,OAAuB;YAChC,MAAMiC,sBAAsB,CAACtB;gBAC3B,IAAIA,AAAiB,gBAAjBA,QAAQ,IAAI,EAAkB;oBAChC,MAAMuB,YAAYvB,QAAQ,SAAS,CAAC,GAAG;oBACvC5B,OAAOmD,WAAW;oBAClB,MAAM,EAAEC,QAAQ,EAAEC,IAAI,EAAE,GAAGC,YAAY1B,QAAQ,SAAS,CAAC,GAAG;oBAC5D,OAAO;wBACL,QAAQ;4BACN,MAAM;4BACN,YAAYwB;4BACZ,MAAMC;wBACR;wBACA,MAAM;oBACR;gBACF;gBACA,OAAOzB;YACT;YAEA,IAAID,aAAa;gBACf,MAAMO,SAAU,MAAMlB,WAAW,MAAM,CAAC;oBACtC,OAAOzC;oBACP,QAAQ;oBACR,UAAUuC,SAAS,GAAG,CAAC,CAACyC,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiBrC;oBACjB,GAAGc,YAAY;gBACjB;gBAEA,WAAW,MAAMG,SAASD,OAAQ;wBAChBuB;oBAAhB,MAAM7B,UAAU6B,AAAAA,SAAAA,CAAAA,eAAAA,MAAM,KAAK,AAAD,IAAVA,KAAAA,IAAAA,aAAa,IAAI,AAAD,KAAK;oBACrC,IAAI7B,SAAS;wBACXC,eAAeD;wBACf,MAAMY,YAAiC;4BACrCZ;4BACAC;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAOY;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAIL,AAAe,mBAAfA,MAAM,IAAI,EAAqB;wBACjCJ,WAAWL,KAAK,GAAG,KAAKD;wBACxB,MAAMiC,iBAAiBvB,MAAM,KAAK;wBAGlC,MAAMS,aAAkC;4BACtC,SAAS;4BACTf;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO6B,iBACH;gCACE,eAAeA,eAAe,YAAY,IAAI;gCAC9C,mBAAmBA,eAAe,aAAa,IAAI;gCACnD,cACGA,AAAAA,CAAAA,eAAe,YAAY,IAAI,KAC/BA,CAAAA,eAAe,aAAa,IAAI;gCACnC,WAAW3B,YAAY;gCACvB,YAAYxD;gCACZ,mBAAmBc;gCACnB,QAAQjB,YAAY,MAAM;4BAC5B,IACAqE;wBACN;wBACA1B,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAhB,UAAUC;YACZ,OAAO;gBACL,MAAMmB,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrC,OAAOzC;oBACP,QAAQ;oBACR,UAAUuC,SAAS,GAAG,CAAC,CAACyC,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiBrC;oBACjB,GAAGc,YAAY;gBACjB;gBACAD,WAAWL,KAAK,GAAG,KAAKD;gBACxBG,UAAWoB,OAAe,OAAO,CAAC,EAAE,CAAC,IAAI;gBACzClB,QAAQkB,OAAO,KAAK;YACtB;YAEAhD,OAAO4B,SAAS;QAClB;QAEA,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMY,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEf,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAeY;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASd,WAAW;YACpB,OAAOE,QACH;gBACE,eAAeA,MAAM,aAAa,IAAI;gBACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gBAC9C,cAAcA,MAAM,YAAY,IAAI;gBACpC,WAAWC,YAAY;gBACvB,YAAYxD;gBACZ,mBAAmBc;gBACnB,QAAQjB,YAAY,MAAM;YAC5B,IACAqE;YACJ,YAAY,CAAC,CAACd;QAChB;IACF,EAAE,OAAOgC,GAAQ;QACfjD,QAAQ,KAAK,CAAC,kBAAkBiD;QAChC,MAAMC,WAAW,IAAInD,MACnB,CAAC,eAAe,EAAEkB,cAAc,eAAe,GAAG,kBAAkB,EAAEgC,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,MAAMzC,oBAAoB,CAC/B5C,WACAJ;IAIA,IAAI+C;IAKJ,IAAI3C,UAAU,QAAQ,CAAC,UACrB,OAAQJ;QACN,KAAK0F,aAAa,MAAM;YACtB3C,iBAAiB4C;YACjB;QACF,KAAKD,aAAa,eAAe;YAC/B3C,iBAAiB6C;YACjB;QACF,KAAKF,aAAa,IAAI;YACpB3C,iBAAiB8C;YACjB;QACF,KAAKH,aAAa,YAAY;QAC9B,KAAKA,aAAa,gBAAgB;YAChC3C,iBAAiB;gBAAE,MAAM+C,iBAAiB,IAAI;YAAC;YAC/C;QACF,KAAKJ,aAAa,IAAI;YAEpB3C,iBAAiBuB;YACjB;IACJ;IAKF,IACElE,AAAc,wBAAdA,aACAJ,sBAAsB0F,aAAa,IAAI,EAEvC3C,iBAAiB;QAAE,MAAM+C,iBAAiB,IAAI;IAAC;IAGjD,OAAO/C;AACT;AAEO,eAAegD,yBACpBpD,QAAsC,EACtC3C,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAM+F,WAAW,MAAMtD,OAAOC,UAAU3C,mBAAmBC;IAC3D4B,OAAOmE,UAAU;IACjB,MAAM5E,SAASnB,YAAY,MAAM;IACjC,MAAMgG,cAAcC,cAAcF,SAAS,OAAO,EAAE5E;IACpD,OAAO;QAAE,SAAS6E;QAAa,OAAOD,SAAS,KAAK;IAAC;AACvD;AAEO,eAAeG,yBACpBC,IAAY,EACZpG,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAM,EAAEwD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMjB,OAAO0D,MAAMpG,mBAAmBC;IACjE,OAAO;QAAEwD;QAASE;IAAM;AAC1B;AAEO,SAAS0C,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASR,cAAcQ,KAAa,EAAEtF,MAAgC;IAC3E,MAAMuF,kBAAkBN,yBAAyBK;IAEjD,IAAIC,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,KAAK,CAAC,oBAAoB;YACtCC;QAAP,OAAO,QAAAA,CAAAA,yBAAAA,gBACJ,KAAK,CAAC,kBAAiB,IADnBA,KAAAA,IAAAA,uBAEH,KAAK,CAAC,GACP,GAAG,CAAC9C;IACT;IACA,IAAI;QACF,OAAOgB,KAAK,KAAK,CAAC6B;IACpB,EAAE,OAAM,CAAC;IACT,IAAI;QACF,OAAO7B,KAAK,KAAK,CAAC+B,WAAWF;IAC/B,EAAE,OAAOnB,GAAG,CAAC;IAEb,IAAIpE,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAM0F,aAAaL,yBAAyBE;QAC5C,OAAO7B,KAAK,KAAK,CAAC+B,WAAWC;IAC/B;IACA,MAAMxE,MAAM,CAAC,+BAA+B,EAAEoE,OAAO;AACvD"}
|
|
1
|
+
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIActionType, AIArgs } from '../../common';\n\nasync function createChatClient({\n AIActionTypeValue,\n modelConfig,\n}: {\n AIActionTypeValue: AIActionType;\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n console.warn(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n console.warn(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n console.error('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n const { completion, modelName, modelDescription, uiTarsVersion, vlMode } =\n await createChatClient({\n AIActionTypeValue,\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValue(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature: vlMode === 'vlm-ui-tars' ? 0.0 : undefined,\n stream: !!isStreaming,\n max_tokens: typeof maxTokens === 'number' ? maxTokens : undefined,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: T; contentString: string; usage?: AIUsageInfo }> {\n const response = await callAI(messages, AIActionTypeValue, modelConfig);\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch {}\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (e) {}\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["createChatClient","AIActionTypeValue","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","console","moduleName","ProxyAgent","socksDispatcher","proxyUrl","Error","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","isStreaming","content","accumulated","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","undefined","stream","chunk","reasoning_content","chunkData","estimatedTokens","Math","finalChunk","result","JSON","assert","e","newError","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","normalizeJsonObject","obj","Array","item","normalized","key","value","Object","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","jsonString"],"mappings":";;;;;AAqBA,eAAeA,iBAAiB,EAC9BC,iBAAiB,EACjBC,WAAW,EAIZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS;IAI5B,MAAMC,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAId,WAAW;QACbW,WAAW,oBAAoBE,iBAAiBb;QAChD,IAAIiB,aACFC,QAAQ,IAAI,CACV;aAEG;YAEL,MAAMC,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCT,aAAa,IAAIU,WAAW;gBAC1B,KAAKpB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBY,WAAW,qBAAqBE,iBAAiBd;QACjD,IAAIkB,aACFC,QAAQ,IAAI,CACV;aAGF,IAAI;YAEF,MAAMC,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIN,IAAIjB;YAGzB,IAAI,CAACuB,SAAS,QAAQ,EACpB,MAAM,IAAIC,MAAM;YAIlB,MAAMC,OAAOC,OAAO,QAAQ,CAACH,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIG,OAAO,KAAK,CAACD,OACjC,MAAM,IAAID,MAAM;YAIlB,MAAMG,WAAWJ,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMK,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaW,gBAAgB;gBAC3B,MAAMM;gBACN,MAAML,SAAS,QAAQ;gBACvBE;gBACA,GAAIF,SAAS,QAAQ,GACjB;oBACE,QAAQM,mBAAmBN,SAAS,QAAQ;oBAC5C,UAAUM,mBAAmBN,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAX,WAAW,uCAAuC;gBAChD,MAAMgB;gBACN,MAAML,SAAS,QAAQ;gBACvB,MAAME;YACR;QACF,EAAE,OAAOK,OAAO;YACdX,QAAQ,KAAK,CAAC,oCAAoCW;YAClD,MAAM,IAAIN,MACR,CAAC,yBAAyB,EAAExB,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,gBAAgB;QACpB,SAAS5B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIlB,aACF,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMkB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACI,0BAC1C;QACA,IAAIrB,aACF,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMqB,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnChC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC9C,iBAA+B,EAC/BC,WAAyB,EACzB8C,OAGC;IAED,MAAM,EAAEC,UAAU,EAAE5C,SAAS,EAAEI,gBAAgB,EAAEC,aAAa,EAAEC,MAAM,EAAE,GACtE,MAAMX,iBAAiB;QACrBC;QACAC;IACF;IAEF,MAAMgD,YACJZ,oBAAoB,iBAAiB,CAACa,8BACtCb,oBAAoB,iBAAiB,CAACc;IACxC,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,oBAAoBtC,SAAS;IACnC,MAAMuC,qBAAqBvC,SAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAE1B,MAAMC,cAAcV,SAAS,UAAUA,SAAS;IAChD,IAAIW;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAYzD;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMgE,eAAe;QACnB,aAAavD,AAAW,kBAAXA,SAA2B,MAAMwD;QAC9C,QAAQ,CAAC,CAACT;QACV,YAAY,AAAqB,YAArB,OAAOR,YAAyBA,YAAYiB;QACxD,GAAIxD,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACF0C,UACE,CAAC,QAAQ,EAAEK,cAAc,eAAe,GAAG,WAAW,EAAErD,WAAW;QAGrE,IAAIqD,aAAa;YACf,MAAMU,SAAU,MAAMnB,WAAW,MAAM,CACrC;gBACE,OAAO5C;gBACP0C;gBACA,GAAGmB,YAAY;YACjB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMG,SAASD,OAAQ;gBAChC,MAAMT,UAAUU,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbR,QAAQQ,MAAM,KAAK;gBAGrB,IAAIV,WAAWW,mBAAmB;oBAChCV,eAAeD;oBACf,MAAMY,YAAiC;wBACrCZ;wBACAW;wBACAV;wBACA,YAAY;wBACZ,OAAOO;oBACT;oBACAnB,QAAQ,OAAO,CAAEuB;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCP,WAAWL,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACK,OAAO;wBAEV,MAAMW,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACb,YAAY,MAAM,GAAG;wBAElCC,QAAQ;4BACN,eAAeW;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTd;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOG,eAAeF;oBACxB;oBACAb,QAAQ,OAAO,CAAE0B;oBACjB;gBACF;YACF;YACAf,UAAUC;YACVN,kBACE,CAAC,iBAAiB,EAAEjD,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEmD,UAAU;QAEvF,OAAO;YACL,MAAMa,SAAS,MAAM1B,WAAW,MAAM,CAAC;gBACrC,OAAO5C;gBACP0C;gBACA,GAAGmB,YAAY;YACjB;YACAJ,WAAWL,KAAK,GAAG,KAAKD;YAExBF,kBACE,CAAC,OAAO,EAAEjD,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEiE,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEb,SAAS,aAAa,EAAEa,OAAO,WAAW,IAAI,IAAI;YAG3TpB,mBAAmB,CAAC,oBAAoB,EAAEqB,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;YAExEE,OACEF,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;YAEhEhB,UAAUgB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;YAC3Cd,QAAQc,OAAO,KAAK;QACtB;QAEAtB,UAAU,CAAC,UAAU,EAAEM,SAAS;QAChCkB,OAAOlB,SAAS;QAGhB,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMW,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEd,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAeW;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASb,WAAW;YACpB,OAAOI,eAAeF;YACtB,YAAY,CAAC,CAACH;QAChB;IACF,EAAE,OAAOoB,GAAQ;QACfxD,QAAQ,KAAK,CAAC,kBAAkBwD;QAChC,MAAMC,WAAW,IAAIpD,MACnB,CAAC,eAAe,EAAE+B,cAAc,eAAe,GAAG,kBAAkB,EAAErD,UAAU,GAAG,EAAEyE,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpBjC,QAAsC,EACtC9C,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAM+E,WAAW,MAAMnC,OAAOC,UAAU9C,mBAAmBC;IAC3D2E,OAAOI,UAAU;IACjB,MAAMtE,SAAST,YAAY,MAAM;IACjC,MAAMgF,cAAcC,cAAcF,SAAS,OAAO,EAAEtE;IACpD,OAAO;QACL,SAASuE;QACT,eAAeD,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;IACvB;AACF;AAEO,eAAeG,yBACpBC,IAAY,EACZpF,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAM,EAAEyD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMf,OAAOuC,MAAMpF,mBAAmBC;IACjE,OAAO;QAAEyD;QAASE;IAAM;AAC1B;AAEO,SAASyB,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAQA,SAASC,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACE,OAASH,oBAAoBG;IAI/C,IAAI,AAAe,YAAf,OAAOF,KAAkB;QAC3B,MAAMG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACN,KAAM;YAE9C,MAAMO,aAAaH,IAAI,IAAI;YAG3B,IAAII,kBAAkBT,oBAAoBM;YAG1C,IAAI,AAA2B,YAA3B,OAAOG,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCL,UAAU,CAACI,WAAW,GAAGC;QAC3B;QAEA,OAAOL;IACT;IAGA,IAAI,AAAe,YAAf,OAAOH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASV,cAAcQ,KAAa,EAAEhF,MAAgC;IAC3E,MAAM2F,kBAAkBhB,yBAAyBK;IAEjD,IAAIW,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAIzE;IAGT,IAAIV;IACJ,IAAI;QACFA,SAASyD,KAAK,KAAK,CAAC0B;QACpB,OAAOV,oBAAoBzE;IAC7B,EAAE,OAAM,CAAC;IACT,IAAI;QACFA,SAASyD,KAAK,KAAK,CAAC2B,WAAWD;QAC/B,OAAOV,oBAAoBzE;IAC7B,EAAE,OAAO2D,GAAG,CAAC;IAEb,IAAInE,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAM6F,aAAad,yBAAyBY;QAC5CnF,SAASyD,KAAK,KAAK,CAAC2B,WAAWC;QAC/B,OAAOZ,oBAAoBzE;IAC7B;IACA,MAAMQ,MAAM,CAAC,+BAA+B,EAAEgE,OAAO;AACvD"}
|
|
@@ -4,7 +4,7 @@ import { getDebug } from "@midscene/shared/logger";
|
|
|
4
4
|
import { transformHotkeyInput } from "@midscene/shared/us-keyboard-layout";
|
|
5
5
|
import { assert } from "@midscene/shared/utils";
|
|
6
6
|
import { actionParser } from "@ui-tars/action-parser";
|
|
7
|
-
import { AIActionType } from "
|
|
7
|
+
import { AIActionType } from "../common.mjs";
|
|
8
8
|
import { getSummary, getUiTarsPlanningPrompt } from "./prompt/ui-tars-planning.mjs";
|
|
9
9
|
import { callAIWithStringResponse } from "./service-caller/index.mjs";
|
|
10
10
|
const debug = getDebug('ui-tars-planning');
|
|
@@ -16,9 +16,11 @@ const pointToBbox = (point, width, height)=>[
|
|
|
16
16
|
Math.round(Math.min(point.y + bboxSize / 2, height))
|
|
17
17
|
];
|
|
18
18
|
async function uiTarsPlanning(userInstruction, options) {
|
|
19
|
-
const { conversationHistory, context, modelConfig } = options;
|
|
19
|
+
const { conversationHistory, context, modelConfig, actionContext } = options;
|
|
20
20
|
const { uiTarsModelVersion } = modelConfig;
|
|
21
|
-
|
|
21
|
+
let instruction = userInstruction;
|
|
22
|
+
if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstruction}</user_instruction>`;
|
|
23
|
+
const systemPrompt = getUiTarsPlanningPrompt() + instruction;
|
|
22
24
|
const imagePayload = await resizeImageForUiTars(context.screenshotBase64, context.size, uiTarsModelVersion);
|
|
23
25
|
conversationHistory.append({
|
|
24
26
|
role: 'user',
|
|
@@ -54,6 +56,7 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
54
56
|
});
|
|
55
57
|
debug('ui-tars modelVer', uiTarsModelVersion, ', parsed', JSON.stringify(parsed));
|
|
56
58
|
const transformActions = [];
|
|
59
|
+
const unhandledActions = [];
|
|
57
60
|
let shouldContinue = true;
|
|
58
61
|
parsed.forEach((action)=>{
|
|
59
62
|
const actionType = (action.action_type || '').toLowerCase();
|
|
@@ -72,6 +75,38 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
72
75
|
}
|
|
73
76
|
}
|
|
74
77
|
});
|
|
78
|
+
} else if ('left_double' === actionType) {
|
|
79
|
+
assert(action.action_inputs.start_box, 'start_box is required');
|
|
80
|
+
const point = getPoint(action.action_inputs.start_box, size);
|
|
81
|
+
transformActions.push({
|
|
82
|
+
type: 'DoubleClick',
|
|
83
|
+
param: {
|
|
84
|
+
locate: {
|
|
85
|
+
prompt: action.thought || '',
|
|
86
|
+
bbox: pointToBbox({
|
|
87
|
+
x: point[0],
|
|
88
|
+
y: point[1]
|
|
89
|
+
}, size.width, size.height)
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
thought: action.thought || ''
|
|
93
|
+
});
|
|
94
|
+
} else if ('right_single' === actionType) {
|
|
95
|
+
assert(action.action_inputs.start_box, 'start_box is required');
|
|
96
|
+
const point = getPoint(action.action_inputs.start_box, size);
|
|
97
|
+
transformActions.push({
|
|
98
|
+
type: 'RightClick',
|
|
99
|
+
param: {
|
|
100
|
+
locate: {
|
|
101
|
+
prompt: action.thought || '',
|
|
102
|
+
bbox: pointToBbox({
|
|
103
|
+
x: point[0],
|
|
104
|
+
y: point[1]
|
|
105
|
+
}, size.width, size.height)
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
thought: action.thought || ''
|
|
109
|
+
});
|
|
75
110
|
} else if ('drag' === actionType) {
|
|
76
111
|
assert(action.action_inputs.start_box, 'start_box is required');
|
|
77
112
|
assert(action.action_inputs.end_box, 'end_box is required');
|
|
@@ -135,13 +170,39 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
135
170
|
},
|
|
136
171
|
thought: action.thought || ''
|
|
137
172
|
});
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
173
|
+
else if (actionType) {
|
|
174
|
+
unhandledActions.push({
|
|
175
|
+
type: actionType,
|
|
176
|
+
thought: action.thought || ''
|
|
177
|
+
});
|
|
178
|
+
debug('Unhandled action type:', actionType, 'thought:', action.thought);
|
|
143
179
|
}
|
|
144
180
|
});
|
|
181
|
+
if (0 === transformActions.length) {
|
|
182
|
+
const errorDetails = [];
|
|
183
|
+
if (0 === parsed.length) {
|
|
184
|
+
errorDetails.push('Action parser returned no actions');
|
|
185
|
+
if (res.content.includes('Thought:') && !res.content.includes('Action:')) errorDetails.push('Response contains "Thought:" but missing "Action:" line');
|
|
186
|
+
else errorDetails.push('Response may be malformed or empty');
|
|
187
|
+
}
|
|
188
|
+
if (unhandledActions.length > 0) {
|
|
189
|
+
const types = unhandledActions.map((a)=>a.type).join(', ');
|
|
190
|
+
errorDetails.push(`Unhandled action types: ${types}`);
|
|
191
|
+
}
|
|
192
|
+
const errorMessage = [
|
|
193
|
+
'No actions found in UI-TARS response.',
|
|
194
|
+
...errorDetails,
|
|
195
|
+
`\nRaw response: ${res.content}`
|
|
196
|
+
].join('\n');
|
|
197
|
+
throw new Error(errorMessage, {
|
|
198
|
+
cause: {
|
|
199
|
+
prediction: res.content,
|
|
200
|
+
parsed,
|
|
201
|
+
unhandledActions,
|
|
202
|
+
convertedText
|
|
203
|
+
}
|
|
204
|
+
});
|
|
205
|
+
}
|
|
145
206
|
debug('transformActions', JSON.stringify(transformActions, null, 2));
|
|
146
207
|
const log = getSummary(res.content);
|
|
147
208
|
conversationHistory.append({
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/ui-tars-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/ui-tars-planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport { AIActionType } from './common';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { callAIWithStringResponse } from './service-caller/index';\ntype ActionType =\n | 'click'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig } = options;\n const { uiTarsModelVersion } = modelConfig;\n const systemPrompt = getUiTarsPlanningPrompt() + userInstruction;\n\n const imagePayload = await resizeImageForUiTars(\n context.screenshotBase64,\n context.size,\n uiTarsModelVersion,\n );\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n AIActionType.INSPECT_ELEMENT,\n modelConfig,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'Tap',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys,\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n }\n });\n\n if (transformActions.length === 0) {\n throw new Error(`No actions found, response: ${res.content}`, {\n cause: {\n prediction: res.content,\n parsed,\n },\n });\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n more_actions_needed_by_instruction: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n\nexport async function resizeImageForUiTars(\n imageBase64: string,\n size: Size,\n uiTarsVersion: UITarsModelVersion | undefined,\n) {\n if (uiTarsVersion === UITarsModelVersion.V1_5) {\n debug('ui-tars-v1.5, will check image size', size);\n const currentPixels = size.width * size.height;\n const maxPixels = 16384 * 28 * 28; //\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(size.width * resizeFactor);\n const newHeight = Math.floor(size.height * resizeFactor);\n debug(\n 'resize image for ui-tars, new width: %s, new height: %s',\n newWidth,\n newHeight,\n );\n const resizedImage = await resizeImgBase64(imageBase64, {\n width: newWidth,\n height: newHeight,\n });\n return resizedImage;\n }\n }\n return imageBase64;\n}\n"],"names":["debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","uiTarsModelVersion","systemPrompt","getUiTarsPlanningPrompt","imagePayload","resizeImageForUiTars","res","callAIWithStringResponse","AIActionType","convertedText","convertBboxToCoordinates","size","parsed","actionParser","JSON","transformActions","shouldContinue","action","actionType","assert","getPoint","startPoint","endPoint","keys","transformHotkeyInput","console","Error","log","getSummary","undefined","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","imageBase64","uiTarsVersion","UITarsModelVersion","currentPixels","maxPixels","resizeFactor","newWidth","newHeight","resizedImage","resizeImgBase64"],"mappings":";;;;;;;;;AAyBA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAIC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAE,GAAGH;IACtD,MAAM,EAAEI,kBAAkB,EAAE,GAAGD;IAC/B,MAAME,eAAeC,4BAA4BP;IAEjD,MAAMQ,eAAe,MAAMC,qBACzBN,QAAQ,gBAAgB,EACxBA,QAAQ,IAAI,EACZE;IAGFH,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKM;gBACP;YACF;SACD;IACH;IAEA,MAAME,MAAM,MAAMC,yBAChB;QACE;YACE,MAAM;YACN,SAASL;QACX;WACGJ,oBAAoB,QAAQ;KAChC,EACDU,aAAa,eAAe,EAC5BR;IAEF,MAAMS,gBAAgBC,yBAAyBJ,IAAI,OAAO;IAE1D,MAAM,EAAEK,IAAI,EAAE,GAAGZ;IACjB,MAAM,EAAEa,MAAM,EAAE,GAAGC,aAAa;QAC9B,YAAYJ;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOE,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUV;IACZ;IAEAd,MACE,oBACAc,oBACA,YACAa,KAAK,SAAS,CAACF;IAGjB,MAAMG,mBAAqC,EAAE;IAC7C,IAAIC,iBAAiB;IACrBJ,OAAO,OAAO,CAAC,CAACK;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM1B,QAAQ6B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEN;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQE,OAAO,OAAO,IAAI;wBAC1B,MAAM3B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BoB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;YACF;QACF,OAAO,IAAIO,AAAe,WAAfA,YAAuB;YAChCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,OAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMI,aAAaD,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEN;YAC5D,MAAMW,WAAWF,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEN;YACxDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQE,OAAO,OAAO,IAAI;wBAC1B,MAAM3B,YACJ;4BAAE,GAAG+B,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrCV,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQM,OAAO,OAAO,IAAI;wBAC1B,MAAM3B,YACJ;4BAAE,GAAGgC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCX,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASM,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTH,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOE,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTH,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWE,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBD,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASE,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMM,OAAOC,qBAAqBP,OAAO,aAAa,CAAC,GAAG;YAE1DF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASQ;gBACX;gBACA,SAASN,OAAO,OAAO,IAAI;YAC7B;QACF,OAbEQ,QAAQ,IAAI,CACV;aAaC,IAAIP,AAAe,WAAfA,YACTH,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASE,OAAO,OAAO,IAAI;QAC7B;IAEJ;IAEA,IAAIF,AAA4B,MAA5BA,iBAAiB,MAAM,EACzB,MAAM,IAAIW,MAAM,CAAC,4BAA4B,EAAEpB,IAAI,OAAO,EAAE,EAAE;QAC5D,OAAO;YACL,YAAYA,IAAI,OAAO;YACvBM;QACF;IACF;IAGFzB,MAAM,oBAAoB2B,KAAK,SAAS,CAACC,kBAAkB,MAAM;IACjE,MAAMY,MAAMC,WAAWtB,IAAI,OAAO;IAElCR,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS6B;IACX;IAEA,OAAO;QACL,SAASZ;QACTY;QACA,OAAOrB,IAAI,KAAK;QAChB,aAAaQ,KAAK,SAAS,CAACR,IAAI,OAAO,EAAEuB,QAAW;QACpD,oCAAoCb;IACtC;AACF;AAOA,SAASN,yBAAyBoB,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIjD,KAAK,KAAK,CAAE4C,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAIlD,KAAK,KAAK,CAAE8C,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASZ,SAAS0B,QAAgB,EAAEnC,IAAuC;IACzE,MAAM,CAACgC,GAAGC,EAAE,GAAG9B,KAAK,KAAK,CAACgC;IAC1B,OAAO;QAACH,IAAIhC,KAAK,KAAK;QAAEiC,IAAIjC,KAAK,MAAM;KAAC;AAC1C;AAkEO,eAAeN,qBACpB0C,WAAmB,EACnBpC,IAAU,EACVqC,aAA6C;IAE7C,IAAIA,kBAAkBC,mBAAmB,IAAI,EAAE;QAC7C9D,MAAM,uCAAuCwB;QAC7C,MAAMuC,gBAAgBvC,KAAK,KAAK,GAAGA,KAAK,MAAM;QAC9C,MAAMwC,YAAY;QAClB,IAAID,gBAAgBC,WAAW;YAC7B,MAAMC,eAAe1D,KAAK,IAAI,CAACyD,YAAYD;YAC3C,MAAMG,WAAW3D,KAAK,KAAK,CAACiB,KAAK,KAAK,GAAGyC;YACzC,MAAME,YAAY5D,KAAK,KAAK,CAACiB,KAAK,MAAM,GAAGyC;YAC3CjE,MACE,2DACAkE,UACAC;YAEF,MAAMC,eAAe,MAAMC,gBAAgBT,aAAa;gBACtD,OAAOM;gBACP,QAAQC;YACV;YACA,OAAOC;QACT;IACF;IACA,OAAOR;AACT"}
|
|
1
|
+
{"version":3,"file":"ai-model/ui-tars-planning.mjs","sources":["../../../src/ai-model/ui-tars-planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport { AIActionType } from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { callAIWithStringResponse } from './service-caller/index';\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n const { uiTarsModelVersion } = modelConfig;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const imagePayload = await resizeImageForUiTars(\n context.screenshotBase64,\n context.size,\n uiTarsModelVersion,\n );\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n AIActionType.INSPECT_ELEMENT,\n modelConfig,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'Tap',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'RightClick',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys,\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n `\\nRaw response: ${res.content}`,\n ].join('\\n');\n\n throw new Error(errorMessage, {\n cause: {\n prediction: res.content,\n parsed,\n unhandledActions,\n convertedText,\n },\n });\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n more_actions_needed_by_instruction: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n\nexport async function resizeImageForUiTars(\n imageBase64: string,\n size: Size,\n uiTarsVersion: UITarsModelVersion | undefined,\n) {\n if (uiTarsVersion === UITarsModelVersion.V1_5) {\n debug('ui-tars-v1.5, will check image size', size);\n const currentPixels = size.width * size.height;\n const maxPixels = 16384 * 28 * 28; //\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(size.width * resizeFactor);\n const newHeight = Math.floor(size.height * resizeFactor);\n debug(\n 'resize image for ui-tars, new width: %s, new height: %s',\n newWidth,\n newHeight,\n );\n const resizedImage = await resizeImgBase64(imageBase64, {\n width: newWidth,\n height: newHeight,\n });\n return resizedImage;\n }\n }\n return imageBase64;\n}\n"],"names":["debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","uiTarsModelVersion","instruction","systemPrompt","getUiTarsPlanningPrompt","imagePayload","resizeImageForUiTars","res","callAIWithStringResponse","AIActionType","convertedText","convertBboxToCoordinates","size","parsed","actionParser","JSON","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","startPoint","endPoint","keys","transformHotkeyInput","console","errorDetails","types","a","errorMessage","Error","log","getSummary","undefined","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","imageBase64","uiTarsVersion","UITarsModelVersion","currentPixels","maxPixels","resizeFactor","newWidth","newHeight","resizedImage","resizeImgBase64"],"mappings":";;;;;;;;;AA2BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IACrE,MAAM,EAAEK,kBAAkB,EAAE,GAAGF;IAE/B,IAAIG,cAAcP;IAClB,IAAIK,eACFE,cAAc,CAAC,yBAAyB,EAAEF,cAAc,8CAA8C,EAAEL,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,4BAA4BF;IAEjD,MAAMG,eAAe,MAAMC,qBACzBR,QAAQ,gBAAgB,EACxBA,QAAQ,IAAI,EACZG;IAGFJ,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKQ;gBACP;YACF;SACD;IACH;IAEA,MAAME,MAAM,MAAMC,yBAChB;QACE;YACE,MAAM;YACN,SAASL;QACX;WACGN,oBAAoB,QAAQ;KAChC,EACDY,aAAa,eAAe,EAC5BV;IAEF,MAAMW,gBAAgBC,yBAAyBJ,IAAI,OAAO;IAE1D,MAAM,EAAEK,IAAI,EAAE,GAAGd;IACjB,MAAM,EAAEe,MAAM,EAAE,GAAGC,aAAa;QAC9B,YAAYJ;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOE,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUX;IACZ;IAEAf,MACE,oBACAe,oBACA,YACAc,KAAK,SAAS,CAACF;IAGjB,MAAMG,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBL,OAAO,OAAO,CAAC,CAACM;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM7B,QAAQgC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM9B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BsB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;YACF;QACF,OAAO,IAAIQ,AAAe,kBAAfA,YAA8B;YACvCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM7B,QAAQgC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM9B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BsB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM7B,QAAQgC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM9B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BsB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,OAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMI,aAAaD,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAC5D,MAAMY,WAAWF,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEP;YACxDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM9B,YACJ;4BAAE,GAAGkC,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrCX,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQO,OAAO,OAAO,IAAI;wBAC1B,MAAM9B,YACJ;4BAAE,GAAGmC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCZ,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMM,OAAOC,qBAAqBP,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASS;gBACX;gBACA,SAASN,OAAO,OAAO,IAAI;YAC7B;QACF,OAbEQ,QAAQ,IAAI,CACV;aAaC,IAAIP,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAjC,MAAM,0BAA0BkC,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIf,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBe,aAAa,IAAI,CAAC;YAGlB,IACErB,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtBqB,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAME,eAAe;YACnB;eACGH;YACH,CAAC,gBAAgB,EAAErB,IAAI,OAAO,EAAE;SACjC,CAAC,IAAI,CAAC;QAEP,MAAM,IAAIyB,MAAMD,cAAc;YAC5B,OAAO;gBACL,YAAYxB,IAAI,OAAO;gBACvBM;gBACAI;gBACAP;YACF;QACF;IACF;IAEAxB,MAAM,oBAAoB6B,KAAK,SAAS,CAACC,kBAAkB,MAAM;IACjE,MAAMiB,MAAMC,WAAW3B,IAAI,OAAO;IAElCV,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASoC;IACX;IAEA,OAAO;QACL,SAASjB;QACTiB;QACA,OAAO1B,IAAI,KAAK;QAChB,aAAaQ,KAAK,SAAS,CAACR,IAAI,OAAO,EAAE4B,QAAW;QACpD,oCAAoCjB;IACtC;AACF;AAOA,SAASP,yBAAyByB,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIxD,KAAK,KAAK,CAAEmD,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAIzD,KAAK,KAAK,CAAEqD,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAAShB,SAAS8B,QAAgB,EAAExC,IAAuC;IACzE,MAAM,CAACqC,GAAGC,EAAE,GAAGnC,KAAK,KAAK,CAACqC;IAC1B,OAAO;QAACH,IAAIrC,KAAK,KAAK;QAAEsC,IAAItC,KAAK,MAAM;KAAC;AAC1C;AAkFO,eAAeN,qBACpB+C,WAAmB,EACnBzC,IAAU,EACV0C,aAA6C;IAE7C,IAAIA,kBAAkBC,mBAAmB,IAAI,EAAE;QAC7CrE,MAAM,uCAAuC0B;QAC7C,MAAM4C,gBAAgB5C,KAAK,KAAK,GAAGA,KAAK,MAAM;QAC9C,MAAM6C,YAAY;QAClB,IAAID,gBAAgBC,WAAW;YAC7B,MAAMC,eAAejE,KAAK,IAAI,CAACgE,YAAYD;YAC3C,MAAMG,WAAWlE,KAAK,KAAK,CAACmB,KAAK,KAAK,GAAG8C;YACzC,MAAME,YAAYnE,KAAK,KAAK,CAACmB,KAAK,MAAM,GAAG8C;YAC3CxE,MACE,2DACAyE,UACAC;YAEF,MAAMC,eAAe,MAAMC,gBAAgBT,aAAa;gBACtD,OAAOM;gBACP,QAAQC;YACV;YACA,OAAOC;QACT;IACF;IACA,OAAOR;AACT"}
|
|
@@ -16,14 +16,14 @@ var common_AIActionType = /*#__PURE__*/ function(AIActionType) {
|
|
|
16
16
|
const defaultBboxSize = 20;
|
|
17
17
|
const debugInspectUtils = getDebug('ai:common');
|
|
18
18
|
function fillBboxParam(locate, width, height, rightLimit, bottomLimit, vlMode) {
|
|
19
|
-
if (locate.bbox_2d && !
|
|
19
|
+
if (locate.bbox_2d && !locate?.bbox) {
|
|
20
20
|
locate.bbox = locate.bbox_2d;
|
|
21
21
|
delete locate.bbox_2d;
|
|
22
22
|
}
|
|
23
|
-
if (
|
|
23
|
+
if (locate?.bbox) locate.bbox = adaptBbox(locate.bbox, width, height, rightLimit, bottomLimit, vlMode);
|
|
24
24
|
return locate;
|
|
25
25
|
}
|
|
26
|
-
function
|
|
26
|
+
function adaptQwen2_5Bbox(bbox) {
|
|
27
27
|
if (bbox.length < 2) {
|
|
28
28
|
const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;
|
|
29
29
|
throw new Error(msg);
|
|
@@ -49,7 +49,6 @@ function adaptDoubaoBbox(bbox, width, height) {
|
|
|
49
49
|
];
|
|
50
50
|
throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);
|
|
51
51
|
}
|
|
52
|
-
if (Array.isArray(bbox) && Array.isArray(bbox[0])) bbox = bbox[0];
|
|
53
52
|
let bboxList = [];
|
|
54
53
|
if (Array.isArray(bbox) && 'string' == typeof bbox[0]) bbox.forEach((item)=>{
|
|
55
54
|
if ('string' == typeof item && item.includes(',')) {
|
|
@@ -82,14 +81,21 @@ function adaptDoubaoBbox(bbox, width, height) {
|
|
|
82
81
|
const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;
|
|
83
82
|
throw new Error(msg);
|
|
84
83
|
}
|
|
84
|
+
function normalizeBboxInput(bbox) {
|
|
85
|
+
if (Array.isArray(bbox)) {
|
|
86
|
+
if (Array.isArray(bbox[0])) return bbox[0];
|
|
87
|
+
}
|
|
88
|
+
return bbox;
|
|
89
|
+
}
|
|
85
90
|
function adaptBbox(bbox, width, height, rightLimit, bottomLimit, vlMode) {
|
|
91
|
+
const normalizedBbox = normalizeBboxInput(bbox);
|
|
86
92
|
let result = [
|
|
87
93
|
0,
|
|
88
94
|
0,
|
|
89
95
|
0,
|
|
90
96
|
0
|
|
91
97
|
];
|
|
92
|
-
result = 'doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode ? adaptDoubaoBbox(
|
|
98
|
+
result = 'doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode ? adaptDoubaoBbox(normalizedBbox, width, height) : 'gemini' === vlMode ? adaptGeminiBbox(normalizedBbox, width, height) : 'qwen3-vl' === vlMode ? normalized01000(normalizedBbox, width, height) : adaptQwen2_5Bbox(normalizedBbox);
|
|
93
99
|
result[2] = Math.min(result[2], rightLimit);
|
|
94
100
|
result[3] = Math.min(result[3], bottomLimit);
|
|
95
101
|
return result;
|
|
@@ -134,20 +140,6 @@ function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0, rightLim
|
|
|
134
140
|
debugInspectUtils('adaptBboxToRect, result=', rect);
|
|
135
141
|
return rect;
|
|
136
142
|
}
|
|
137
|
-
let warned = false;
|
|
138
|
-
function warnGPT4oSizeLimit(size, modelName) {
|
|
139
|
-
if (warned) return;
|
|
140
|
-
if (modelName.toLowerCase().includes('gpt-4o')) {
|
|
141
|
-
const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your interface to a smaller resolution. Otherwise, the result may be inaccurate.`;
|
|
142
|
-
if (Math.max(size.width, size.height) > 2000 || Math.min(size.width, size.height) > 768) {
|
|
143
|
-
console.warn(warningMsg);
|
|
144
|
-
warned = true;
|
|
145
|
-
}
|
|
146
|
-
} else if (size.width > 1800 || size.height > 1800) {
|
|
147
|
-
console.warn(`The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`);
|
|
148
|
-
warned = true;
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
143
|
function mergeRects(rects) {
|
|
152
144
|
const minLeft = Math.min(...rects.map((r)=>r.left));
|
|
153
145
|
const minTop = Math.min(...rects.map((r)=>r.top));
|
|
@@ -161,7 +153,7 @@ function mergeRects(rects) {
|
|
|
161
153
|
};
|
|
162
154
|
}
|
|
163
155
|
function expandSearchArea(rect, screenSize, vlMode) {
|
|
164
|
-
const minEdgeSize =
|
|
156
|
+
const minEdgeSize = 500;
|
|
165
157
|
const defaultPadding = 160;
|
|
166
158
|
const paddingSizeHorizontal = rect.width < minEdgeSize ? Math.ceil((minEdgeSize - rect.width) / 2) : defaultPadding;
|
|
167
159
|
const paddingSizeVertical = rect.height < minEdgeSize ? Math.ceil((minEdgeSize - rect.height) / 2) : defaultPadding;
|
|
@@ -265,10 +257,9 @@ z.object({
|
|
|
265
257
|
}).passthrough();
|
|
266
258
|
const getMidsceneLocationSchema = ()=>MidsceneLocationInput;
|
|
267
259
|
const ifMidsceneLocatorField = (field)=>{
|
|
268
|
-
var _actualField__def, _actualField__def1;
|
|
269
260
|
let actualField = field;
|
|
270
|
-
if (
|
|
271
|
-
if (
|
|
261
|
+
if (actualField._def?.typeName === 'ZodOptional') actualField = actualField._def.innerType;
|
|
262
|
+
if (actualField._def?.typeName === 'ZodObject') {
|
|
272
263
|
const shape = actualField._def.shape();
|
|
273
264
|
if (locateFieldFlagName in shape) return true;
|
|
274
265
|
if ('prompt' in shape && shape.prompt) return true;
|
|
@@ -285,18 +276,14 @@ const dumpMidsceneLocatorField = (field)=>{
|
|
|
285
276
|
return String(field);
|
|
286
277
|
};
|
|
287
278
|
const findAllMidsceneLocatorField = (zodType, requiredOnly)=>{
|
|
288
|
-
var _zodObject__def;
|
|
289
279
|
if (!zodType) return [];
|
|
290
280
|
const zodObject = zodType;
|
|
291
|
-
if (
|
|
281
|
+
if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {
|
|
292
282
|
const keys = Object.keys(zodObject.shape);
|
|
293
283
|
return keys.filter((key)=>{
|
|
294
284
|
const field = zodObject.shape[key];
|
|
295
285
|
if (!ifMidsceneLocatorField(field)) return false;
|
|
296
|
-
if (requiredOnly)
|
|
297
|
-
var _field__def;
|
|
298
|
-
return (null == (_field__def = field._def) ? void 0 : _field__def.typeName) !== 'ZodOptional';
|
|
299
|
-
}
|
|
286
|
+
if (requiredOnly) return field._def?.typeName !== 'ZodOptional';
|
|
300
287
|
return true;
|
|
301
288
|
});
|
|
302
289
|
}
|
|
@@ -336,6 +323,7 @@ const loadActionParam = (jsonObject, zodSchema)=>{
|
|
|
336
323
|
return result;
|
|
337
324
|
};
|
|
338
325
|
const parseActionParam = (rawParam, zodSchema)=>{
|
|
326
|
+
if (!zodSchema) return;
|
|
339
327
|
const param = rawParam ?? {};
|
|
340
328
|
const locateFields = findAllMidsceneLocatorField(zodSchema);
|
|
341
329
|
if (0 === locateFields.length) return zodSchema.parse(param);
|
|
@@ -350,6 +338,6 @@ const parseActionParam = (rawParam, zodSchema)=>{
|
|
|
350
338
|
for(const fieldName in locateFieldValues)validated[fieldName] = locateFieldValues[fieldName];
|
|
351
339
|
return validated;
|
|
352
340
|
};
|
|
353
|
-
export { common_AIActionType as AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox,
|
|
341
|
+
export { common_AIActionType as AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptQwen2_5Bbox, buildYamlFlowFromPlans, dumpActionParam, dumpMidsceneLocatorField, expandSearchArea, fillBboxParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, ifMidsceneLocatorField, loadActionParam, markupImageForLLM, mergeRects, normalized01000, parseActionParam };
|
|
354
342
|
|
|
355
343
|
//# sourceMappingURL=common.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"common.mjs","sources":["../../src/common.ts"],"sourcesContent":["import type {\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = ChatCompletionMessageParam[];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n TEXT = 5,\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\ntype AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(\n locate.bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n\n return locate;\n}\n\nexport function adaptQwen2_5Bbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nfunction normalizeBboxInput(\n bbox: AdaptBboxInput,\n): number[] | string[] | string {\n if (Array.isArray(bbox)) {\n if (Array.isArray(bbox[0])) {\n return bbox[0] as number[] | string[];\n }\n return bbox as number[] | string[];\n }\n return bbox as string;\n}\n\nexport function adaptBbox(\n bbox: AdaptBboxInput,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n): [number, number, number, number] {\n const normalizedBbox = normalizeBboxInput(bbox);\n\n let result: [number, number, number, number] = [0, 0, 0, 0];\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n result = adaptDoubaoBbox(normalizedBbox, width, height);\n } else if (vlMode === 'gemini') {\n result = adaptGeminiBbox(normalizedBbox as number[], width, height);\n } else if (vlMode === 'qwen3-vl') {\n result = normalized01000(normalizedBbox as number[], width, height);\n } else {\n result = adaptQwen2_5Bbox(normalizedBbox as number[]);\n }\n\n result[2] = Math.min(result[2], rightLimit);\n result[3] = Math.min(result[3], bottomLimit);\n\n return result;\n}\n\n// x1, y1, x2, y2 -> 0-1000\nexport function normalized01000(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n return [\n Math.round((bbox[0] * width) / 1000),\n Math.round((bbox[1] * height) / 1000),\n Math.round((bbox[2] * width) / 1000),\n Math.round((bbox[3] * height) / 1000),\n ];\n}\n\n// y1, x1, y2, x2 -> 0-1000\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n rightLimit = width,\n bottomLimit = height,\n vlMode?: TVlModeTypes | undefined,\n): Rect {\n debugInspectUtils(\n 'adaptBboxToRect',\n bbox,\n width,\n height,\n 'offset',\n offsetX,\n offsetY,\n 'limit',\n rightLimit,\n bottomLimit,\n 'vlMode',\n vlMode,\n );\n const [left, top, right, bottom] = adaptBbox(\n bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n\n // Calculate initial rect dimensions\n const rectLeft = left;\n const rectTop = top;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft + offsetX,\n top: rectTop + offsetY,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n\n return rect;\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n vlMode: TVlModeTypes | undefined,\n) {\n const minEdgeSize = 500; // vlMode === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n // Calculate padding needed to reach minimum edge size\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n\n // Calculate new dimensions (ensure minimum edge size)\n let newWidth = Math.max(minEdgeSize, rect.width + paddingSizeHorizontal * 2);\n let newHeight = Math.max(minEdgeSize, rect.height + paddingSizeVertical * 2);\n\n // Calculate initial position with padding\n let newLeft = rect.left - paddingSizeHorizontal;\n let newTop = rect.top - paddingSizeVertical;\n\n // Ensure the rect doesn't exceed screen boundaries by adjusting position\n // If the rect goes beyond the right edge, shift it left\n if (newLeft + newWidth > screenSize.width) {\n newLeft = screenSize.width - newWidth;\n }\n\n // If the rect goes beyond the bottom edge, shift it up\n if (newTop + newHeight > screenSize.height) {\n newTop = screenSize.height - newHeight;\n }\n\n // Ensure the rect doesn't go beyond the left/top edges\n newLeft = Math.max(0, newLeft);\n newTop = Math.max(0, newTop);\n\n // If after position adjustment, the rect still exceeds screen boundaries,\n // clamp the dimensions to fit within screen\n if (newLeft + newWidth > screenSize.width) {\n newWidth = screenSize.width - newLeft;\n }\n if (newTop + newHeight > screenSize.height) {\n newHeight = screenSize.height - newTop;\n }\n\n rect.left = newLeft;\n rect.top = newTop;\n rect.width = newWidth;\n rect.height = newHeight;\n\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\n// Schema for locator field input (when users provide locate parameters)\nconst MidsceneLocationInput = z\n .object({\n prompt: TUserPromptSchema,\n deepThink: z.boolean().optional(),\n cacheable: z.boolean().optional(),\n xpath: z.union([z.string(), z.boolean()]).optional(),\n })\n .passthrough();\n\n// Schema for locator field result (when AI returns locate results)\nconst MidsceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\n// Export the result type - this is used for runtime results that include center and rect\nexport type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;\n\n// Export the input type - this is the inferred type from getMidsceneLocationSchema()\nexport type MidsceneLocationInputType = z.infer<typeof MidsceneLocationInput>;\n\n/**\n * Returns the schema for locator fields.\n * This now returns the input schema which is more permissive and suitable for validation.\n */\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationInput;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodObject\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n\n // Method 1: Check for the location field flag (for result schema)\n if (locateFieldFlagName in shape) {\n return true;\n }\n\n // Method 2: Check if it's the input schema by checking for 'prompt' field\n // Input schema has 'prompt' as a required field\n if ('prompt' in shape && shape.prompt) {\n return true;\n }\n }\n\n return false;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n\n/**\n * Parse and validate action parameters using Zod schema.\n * All fields are validated through Zod, EXCEPT locator fields which are skipped.\n * Default values defined in the schema are automatically applied.\n *\n * Locator fields are special business logic fields with complex validation requirements,\n * so they are intentionally excluded from Zod parsing and use existing validation logic.\n */\nexport const parseActionParam = (\n rawParam: Record<string, any> | undefined,\n zodSchema?: z.ZodType<any>,\n): Record<string, any> | undefined => {\n // If no schema is provided, return undefined (action takes no parameters)\n if (!zodSchema) {\n return undefined;\n }\n\n // Handle undefined or null rawParam by providing an empty object\n const param = rawParam ?? {};\n\n // Find all locate fields in the schema\n const locateFields = findAllMidsceneLocatorField(zodSchema);\n\n // If there are no locate fields, just do normal validation\n if (locateFields.length === 0) {\n return zodSchema.parse(param);\n }\n\n // Extract locate field values to restore later\n const locateFieldValues: Record<string, any> = {};\n for (const fieldName of locateFields) {\n if (fieldName in param) {\n locateFieldValues[fieldName] = param[fieldName];\n }\n }\n\n // Build params for validation - skip locate fields and use dummy values\n const paramsForValidation: Record<string, any> = {};\n for (const key in param) {\n if (locateFields.includes(key)) {\n // Use dummy value to satisfy schema validation\n paramsForValidation[key] = { prompt: '_dummy_' };\n } else {\n paramsForValidation[key] = param[key];\n }\n }\n\n // Validate with dummy locate values\n const validated = zodSchema.parse(paramsForValidation);\n\n // Restore the actual locate field values (unvalidated, as per business requirement)\n for (const fieldName in locateFieldValues) {\n validated[fieldName] = locateFieldValues[fieldName];\n }\n\n return validated;\n};\n"],"names":["AIActionType","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","rightLimit","bottomLimit","vlMode","adaptBbox","adaptQwen2_5Bbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","bboxList","Array","item","x","y","normalizeBboxInput","normalizedBbox","adaptGeminiBbox","normalized01000","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","newWidth","newHeight","newLeft","newTop","markupImageForLLM","screenshotBase64","tree","size","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","verb","action","console","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationInput","getMidsceneLocationSchema","ifMidsceneLocatorField","field","actualField","shape","dumpMidsceneLocatorField","String","findAllMidsceneLocatorField","zodType","requiredOnly","zodObject","keys","Object","key","jsonObject","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam","parseActionParam","rawParam","param","locateFields","locateFieldValues","paramsForValidation","validated"],"mappings":";;;;;;AAuBO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;;WAAZA;;AASZ,MAAMC,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAI5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAGhC,IAAKL,OAAe,OAAO,IAAI,CAACA,QAAQ,MAAM;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAQ,MACVA,OAAO,IAAI,GAAGM,UACZN,OAAO,IAAI,EACXC,OACAC,QACAC,YACAC,aACAC;IAIJ,OAAOL;AACT;AAEO,SAASO,iBACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;QACN,YAAnB,OAAOY,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;KAC1B;IACD,OAAOgB;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCP,KAAa,EACbC,MAAc;IAEda,OACEd,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOM,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;YAC5CW,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;SAC7C;QAEH,MAAM,IAAIS,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,WAAqB,EAAE;IAC3B,IAAIC,MAAM,OAAO,CAACX,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BF,SAAS,IAAI,CAACD,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BF,SAAS,IAAI,CAACD,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEJ,SAAS,IAAI,CAACD,OAAOG;IAEzB;SAEAF,WAAWV;IAGb,IAAIU,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLL,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;QACpCW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;KACrC;IAIH,IACEgB,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLL,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU,QAAQN,kBAAkB;QAEhEiB,KAAK,GAAG,CACNZ,OACAY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACNX,QACAW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIY,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;QACpCW,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGjB,QAAS;QACnCY,KAAK,KAAK,CAAEK,QAAQ,CAAC,EAAE,GAAGhB,SAAU;KACrC;IAGH,MAAMO,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEA,SAASc,mBACPf,IAAoB;IAEpB,IAAIW,MAAM,OAAO,CAACX,OAChB;QAAA,IAAIW,MAAM,OAAO,CAACX,IAAI,CAAC,EAAE,GACvB,OAAOA,IAAI,CAAC,EAAE;IAChB;IAGF,OAAOA;AACT;AAEO,SAASF,UACdE,IAAoB,EACpBP,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAEhC,MAAMmB,iBAAiBD,mBAAmBf;IAE1C,IAAII,SAA2C;QAAC;QAAG;QAAG;QAAG;KAAE;IAEzDA,SADEP,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,SACvBS,gBAAgBU,gBAAgBvB,OAAOC,UACvCG,AAAW,aAAXA,SACAoB,gBAAgBD,gBAA4BvB,OAAOC,UACnDG,AAAW,eAAXA,SACAqB,gBAAgBF,gBAA4BvB,OAAOC,UAEnDK,iBAAiBiB;IAG5BZ,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAET;IAChCS,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAER;IAEhC,OAAOQ;AACT;AAGO,SAASc,gBACdlB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,OAAO;QACLW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;QAChCW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;KACjC;AACH;AAGO,SAASuB,gBACdjB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,MAAMyB,OAAOd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC5C,MAAM2B,MAAMf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC5C,MAAM2B,QAAQhB,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC7C,MAAM6B,SAASjB,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC/C,OAAO;QAACyB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdvB,IAAc,EACdP,KAAa,EACbC,MAAc,EACd8B,UAAU,CAAC,EACXC,UAAU,CAAC,EACX9B,aAAaF,KAAK,EAClBG,cAAcF,MAAM,EACpBG,MAAiC;IAEjCR,kBACE,mBACAW,MACAP,OACAC,QACA,UACA8B,SACAC,SACA,SACA9B,YACAC,aACA,UACAC;IAEF,MAAM,CAACsB,MAAMC,KAAKC,OAAOC,OAAO,GAAGxB,UACjCE,MACAP,OACAC,QACAC,YACAC,aACAC;IAIF,MAAM6B,WAAWP;IACjB,MAAMQ,UAAUP;IAChB,IAAIQ,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAYnC,OACzBmC,YAAYnC,QAAQiC;IAItB,IAAIC,UAAUE,aAAanC,QACzBmC,aAAanC,SAASiC;IAIxBC,YAAYvB,KAAK,GAAG,CAAC,GAAGuB;IACxBC,aAAaxB,KAAK,GAAG,CAAC,GAAGwB;IAEzB,MAAMC,OAAO;QACX,MAAMJ,WAAWF;QACjB,KAAKG,UAAUF;QACf,OAAOG;QACP,QAAQC;IACV;IACAxC,kBAAkB,4BAA4ByC;IAE9C,OAAOA;AACT;AAEO,SAASC,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdR,IAAU,EACVS,UAAgB,EAChB1C,MAAgC;IAEhC,MAAM2C,cAAc;IACpB,MAAMC,iBAAiB;IAGvB,MAAMC,wBACJZ,KAAK,KAAK,GAAGU,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAcV,KAAK,KAAI,IAAK,KACvCW;IACN,MAAME,sBACJb,KAAK,MAAM,GAAGU,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAcV,KAAK,MAAK,IAAK,KACxCW;IAGN,IAAIG,WAAWvC,KAAK,GAAG,CAACmC,aAAaV,KAAK,KAAK,GAAGY,AAAwB,IAAxBA;IAClD,IAAIG,YAAYxC,KAAK,GAAG,CAACmC,aAAaV,KAAK,MAAM,GAAGa,AAAsB,IAAtBA;IAGpD,IAAIG,UAAUhB,KAAK,IAAI,GAAGY;IAC1B,IAAIK,SAASjB,KAAK,GAAG,GAAGa;IAIxB,IAAIG,UAAUF,WAAWL,WAAW,KAAK,EACvCO,UAAUP,WAAW,KAAK,GAAGK;IAI/B,IAAIG,SAASF,YAAYN,WAAW,MAAM,EACxCQ,SAASR,WAAW,MAAM,GAAGM;IAI/BC,UAAUzC,KAAK,GAAG,CAAC,GAAGyC;IACtBC,SAAS1C,KAAK,GAAG,CAAC,GAAG0C;IAIrB,IAAID,UAAUF,WAAWL,WAAW,KAAK,EACvCK,WAAWL,WAAW,KAAK,GAAGO;IAEhC,IAAIC,SAASF,YAAYN,WAAW,MAAM,EACxCM,YAAYN,WAAW,MAAM,GAAGQ;IAGlCjB,KAAK,IAAI,GAAGgB;IACZhB,KAAK,GAAG,GAAGiB;IACXjB,KAAK,KAAK,GAAGc;IACbd,KAAK,MAAM,GAAGe;IAEd,OAAOf;AACT;AAEO,eAAekB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCC,IAAU;IAEV,MAAMC,eAAeC,WAAWH;IAChC,MAAMI,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBT;QAChB,sBAAsBK;QACtBH;IACF;IACA,OAAOM;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;QACxB,MAAMK,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASL,YAAY,IAAI,CAAC,CAACK,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEF,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMG,UAAUF,OAAO,cAAc,IAAID;QACzC,MAAMI,YAAYH,OAAO,WAAW,GAChCI,gBAAgBN,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMK,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAN,KAAK,IAAI,CAACQ;IACZ;IAEA,IAAIT,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMS,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;IAChB,KAAKA,EAAE,MAAM,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAG5B,MAAMC,wBAAwBN,EAAAA,MACrB,CAAC;IACN,QAAQI;IACR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,OAAO;KAAG,EAAE,QAAQ;AACpD,GACC,WAAW;AAGiBA,EAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,EAAE,OAAO,CAAC;IACjC,QAAQI;IAGR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,OAAO,GAAG,QAAQ;IAG3B,QAAQA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAYP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;IAErC,IAAIC,cAAcD;IAClB,IAAIC,YAAY,IAAI,EAAE,aAAa,eACjCA,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAIA,YAAY,IAAI,EAAE,aAAa,aAAa;QAC9C,MAAMC,QAAQD,YAAY,IAAI,CAAC,KAAK;QAGpC,IAAIL,uBAAuBM,OACzB,OAAO;QAKT,IAAI,YAAYA,SAASA,MAAM,MAAM,EACnC,OAAO;IAEX;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACH;IACvC3E,OACE0E,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOI,OAAOJ;AAChB;AAEO,MAAMK,8BAA8B,CACzCC,SACAC;IAEA,IAAI,CAACD,SACH,OAAO,EAAE;IAIX,MAAME,YAAYF;IAClB,IAAIE,UAAU,IAAI,EAAE,aAAa,eAAeA,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOC,OAAO,IAAI,CAACF,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACE;YAClB,MAAMX,QAAQQ,UAAU,KAAK,CAACG,IAAI;YAClC,IAAI,CAACZ,uBAAuBC,QAC1B,OAAO;YAIT,IAAIO,cACF,OAAOP,MAAM,IAAI,EAAE,aAAa;YAGlC,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAMZ,kBAAkB,CAC7BwB,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM3F,SAAS;QAAE,GAAG0F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa9F,MAAM,CAAC6F,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACT9F,MAAM,CAAC6F,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1B9F,MAAM,CAAC6F,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxB9F,MAAM,CAAC6F,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAO9F;AACT;AAEO,MAAM+F,kBAAkB,CAC7BL,YACAC;IAEA,MAAMC,gBAAgBT,4BAA4BQ;IAClD,MAAM3F,SAAS;QAAE,GAAG0F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa9F,MAAM,CAAC6F,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvB9F,MAAM,CAAC6F,UAAU,GAAG;YAClB,CAACnB,oBAAoB,EAAE;YACvB,QAAQoB;QACV;IAEJ;IAEA,OAAO9F;AACT;AAUO,MAAMgG,mBAAmB,CAC9BC,UACAN;IAGA,IAAI,CAACA,WACH;IAIF,MAAMO,QAAQD,YAAY,CAAC;IAG3B,MAAME,eAAehB,4BAA4BQ;IAGjD,IAAIQ,AAAwB,MAAxBA,aAAa,MAAM,EACrB,OAAOR,UAAU,KAAK,CAACO;IAIzB,MAAME,oBAAyC,CAAC;IAChD,KAAK,MAAMP,aAAaM,aACtB,IAAIN,aAAaK,OACfE,iBAAiB,CAACP,UAAU,GAAGK,KAAK,CAACL,UAAU;IAKnD,MAAMQ,sBAA2C,CAAC;IAClD,IAAK,MAAMZ,OAAOS,MAChB,IAAIC,aAAa,QAAQ,CAACV,MAExBY,mBAAmB,CAACZ,IAAI,GAAG;QAAE,QAAQ;IAAU;SAE/CY,mBAAmB,CAACZ,IAAI,GAAGS,KAAK,CAACT,IAAI;IAKzC,MAAMa,YAAYX,UAAU,KAAK,CAACU;IAGlC,IAAK,MAAMR,aAAaO,kBACtBE,SAAS,CAACT,UAAU,GAAGO,iBAAiB,CAACP,UAAU;IAGrD,OAAOS;AACT"}
|
|
File without changes
|