@midscene/core 1.2.1-beta-20260113073450.0 → 1.2.1-beta-20260114072539.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +19 -15
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +33 -17
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/auto-glm/actions.mjs +217 -0
- package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/index.mjs +5 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
- package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +63 -0
- package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/util.mjs +22 -0
- package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +2 -1
- package/dist/es/ai-model/inspect.mjs +68 -3
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/latest-locate-recorder.mjs +29 -0
- package/dist/es/ai-model/latest-locate-recorder.mjs.map +1 -0
- package/dist/es/ai-model/llm-planning.mjs +3 -12
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +8 -40
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +5 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +42 -30
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/common.mjs +7 -15
- package/dist/es/common.mjs.map +1 -1
- package/dist/es/device/index.mjs +3 -28
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/service/index.mjs +5 -0
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js +18 -14
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +32 -16
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/auto-glm/actions.js +251 -0
- package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/index.js +59 -0
- package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/parser.js +282 -0
- package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/planning.js +97 -0
- package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
- package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/util.js +62 -0
- package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
- package/dist/lib/ai-model/index.js +15 -11
- package/dist/lib/ai-model/inspect.js +67 -2
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/latest-locate-recorder.js +63 -0
- package/dist/lib/ai-model/latest-locate-recorder.js.map +1 -0
- package/dist/lib/ai-model/llm-planning.js +2 -11
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +8 -40
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +5 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +42 -30
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/common.js +6 -20
- package/dist/lib/common.js.map +1 -1
- package/dist/lib/device/index.js +15 -52
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/service/index.js +5 -0
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/agent/agent.d.ts +15 -4
- package/dist/types/agent/tasks.d.ts +4 -4
- package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +5 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +9 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +16 -0
- package/dist/types/ai-model/index.d.ts +1 -0
- package/dist/types/ai-model/latest-locate-recorder.d.ts +14 -0
- package/dist/types/common.d.ts +1 -8
- package/dist/types/device/index.d.ts +0 -22
- package/dist/types/types.d.ts +2 -2
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/service-caller/index.js","sources":["webpack/runtime/compat_get_default_export","webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import {\n AIResponseFormat,\n type AIUsageInfo,\n type DeepThinkOption,\n} from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n console.warn(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n console.warn(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n console.error('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const { completion, modelName, modelDescription, uiTarsVersion, vlMode } =\n await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n const {\n config: deepThinkConfig,\n debugMessage,\n warningMessage,\n } = resolveDeepThinkConfig({\n deepThink: options?.deepThink,\n vlMode,\n });\n if (debugMessage) {\n debugCall(debugMessage);\n }\n if (warningMessage) {\n debugCall(warningMessage);\n console.warn(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n assert(content, 'empty content');\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n assert(\n typeof jsonContent === 'object',\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n );\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveDeepThinkConfig({\n deepThink,\n vlMode,\n}: {\n deepThink?: DeepThinkOption;\n vlMode?: TVlModeTypes;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n const normalizedDeepThink = deepThink === 'unset' ? undefined : deepThink;\n\n if (normalizedDeepThink === undefined) {\n return { config: {}, debugMessage: undefined };\n }\n\n if (vlMode === 'qwen3-vl') {\n return {\n config: { enable_thinking: normalizedDeepThink },\n debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`,\n };\n }\n\n if (vlMode === 'doubao-vision') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`,\n };\n }\n\n return {\n config: {},\n debugMessage: `deepThink ignored: unsupported model_family \"${vlMode ?? 'default'}\"`,\n warningMessage: `The \"deepThink\" option is not supported for model_family \"${vlMode ?? 'default'}\".`,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","console","moduleName","ProxyAgent","socksDispatcher","proxyUrl","Error","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","deepThinkConfig","debugMessage","warningMessage","resolveDeepThinkConfig","stream","chunk","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","result","JSON","assert","e","newError","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","deepThink","normalizedDeepThink","normalizeJsonObject","Array","item","normalized","value","trimmedKey","normalizedValue","cleanJsonString","lastError","jsonrepair","jsonString","String"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;ACmBA,eAAeI,iBAAiB,EAC9BC,WAAW,EAGZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAI5B,MAAMC,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAId,WAAW;QACbW,WAAW,oBAAoBE,iBAAiBb;QAChD,IAAIiB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAEG;YAEL,MAAMC,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCT,aAAa,IAAIU,WAAW;gBAC1B,KAAKpB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBY,WAAW,qBAAqBE,iBAAiBd;QACjD,IAAIkB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAGF,IAAI;YAEF,MAAMC,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIN,IAAIjB;YAGzB,IAAI,CAACuB,SAAS,QAAQ,EACpB,MAAM,IAAIC,MAAM;YAIlB,MAAMC,OAAOC,OAAO,QAAQ,CAACH,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIG,OAAO,KAAK,CAACD,OACjC,MAAM,IAAID,MAAM;YAIlB,MAAMG,WAAWJ,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMK,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaW,gBAAgB;gBAC3B,MAAMM;gBACN,MAAML,SAAS,QAAQ;gBACvBE;gBACA,GAAIF,SAAS,QAAQ,GACjB;oBACE,QAAQM,mBAAmBN,SAAS,QAAQ;oBAC5C,UAAUM,mBAAmBN,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAX,WAAW,uCAAuC;gBAChD,MAAMgB;gBACN,MAAML,SAAS,QAAQ;gBACvB,MAAME;YACR;QACF,EAAE,OAAOK,OAAO;YACdX,QAAQ,KAAK,CAAC,oCAAoCW;YAClD,MAAM,IAAIN,MACR,CAAC,yBAAyB,EAAExB,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,gBAAgB;QACpB,SAAS5B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,CAAAA,yBAAAA,EAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACC,oBAAAA,wBAAwBA,GAClE;QACA,IAAIlB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMkB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACI,oBAAAA,uBAAuBA,GACjE;QACA,IAAIrB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMqB,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnChC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC7C,WAAyB,EACzB8C,OAIC;IAOD,MAAM,EAAEC,UAAU,EAAE5C,SAAS,EAAEI,gBAAgB,EAAEC,aAAa,EAAEC,MAAM,EAAE,GACtE,MAAMV,iBAAiB;QACrBC;IACF;IAEF,MAAMgD,YACJZ,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACa,oBAAAA,yBAAyBA,KACvEb,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACc,oBAAAA,iBAAiBA;IACjE,MAAMC,YAAYrC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAC3B,MAAMsC,oBAAoBtC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IACnC,MAAMuC,qBAAqBvC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAcxD,YAAY,WAAW,IAAI;IAE/C,MAAMyD,cAAcX,SAAS,UAAUA,SAAS;IAChD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY3D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMkE,eAAe;QACnBV;QACA,QAAQ,CAAC,CAACC;QACV,YAAYT;QACZ,GAAIvC,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IACA,MAAM,EACJ,QAAQ0D,eAAe,EACvBC,YAAY,EACZC,cAAc,EACf,GAAGC,uBAAuB;QACzB,WAAWxB,SAAS;QACpBrC;IACF;IACA,IAAI2D,cACFjB,UAAUiB;IAEZ,IAAIC,gBAAgB;QAClBlB,UAAUkB;QACVjD,QAAQ,IAAI,CAACiD;IACf;IAEA,IAAI;QACFlB,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAEtD,WAAW;QAGrE,IAAIsD,aAAa;YACf,MAAMc,SAAU,MAAMxB,WAAW,MAAM,CACrC;gBACE,OAAO5C;gBACP0C;gBACA,GAAGqB,YAAY;gBACf,GAAGC,eAAe;YACpB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMK,SAASD,OAAQ;gBAChC,MAAMb,UAAUc,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbX,QAAQW,MAAM,KAAK;gBAGrB,IAAId,WAAWe,mBAAmB;oBAChCd,eAAeD;oBACfE,wBAAwBa;oBACxB,MAAMC,YAAiC;wBACrChB;wBACAe;wBACAd;wBACA,YAAY;wBACZ,OAAOgB;oBACT;oBACA7B,QAAQ,OAAO,CAAE4B;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCV,WAAWP,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACO,OAAO;wBAEV,MAAMe,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAClB,YAAY,MAAM,GAAG;wBAElCE,QAAQ;4BACN,eAAee;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTnB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAeF;oBACxB;oBACAf,QAAQ,OAAO,CAAEgC;oBACjB;gBACF;YACF;YACApB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAEjD,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEqD,SAAS,eAAe,EAAEN,eAAe,IAAI;QAE1H,OAAO;YACL,MAAMuB,SAAS,MAAMhC,WAAW,MAAM,CAAC;gBACrC,OAAO5C;gBACP0C;gBACA,GAAGqB,YAAY;gBACf,GAAGC,eAAe;YACpB;YACAL,WAAWP,KAAK,GAAG,KAAKD;YAExBF,kBACE,CAAC,OAAO,EAAEjD,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEuE,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEjB,SAAS,aAAa,EAAEiB,OAAO,WAAW,IAAI,GAAG,eAAe,EAAEvB,eAAe,IAAI;YAG9VH,mBAAmB,CAAC,oBAAoB,EAAE2B,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;YAExEE,IAAAA,sBAAAA,MAAAA,AAAAA,EACEF,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;YAEhErB,UAAUqB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;YAC3CnB,uBACGmB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;YAC3DlB,QAAQkB,OAAO,KAAK;QACtB;QAEA5B,UAAU,CAAC,4BAA4B,EAAES,sBAAsB;QAC/DT,UAAU,CAAC,kBAAkB,EAAEO,SAAS;QACxCuB,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOvB,SAAS;QAGhB,IAAID,eAAe,CAACI,OAAO;YAEzB,MAAMe,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEnB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCG,QAAQ;gBACN,eAAee;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASlB,WAAW;YACpB,mBAAmBE,wBAAwBe;YAC3C,OAAOZ,eAAeF;YACtB,YAAY,CAAC,CAACJ;QAChB;IACF,EAAE,OAAOyB,GAAQ;QACf9D,QAAQ,KAAK,CAAC,kBAAkB8D;QAChC,MAAMC,WAAW,IAAI1D,MACnB,CAAC,eAAe,EAAEgC,cAAc,eAAe,GAAG,kBAAkB,EAAEtD,UAAU,GAAG,EAAE+E,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpBvC,QAAsC,EACtC7C,WAAyB,EACzB8C,OAEC;IAOD,MAAMuC,WAAW,MAAMzC,OAAOC,UAAU7C,aAAa;QACnD,WAAW8C,SAAS;IACtB;IACAmC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOI,UAAU;IACjB,MAAM5E,SAAST,YAAY,MAAM;IACjC,MAAMsF,cAAcC,cAAcF,SAAS,OAAO,EAAE5E;IACpDwE,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAuB,YAAvB,OAAOK,aACP,CAAC,0CAA0C,EAAEtF,YAAY,SAAS,CAAC,GAAG,EAAEqF,SAAS,OAAO,EAAE;IAE5F,OAAO;QACL,SAASC;QACT,eAAeD,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeG,yBACpBC,IAAY,EACZzF,WAAyB;IAEzB,MAAM,EAAE0D,OAAO,EAAEG,KAAK,EAAE,GAAG,MAAMjB,OAAO6C,MAAMzF;IAC9C,OAAO;QAAE0D;QAASG;IAAM;AAC1B;AAEO,SAAS6B,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASzB,uBAAuB,EACrC0B,SAAS,EACTvF,MAAM,EAIP;IAKC,MAAMwF,sBAAsBD,AAAc,YAAdA,YAAwBrB,SAAYqB;IAEhE,IAAIC,AAAwBtB,WAAxBsB,qBACF,OAAO;QAAE,QAAQ,CAAC;QAAG,cAActB;IAAU;IAG/C,IAAIlE,AAAW,eAAXA,QACF,OAAO;QACL,QAAQ;YAAE,iBAAiBwF;QAAoB;QAC/C,cAAc,CAAC,oCAAoC,EAAEA,oBAAoB,aAAa,CAAC;IACzF;IAGF,IAAIxF,AAAW,oBAAXA,QACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMwF,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,kBAAkB,CAAC;IACrH;IAGF,OAAO;QACL,QAAQ,CAAC;QACT,cAAc,CAAC,6CAA6C,EAAExF,UAAU,UAAU,CAAC,CAAC;QACpF,gBAAgB,CAAC,0DAA0D,EAAEA,UAAU,UAAU,EAAE,CAAC;IACtG;AACF;AAQA,SAASyF,oBAAoBtG,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIuG,MAAM,OAAO,CAACvG,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACwG,OAASF,oBAAoBE;IAI/C,IAAI,AAAe,YAAf,OAAOxG,KAAkB;QAC3B,MAAMyG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAAC3G,KAAK4G,MAAM,IAAI3G,OAAO,OAAO,CAACC,KAAM;YAE9C,MAAM2G,aAAa7G,IAAI,IAAI;YAG3B,IAAI8G,kBAAkBN,oBAAoBI;YAG1C,IAAI,AAA2B,YAA3B,OAAOE,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCH,UAAU,CAACE,WAAW,GAAGC;QAC3B;QAEA,OAAOH;IACT;IAGA,IAAI,AAAe,YAAf,OAAOzG,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAAS2F,cAAcQ,KAAa,EAAEtF,MAAgC;IAC3E,MAAMgG,kBAAkBf,yBAAyBK;IAEjD,IAAIU,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAI9E;IAGT,IAAIV;IACJ,IAAIyF;IACJ,IAAI;QACFzF,SAAS+D,KAAK,KAAK,CAACyB;QACpB,OAAOP,oBAAoBjF;IAC7B,EAAE,OAAOc,OAAO;QACd2E,YAAY3E;IACd;IACA,IAAI;QACFd,SAAS+D,KAAK,KAAK,CAAC2B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWF;QAC/B,OAAOP,oBAAoBjF;IAC7B,EAAE,OAAOc,OAAO;QACd2E,YAAY3E;IACd;IAEA,IAAItB,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAMmG,aAAad,yBAAyBW;QAC5C,IAAI;YACFxF,SAAS+D,KAAK,KAAK,CAAC2B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWC;YAC/B,OAAOV,oBAAoBjF;QAC7B,EAAE,OAAOc,OAAO;YACd2E,YAAY3E;QACd;IACF;IACA,MAAMN,MACJ,CAAC,gDAAgD,EAAEoF,OACjDH,aAAa,iBACb,gBAAgB,EAAEX,OAAO;AAE/B"}
|
|
1
|
+
{"version":3,"file":"ai-model/service-caller/index.js","sources":["webpack/runtime/compat_get_default_export","webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import {\n AIResponseFormat,\n type AIUsageInfo,\n type DeepThinkOption,\n} from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM } from '../auto-glm/util';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n console.warn(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n console.warn(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n console.error('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const { completion, modelName, modelDescription, uiTarsVersion, vlMode } =\n await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(vlMode)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n const {\n config: deepThinkConfig,\n debugMessage,\n warningMessage,\n } = resolveDeepThinkConfig({\n deepThink: options?.deepThink,\n vlMode,\n });\n if (debugMessage) {\n debugCall(debugMessage);\n }\n if (warningMessage) {\n debugCall(warningMessage);\n console.warn(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n assert(content, 'empty content');\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n assert(\n typeof jsonContent === 'object',\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n );\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveDeepThinkConfig({\n deepThink,\n vlMode,\n}: {\n deepThink?: DeepThinkOption;\n vlMode?: TVlModeTypes;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n const normalizedDeepThink = deepThink === 'unset' ? undefined : deepThink;\n\n if (normalizedDeepThink === undefined) {\n return { config: {}, debugMessage: undefined };\n }\n\n if (vlMode === 'qwen3-vl') {\n return {\n config: { enable_thinking: normalizedDeepThink },\n debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`,\n };\n }\n\n if (vlMode === 'doubao-vision') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`,\n };\n }\n\n return {\n config: {},\n debugMessage: `deepThink ignored: unsupported model_family \"${vlMode ?? 'default'}\"`,\n warningMessage: `The \"deepThink\" option is not supported for model_family \"${vlMode ?? 'default'}\".`,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","console","moduleName","ProxyAgent","socksDispatcher","proxyUrl","Error","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","isAutoGLM","deepThinkConfig","debugMessage","warningMessage","resolveDeepThinkConfig","stream","chunk","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","result","JSON","assert","e","newError","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","deepThink","normalizedDeepThink","normalizeJsonObject","Array","item","normalized","value","trimmedKey","normalizedValue","cleanJsonString","lastError","jsonrepair","jsonString","String"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;ACoBA,eAAeI,iBAAiB,EAC9BC,WAAW,EAGZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAI5B,MAAMC,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAId,WAAW;QACbW,WAAW,oBAAoBE,iBAAiBb;QAChD,IAAIiB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAEG;YAEL,MAAMC,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCT,aAAa,IAAIU,WAAW;gBAC1B,KAAKpB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBY,WAAW,qBAAqBE,iBAAiBd;QACjD,IAAIkB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAGF,IAAI;YAEF,MAAMC,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIN,IAAIjB;YAGzB,IAAI,CAACuB,SAAS,QAAQ,EACpB,MAAM,IAAIC,MAAM;YAIlB,MAAMC,OAAOC,OAAO,QAAQ,CAACH,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIG,OAAO,KAAK,CAACD,OACjC,MAAM,IAAID,MAAM;YAIlB,MAAMG,WAAWJ,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMK,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaW,gBAAgB;gBAC3B,MAAMM;gBACN,MAAML,SAAS,QAAQ;gBACvBE;gBACA,GAAIF,SAAS,QAAQ,GACjB;oBACE,QAAQM,mBAAmBN,SAAS,QAAQ;oBAC5C,UAAUM,mBAAmBN,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAX,WAAW,uCAAuC;gBAChD,MAAMgB;gBACN,MAAML,SAAS,QAAQ;gBACvB,MAAME;YACR;QACF,EAAE,OAAOK,OAAO;YACdX,QAAQ,KAAK,CAAC,oCAAoCW;YAClD,MAAM,IAAIN,MACR,CAAC,yBAAyB,EAAExB,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,gBAAgB;QACpB,SAAS5B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,CAAAA,yBAAAA,EAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACC,oBAAAA,wBAAwBA,GAClE;QACA,IAAIlB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMkB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACI,oBAAAA,uBAAuBA,GACjE;QACA,IAAIrB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMqB,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnChC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC7C,WAAyB,EACzB8C,OAIC;IAOD,MAAM,EAAEC,UAAU,EAAE5C,SAAS,EAAEI,gBAAgB,EAAEC,aAAa,EAAEC,MAAM,EAAE,GACtE,MAAMV,iBAAiB;QACrBC;IACF;IAEF,MAAMgD,YACJZ,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACa,oBAAAA,yBAAyBA,KACvEb,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACc,oBAAAA,iBAAiBA;IACjE,MAAMC,YAAYrC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAC3B,MAAMsC,oBAAoBtC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IACnC,MAAMuC,qBAAqBvC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAcxD,YAAY,WAAW,IAAI;IAE/C,MAAMyD,cAAcX,SAAS,UAAUA,SAAS;IAChD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY3D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMkE,eAAe;QACnBV;QACA,QAAQ,CAAC,CAACC;QACV,YAAYT;QACZ,GAAIvC,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI0D,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAU1D,SAAS;QACpByD,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAEA,MAAM,EACJ,QAAQE,eAAe,EACvBC,YAAY,EACZC,cAAc,EACf,GAAGC,uBAAuB;QACzB,WAAWzB,SAAS;QACpBrC;IACF;IACA,IAAI4D,cACFlB,UAAUkB;IAEZ,IAAIC,gBAAgB;QAClBnB,UAAUmB;QACVlD,QAAQ,IAAI,CAACkD;IACf;IAEA,IAAI;QACFnB,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAEtD,WAAW;QAGrE,IAAIsD,aAAa;YACf,MAAMe,SAAU,MAAMzB,WAAW,MAAM,CACrC;gBACE,OAAO5C;gBACP0C;gBACA,GAAGqB,YAAY;gBACf,GAAGE,eAAe;YACpB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMK,SAASD,OAAQ;gBAChC,MAAMd,UAAUe,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbZ,QAAQY,MAAM,KAAK;gBAGrB,IAAIf,WAAWgB,mBAAmB;oBAChCf,eAAeD;oBACfE,wBAAwBc;oBACxB,MAAMC,YAAiC;wBACrCjB;wBACAgB;wBACAf;wBACA,YAAY;wBACZ,OAAOiB;oBACT;oBACA9B,QAAQ,OAAO,CAAE6B;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCX,WAAWP,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACO,OAAO;wBAEV,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACnB,YAAY,MAAM,GAAG;wBAElCE,QAAQ;4BACN,eAAegB;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTpB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAeF;oBACxB;oBACAf,QAAQ,OAAO,CAAEiC;oBACjB;gBACF;YACF;YACArB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAEjD,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEqD,SAAS,eAAe,EAAEN,eAAe,IAAI;QAE1H,OAAO;YACL,MAAMwB,SAAS,MAAMjC,WAAW,MAAM,CAAC;gBACrC,OAAO5C;gBACP0C;gBACA,GAAGqB,YAAY;gBACf,GAAGE,eAAe;YACpB;YACAN,WAAWP,KAAK,GAAG,KAAKD;YAExBF,kBACE,CAAC,OAAO,EAAEjD,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEwE,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAElB,SAAS,aAAa,EAAEkB,OAAO,WAAW,IAAI,GAAG,eAAe,EAAExB,eAAe,IAAI;YAG9VH,mBAAmB,CAAC,oBAAoB,EAAE4B,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;YAExEE,IAAAA,sBAAAA,MAAAA,AAAAA,EACEF,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;YAEhEtB,UAAUsB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;YAC3CpB,uBACGoB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;YAC3DnB,QAAQmB,OAAO,KAAK;QACtB;QAEA7B,UAAU,CAAC,4BAA4B,EAAES,sBAAsB;QAC/DT,UAAU,CAAC,kBAAkB,EAAEO,SAAS;QACxCwB,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOxB,SAAS;QAGhB,IAAID,eAAe,CAACI,OAAO;YAEzB,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEpB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCG,QAAQ;gBACN,eAAegB;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASnB,WAAW;YACpB,mBAAmBE,wBAAwBgB;YAC3C,OAAOb,eAAeF;YACtB,YAAY,CAAC,CAACJ;QAChB;IACF,EAAE,OAAO0B,GAAQ;QACf/D,QAAQ,KAAK,CAAC,kBAAkB+D;QAChC,MAAMC,WAAW,IAAI3D,MACnB,CAAC,eAAe,EAAEgC,cAAc,eAAe,GAAG,kBAAkB,EAAEtD,UAAU,GAAG,EAAEgF,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpBxC,QAAsC,EACtC7C,WAAyB,EACzB8C,OAEC;IAOD,MAAMwC,WAAW,MAAM1C,OAAOC,UAAU7C,aAAa;QACnD,WAAW8C,SAAS;IACtB;IACAoC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOI,UAAU;IACjB,MAAM7E,SAAST,YAAY,MAAM;IACjC,MAAMuF,cAAcC,cAAcF,SAAS,OAAO,EAAE7E;IACpDyE,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAuB,YAAvB,OAAOK,aACP,CAAC,0CAA0C,EAAEvF,YAAY,SAAS,CAAC,GAAG,EAAEsF,SAAS,OAAO,EAAE;IAE5F,OAAO;QACL,SAASC;QACT,eAAeD,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeG,yBACpBC,IAAY,EACZ1F,WAAyB;IAEzB,MAAM,EAAE0D,OAAO,EAAEG,KAAK,EAAE,GAAG,MAAMjB,OAAO8C,MAAM1F;IAC9C,OAAO;QAAE0D;QAASG;IAAM;AAC1B;AAEO,SAAS8B,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASzB,uBAAuB,EACrC0B,SAAS,EACTxF,MAAM,EAIP;IAKC,MAAMyF,sBAAsBD,AAAc,YAAdA,YAAwBrB,SAAYqB;IAEhE,IAAIC,AAAwBtB,WAAxBsB,qBACF,OAAO;QAAE,QAAQ,CAAC;QAAG,cAActB;IAAU;IAG/C,IAAInE,AAAW,eAAXA,QACF,OAAO;QACL,QAAQ;YAAE,iBAAiByF;QAAoB;QAC/C,cAAc,CAAC,oCAAoC,EAAEA,oBAAoB,aAAa,CAAC;IACzF;IAGF,IAAIzF,AAAW,oBAAXA,QACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMyF,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,kBAAkB,CAAC;IACrH;IAGF,OAAO;QACL,QAAQ,CAAC;QACT,cAAc,CAAC,6CAA6C,EAAEzF,UAAU,UAAU,CAAC,CAAC;QACpF,gBAAgB,CAAC,0DAA0D,EAAEA,UAAU,UAAU,EAAE,CAAC;IACtG;AACF;AAQA,SAAS0F,oBAAoBvG,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIwG,MAAM,OAAO,CAACxG,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACyG,OAASF,oBAAoBE;IAI/C,IAAI,AAAe,YAAf,OAAOzG,KAAkB;QAC3B,MAAM0G,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAAC5G,KAAK6G,MAAM,IAAI5G,OAAO,OAAO,CAACC,KAAM;YAE9C,MAAM4G,aAAa9G,IAAI,IAAI;YAG3B,IAAI+G,kBAAkBN,oBAAoBI;YAG1C,IAAI,AAA2B,YAA3B,OAAOE,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCH,UAAU,CAACE,WAAW,GAAGC;QAC3B;QAEA,OAAOH;IACT;IAGA,IAAI,AAAe,YAAf,OAAO1G,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAAS4F,cAAcQ,KAAa,EAAEvF,MAAgC;IAC3E,MAAMiG,kBAAkBf,yBAAyBK;IAEjD,IAAIU,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAI/E;IAGT,IAAIV;IACJ,IAAI0F;IACJ,IAAI;QACF1F,SAASgE,KAAK,KAAK,CAACyB;QACpB,OAAOP,oBAAoBlF;IAC7B,EAAE,OAAOc,OAAO;QACd4E,YAAY5E;IACd;IACA,IAAI;QACFd,SAASgE,KAAK,KAAK,CAAC2B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWF;QAC/B,OAAOP,oBAAoBlF;IAC7B,EAAE,OAAOc,OAAO;QACd4E,YAAY5E;IACd;IAEA,IAAItB,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAMoG,aAAad,yBAAyBW;QAC5C,IAAI;YACFzF,SAASgE,KAAK,KAAK,CAAC2B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWC;YAC/B,OAAOV,oBAAoBlF;QAC7B,EAAE,OAAOc,OAAO;YACd4E,YAAY5E;QACd;IACF;IACA,MAAMN,MACJ,CAAC,gDAAgD,EAAEqF,OACjDH,aAAa,iBACb,gBAAgB,EAAEX,OAAO;AAE/B"}
|
|
@@ -33,6 +33,7 @@ const logger_namespaceObject = require("@midscene/shared/logger");
|
|
|
33
33
|
const us_keyboard_layout_namespaceObject = require("@midscene/shared/us-keyboard-layout");
|
|
34
34
|
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
35
35
|
const action_parser_namespaceObject = require("@ui-tars/action-parser");
|
|
36
|
+
const external_latest_locate_recorder_js_namespaceObject = require("./latest-locate-recorder.js");
|
|
36
37
|
const ui_tars_planning_js_namespaceObject = require("./prompt/ui-tars-planning.js");
|
|
37
38
|
const index_js_namespaceObject = require("./service-caller/index.js");
|
|
38
39
|
const debug = (0, logger_namespaceObject.getDebug)('ui-tars-planning');
|
|
@@ -43,6 +44,7 @@ const pointToBbox = (point, width, height)=>[
|
|
|
43
44
|
Math.round(Math.min(point.x + bboxSize / 2, width)),
|
|
44
45
|
Math.round(Math.min(point.y + bboxSize / 2, height))
|
|
45
46
|
];
|
|
47
|
+
const lastLocateRecorder = new external_latest_locate_recorder_js_namespaceObject.LatestLocateRecorder();
|
|
46
48
|
async function uiTarsPlanning(userInstruction, options) {
|
|
47
49
|
const { conversationHistory, context, modelConfig, actionContext } = options;
|
|
48
50
|
const { uiTarsModelVersion } = modelConfig;
|
|
@@ -91,47 +93,53 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
91
93
|
if ('click' === actionType) {
|
|
92
94
|
(0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
|
|
93
95
|
const point = getPoint(action.action_inputs.start_box, size);
|
|
96
|
+
const locate = {
|
|
97
|
+
prompt: action.thought || '',
|
|
98
|
+
bbox: pointToBbox({
|
|
99
|
+
x: point[0],
|
|
100
|
+
y: point[1]
|
|
101
|
+
}, size.width, size.height)
|
|
102
|
+
};
|
|
103
|
+
lastLocateRecorder.recordLocate(locate, 'click');
|
|
94
104
|
transformActions.push({
|
|
95
105
|
type: 'Tap',
|
|
96
106
|
param: {
|
|
97
|
-
locate:
|
|
98
|
-
prompt: action.thought || '',
|
|
99
|
-
bbox: pointToBbox({
|
|
100
|
-
x: point[0],
|
|
101
|
-
y: point[1]
|
|
102
|
-
}, size.width, size.height)
|
|
103
|
-
}
|
|
107
|
+
locate: locate
|
|
104
108
|
}
|
|
105
109
|
});
|
|
106
110
|
} else if ('left_double' === actionType) {
|
|
107
111
|
(0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
|
|
108
112
|
const point = getPoint(action.action_inputs.start_box, size);
|
|
113
|
+
const locate = {
|
|
114
|
+
prompt: action.thought || '',
|
|
115
|
+
bbox: pointToBbox({
|
|
116
|
+
x: point[0],
|
|
117
|
+
y: point[1]
|
|
118
|
+
}, size.width, size.height)
|
|
119
|
+
};
|
|
120
|
+
lastLocateRecorder.recordLocate(locate, 'left_double');
|
|
109
121
|
transformActions.push({
|
|
110
122
|
type: 'DoubleClick',
|
|
111
123
|
param: {
|
|
112
|
-
locate:
|
|
113
|
-
prompt: action.thought || '',
|
|
114
|
-
bbox: pointToBbox({
|
|
115
|
-
x: point[0],
|
|
116
|
-
y: point[1]
|
|
117
|
-
}, size.width, size.height)
|
|
118
|
-
}
|
|
124
|
+
locate: locate
|
|
119
125
|
},
|
|
120
126
|
thought: action.thought || ''
|
|
121
127
|
});
|
|
122
128
|
} else if ('right_single' === actionType) {
|
|
123
129
|
(0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
|
|
124
130
|
const point = getPoint(action.action_inputs.start_box, size);
|
|
131
|
+
const locate = {
|
|
132
|
+
prompt: action.thought || '',
|
|
133
|
+
bbox: pointToBbox({
|
|
134
|
+
x: point[0],
|
|
135
|
+
y: point[1]
|
|
136
|
+
}, size.width, size.height)
|
|
137
|
+
};
|
|
138
|
+
lastLocateRecorder.recordLocate(locate, 'right_single');
|
|
125
139
|
transformActions.push({
|
|
126
140
|
type: 'RightClick',
|
|
127
141
|
param: {
|
|
128
|
-
locate:
|
|
129
|
-
prompt: action.thought || '',
|
|
130
|
-
bbox: pointToBbox({
|
|
131
|
-
x: point[0],
|
|
132
|
-
y: point[1]
|
|
133
|
-
}, size.width, size.height)
|
|
134
|
-
}
|
|
142
|
+
locate: locate
|
|
135
143
|
},
|
|
136
144
|
thought: action.thought || ''
|
|
137
145
|
});
|
|
@@ -160,14 +168,18 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
160
168
|
},
|
|
161
169
|
thought: action.thought || ''
|
|
162
170
|
});
|
|
163
|
-
} else if ('type' === actionType)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
+
} else if ('type' === actionType) {
|
|
172
|
+
const { locate: latestLocate, source } = lastLocateRecorder.getLatestLocate();
|
|
173
|
+
debug(`use latestLocate from ${source} as locate when Input`, latestLocate);
|
|
174
|
+
transformActions.push({
|
|
175
|
+
type: 'Input',
|
|
176
|
+
param: {
|
|
177
|
+
value: action.action_inputs.content,
|
|
178
|
+
locate: latestLocate
|
|
179
|
+
},
|
|
180
|
+
thought: action.thought || ''
|
|
181
|
+
});
|
|
182
|
+
} else if ('scroll' === actionType) transformActions.push({
|
|
171
183
|
type: 'Scroll',
|
|
172
184
|
param: {
|
|
173
185
|
direction: action.action_inputs.direction
|
|
@@ -242,7 +254,7 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
242
254
|
log,
|
|
243
255
|
usage: res.usage,
|
|
244
256
|
rawResponse: JSON.stringify(res.content, void 0, 2),
|
|
245
|
-
|
|
257
|
+
more_actions_needed_by_instruction: shouldContinue
|
|
246
258
|
};
|
|
247
259
|
}
|
|
248
260
|
function convertBboxToCoordinates(text) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/ui-tars-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/ui-tars-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { callAIWithStringResponse } from './service-caller/index';\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n const { uiTarsModelVersion } = modelConfig;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const imagePayload = await resizeImageForUiTars(\n context.screenshotBase64,\n context.size,\n uiTarsModelVersion,\n );\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelConfig,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'Tap',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'RightClick',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n `\\nRaw response: ${res.content}`,\n ].join('\\n');\n\n throw new Error(errorMessage, {\n cause: {\n prediction: res.content,\n parsed,\n unhandledActions,\n convertedText,\n },\n });\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n\nexport async function resizeImageForUiTars(\n imageBase64: string,\n size: Size,\n uiTarsVersion: UITarsModelVersion | undefined,\n) {\n if (uiTarsVersion === UITarsModelVersion.V1_5) {\n debug('ui-tars-v1.5, will check image size', size);\n const currentPixels = size.width * size.height;\n const maxPixels = 16384 * 28 * 28; //\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(size.width * resizeFactor);\n const newHeight = Math.floor(size.height * resizeFactor);\n debug(\n 'resize image for ui-tars, new width: %s, new height: %s',\n newWidth,\n newHeight,\n );\n const resizedImage = await resizeImgBase64(imageBase64, {\n width: newWidth,\n height: newHeight,\n });\n return resizedImage;\n }\n }\n return imageBase64;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","uiTarsModelVersion","instruction","systemPrompt","getUiTarsPlanningPrompt","imagePayload","resizeImageForUiTars","res","callAIWithStringResponse","convertedText","convertBboxToCoordinates","size","parsed","actionParser","JSON","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","startPoint","endPoint","keys","transformHotkeyInput","console","errorDetails","types","a","errorMessage","Error","log","getSummary","undefined","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","imageBase64","uiTarsVersion","UITarsModelVersion","currentPixels","maxPixels","resizeFactor","newWidth","newHeight","resizedImage","resizeImgBase64"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACoBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IACrE,MAAM,EAAEK,kBAAkB,EAAE,GAAGF;IAE/B,IAAIG,cAAcP;IAClB,IAAIK,eACFE,cAAc,CAAC,yBAAyB,EAAEF,cAAc,8CAA8C,EAAEL,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,MAA4BF;IAEjD,MAAMG,eAAe,MAAMC,qBACzBR,QAAQ,gBAAgB,EACxBA,QAAQ,IAAI,EACZG;IAGFJ,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKQ;gBACP;YACF;SACD;IACH;IAEA,MAAME,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAChB;QACE;YACE,MAAM;YACN,SAASL;QACX;WACGN,oBAAoB,QAAQ;KAChC,EACDE;IAEF,MAAMU,gBAAgBC,yBAAyBH,IAAI,OAAO;IAE1D,MAAM,EAAEI,IAAI,EAAE,GAAGb;IACjB,MAAM,EAAEc,MAAM,EAAE,GAAGC,AAAAA,IAAAA,8BAAAA,YAAAA,AAAAA,EAAa;QAC9B,YAAYJ;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOE,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUV;IACZ;IAEAf,MACE,oBACAe,oBACA,YACAa,KAAK,SAAS,CAACF;IAGjB,MAAMG,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBL,OAAO,OAAO,CAAC,CAACM;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM5B,QAAQ+B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM7B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;YACF;QACF,OAAO,IAAIQ,AAAe,kBAAfA,YAA8B;YACvCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM5B,QAAQ+B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM7B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM5B,QAAQ+B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM7B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMI,aAAaD,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAC5D,MAAMY,WAAWF,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEP;YACxDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM7B,YACJ;4BAAE,GAAGiC,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrCX,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQO,OAAO,OAAO,IAAI;wBAC1B,MAAM7B,YACJ;4BAAE,GAAGkC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCZ,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMM,OAAOC,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBP,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASS,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASN,OAAO,OAAO,IAAI;YAC7B;QACF,OAbEQ,QAAQ,IAAI,CACV;aAaC,IAAIP,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAhC,MAAM,0BAA0BiC,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIf,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBe,aAAa,IAAI,CAAC;YAGlB,IACEpB,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtBoB,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAME,eAAe;YACnB;eACGH;YACH,CAAC,gBAAgB,EAAEpB,IAAI,OAAO,EAAE;SACjC,CAAC,IAAI,CAAC;QAEP,MAAM,IAAIwB,MAAMD,cAAc;YAC5B,OAAO;gBACL,YAAYvB,IAAI,OAAO;gBACvBK;gBACAI;gBACAP;YACF;QACF;IACF;IAEAvB,MAAM,oBAAoB4B,KAAK,SAAS,CAACC,kBAAkB,MAAM;IACjE,MAAMiB,MAAMC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAW1B,IAAI,OAAO;IAElCV,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASmC;IACX;IAEA,OAAO;QACL,SAASjB;QACTiB;QACA,OAAOzB,IAAI,KAAK;QAChB,aAAaO,KAAK,SAAS,CAACP,IAAI,OAAO,EAAE2B,QAAW;QACpD,wBAAwBjB;IAC1B;AACF;AAOA,SAASP,yBAAyByB,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIvD,KAAK,KAAK,CAAEkD,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAIxD,KAAK,KAAK,CAAEoD,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAAShB,SAAS8B,QAAgB,EAAExC,IAAuC;IACzE,MAAM,CAACqC,GAAGC,EAAE,GAAGnC,KAAK,KAAK,CAACqC;IAC1B,OAAO;QAACH,IAAIrC,KAAK,KAAK;QAAEsC,IAAItC,KAAK,MAAM;KAAC;AAC1C;AAkFO,eAAeL,qBACpB8C,WAAmB,EACnBzC,IAAU,EACV0C,aAA6C;IAE7C,IAAIA,kBAAkBC,oBAAAA,kBAAAA,CAAAA,IAAuB,EAAE;QAC7CpE,MAAM,uCAAuCyB;QAC7C,MAAM4C,gBAAgB5C,KAAK,KAAK,GAAGA,KAAK,MAAM;QAC9C,MAAM6C,YAAY;QAClB,IAAID,gBAAgBC,WAAW;YAC7B,MAAMC,eAAehE,KAAK,IAAI,CAAC+D,YAAYD;YAC3C,MAAMG,WAAWjE,KAAK,KAAK,CAACkB,KAAK,KAAK,GAAG8C;YACzC,MAAME,YAAYlE,KAAK,KAAK,CAACkB,KAAK,MAAM,GAAG8C;YAC3CvE,MACE,2DACAwE,UACAC;YAEF,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,eAAAA,AAAAA,EAAgBT,aAAa;gBACtD,OAAOM;gBACP,QAAQC;YACV;YACA,OAAOC;QACT;IACF;IACA,OAAOR;AACT"}
|
|
1
|
+
{"version":3,"file":"ai-model/ui-tars-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/ui-tars-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ConversationHistory } from './conversation-history';\nimport { LatestLocateRecorder } from './latest-locate-recorder';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { callAIWithStringResponse } from './service-caller/index';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nconst lastLocateRecorder = new LatestLocateRecorder();\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n const { uiTarsModelVersion } = modelConfig;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const imagePayload = await resizeImageForUiTars(\n context.screenshotBase64,\n context.size,\n uiTarsModelVersion,\n );\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelConfig,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n lastLocateRecorder.recordLocate(locate, 'click');\n transformActions.push({\n type: 'Tap',\n param: {\n locate: locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n lastLocateRecorder.recordLocate(locate, 'left_double');\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n lastLocateRecorder.recordLocate(locate, 'right_single');\n transformActions.push({\n type: 'RightClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n const { locate: latestLocate, source } =\n lastLocateRecorder.getLatestLocate();\n debug(\n `use latestLocate from ${source} as locate when Input`,\n latestLocate,\n );\n\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n locate: latestLocate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n `\\nRaw response: ${res.content}`,\n ].join('\\n');\n\n throw new Error(errorMessage, {\n cause: {\n prediction: res.content,\n parsed,\n unhandledActions,\n convertedText,\n },\n });\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n more_actions_needed_by_instruction: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n\nexport async function resizeImageForUiTars(\n imageBase64: string,\n size: Size,\n uiTarsVersion: UITarsModelVersion | undefined,\n) {\n if (uiTarsVersion === UITarsModelVersion.V1_5) {\n debug('ui-tars-v1.5, will check image size', size);\n const currentPixels = size.width * size.height;\n const maxPixels = 16384 * 28 * 28; //\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(size.width * resizeFactor);\n const newHeight = Math.floor(size.height * resizeFactor);\n debug(\n 'resize image for ui-tars, new width: %s, new height: %s',\n newWidth,\n newHeight,\n );\n const resizedImage = await resizeImgBase64(imageBase64, {\n width: newWidth,\n height: newHeight,\n });\n return resizedImage;\n }\n }\n return imageBase64;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","lastLocateRecorder","LatestLocateRecorder","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","uiTarsModelVersion","instruction","systemPrompt","getUiTarsPlanningPrompt","imagePayload","resizeImageForUiTars","res","callAIWithStringResponse","convertedText","convertBboxToCoordinates","size","parsed","actionParser","JSON","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","latestLocate","source","keys","transformHotkeyInput","console","errorDetails","types","a","errorMessage","Error","log","getSummary","undefined","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","imageBase64","uiTarsVersion","UITarsModelVersion","currentPixels","maxPixels","resizeFactor","newWidth","newHeight","resizedImage","resizeImgBase64"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;ACsBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGH,MAAME,qBAAqB,IAAIC,mDAAAA,oBAAoBA;AAE5C,eAAeC,eACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IACrE,MAAM,EAAEK,kBAAkB,EAAE,GAAGF;IAE/B,IAAIG,cAAcP;IAClB,IAAIK,eACFE,cAAc,CAAC,yBAAyB,EAAEF,cAAc,8CAA8C,EAAEL,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,MAA4BF;IAEjD,MAAMG,eAAe,MAAMC,qBACzBR,QAAQ,gBAAgB,EACxBA,QAAQ,IAAI,EACZG;IAGFJ,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKQ;gBACP;YACF;SACD;IACH;IAEA,MAAME,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAChB;QACE;YACE,MAAM;YACN,SAASL;QACX;WACGN,oBAAoB,QAAQ;KAChC,EACDE;IAEF,MAAMU,gBAAgBC,yBAAyBH,IAAI,OAAO;IAE1D,MAAM,EAAEI,IAAI,EAAE,GAAGb;IACjB,MAAM,EAAEc,MAAM,EAAE,GAAGC,AAAAA,IAAAA,8BAAAA,YAAAA,AAAAA,EAAa;QAC9B,YAAYJ;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOE,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUV;IACZ;IAEAjB,MACE,oBACAiB,oBACA,YACAa,KAAK,SAAS,CAACF;IAGjB,MAAMG,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBL,OAAO,OAAO,CAAC,CAACM;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM9B,QAAQiC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAEvD,MAAMW,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAM/B,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BuB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAnB,mBAAmB,YAAY,CAAC8B,QAAQ;YACxCP,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM9B,QAAQiC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAEvD,MAAMW,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAM/B,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BuB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAnB,mBAAmB,YAAY,CAAC8B,QAAQ;YACxCP,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM9B,QAAQiC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAEvD,MAAMW,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAM/B,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BuB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAnB,mBAAmB,YAAY,CAAC8B,QAAQ;YACxCP,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAC5D,MAAMa,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEP;YACxDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM/B,YACJ;4BAAE,GAAGoC,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrCZ,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQO,OAAO,OAAO,IAAI;wBAC1B,MAAM/B,YACJ;4BAAE,GAAGqC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCb,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChC,MAAM,EAAE,QAAQM,YAAY,EAAEC,MAAM,EAAE,GACpClC,mBAAmB,eAAe;YACpCR,MACE,CAAC,sBAAsB,EAAE0C,OAAO,qBAAqB,CAAC,EACtDD;YAGFV,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;oBACnC,QAAQO;gBACV;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMS,OAAOC,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBV,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASY,KAAK,IAAI,CAAC;gBACrB;gBACA,SAAST,OAAO,OAAO,IAAI;YAC7B;QACF,OAbEW,QAAQ,IAAI,CACV;aAaC,IAAIV,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAlC,MAAM,0BAA0BmC,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMe,eAAyB,EAAE;QAGjC,IAAIlB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBkB,aAAa,IAAI,CAAC;YAGlB,IACEvB,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtBuB,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAId,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMe,QAAQf,iBAAiB,GAAG,CAAC,CAACgB,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAME,eAAe;YACnB;eACGH;YACH,CAAC,gBAAgB,EAAEvB,IAAI,OAAO,EAAE;SACjC,CAAC,IAAI,CAAC;QAEP,MAAM,IAAI2B,MAAMD,cAAc;YAC5B,OAAO;gBACL,YAAY1B,IAAI,OAAO;gBACvBK;gBACAI;gBACAP;YACF;QACF;IACF;IAEAzB,MAAM,oBAAoB8B,KAAK,SAAS,CAACC,kBAAkB,MAAM;IACjE,MAAMoB,MAAMC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAW7B,IAAI,OAAO;IAElCV,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASsC;IACX;IAEA,OAAO;QACL,SAASpB;QACToB;QACA,OAAO5B,IAAI,KAAK;QAChB,aAAaO,KAAK,SAAS,CAACP,IAAI,OAAO,EAAE8B,QAAW;QACpD,oCAAoCpB;IACtC;AACF;AAOA,SAASP,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAI5D,KAAK,KAAK,CAAEuD,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAI7D,KAAK,KAAK,CAAEyD,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASnB,SAASiC,QAAgB,EAAE3C,IAAuC;IACzE,MAAM,CAACwC,GAAGC,EAAE,GAAGtC,KAAK,KAAK,CAACwC;IAC1B,OAAO;QAACH,IAAIxC,KAAK,KAAK;QAAEyC,IAAIzC,KAAK,MAAM;KAAC;AAC1C;AAkFO,eAAeL,qBACpBiD,WAAmB,EACnB5C,IAAU,EACV6C,aAA6C;IAE7C,IAAIA,kBAAkBC,oBAAAA,kBAAAA,CAAAA,IAAuB,EAAE;QAC7CzE,MAAM,uCAAuC2B;QAC7C,MAAM+C,gBAAgB/C,KAAK,KAAK,GAAGA,KAAK,MAAM;QAC9C,MAAMgD,YAAY;QAClB,IAAID,gBAAgBC,WAAW;YAC7B,MAAMC,eAAerE,KAAK,IAAI,CAACoE,YAAYD;YAC3C,MAAMG,WAAWtE,KAAK,KAAK,CAACoB,KAAK,KAAK,GAAGiD;YACzC,MAAME,YAAYvE,KAAK,KAAK,CAACoB,KAAK,MAAM,GAAGiD;YAC3C5E,MACE,2DACA6E,UACAC;YAEF,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,eAAAA,AAAAA,EAAgBT,aAAa;gBACtD,OAAOM;gBACP,QAAQC;YACV;YACA,OAAOC;QACT;IACF;IACA,OAAOR;AACT"}
|
package/dist/lib/common.js
CHANGED
|
@@ -24,7 +24,6 @@ var __webpack_require__ = {};
|
|
|
24
24
|
var __webpack_exports__ = {};
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
-
loadActionParam: ()=>loadActionParam,
|
|
28
27
|
dumpMidsceneLocatorField: ()=>dumpMidsceneLocatorField,
|
|
29
28
|
TUserPromptSchema: ()=>TUserPromptSchema,
|
|
30
29
|
buildYamlFlowFromPlans: ()=>buildYamlFlowFromPlans,
|
|
@@ -32,7 +31,6 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
32
31
|
adaptGeminiBbox: ()=>adaptGeminiBbox,
|
|
33
32
|
parseActionParam: ()=>parseActionParam,
|
|
34
33
|
adaptBboxToRect: ()=>adaptBboxToRect,
|
|
35
|
-
getReadableTimeString: ()=>getReadableTimeString,
|
|
36
34
|
expandSearchArea: ()=>expandSearchArea,
|
|
37
35
|
mergeRects: ()=>mergeRects,
|
|
38
36
|
normalized01000: ()=>normalized01000,
|
|
@@ -42,7 +40,7 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
42
40
|
getMidsceneLocationSchema: ()=>getMidsceneLocationSchema,
|
|
43
41
|
adaptDoubaoBbox: ()=>adaptDoubaoBbox,
|
|
44
42
|
fillBboxParam: ()=>fillBboxParam,
|
|
45
|
-
|
|
43
|
+
loadActionParam: ()=>loadActionParam,
|
|
46
44
|
TMultimodalPromptSchema: ()=>TMultimodalPromptSchema,
|
|
47
45
|
PointSchema: ()=>PointSchema,
|
|
48
46
|
RectSchema: ()=>RectSchema,
|
|
@@ -230,7 +228,7 @@ async function markupImageForLLM(screenshotBase64, tree, size) {
|
|
|
230
228
|
});
|
|
231
229
|
return imagePayload;
|
|
232
230
|
}
|
|
233
|
-
function buildYamlFlowFromPlans(plans, actionSpace) {
|
|
231
|
+
function buildYamlFlowFromPlans(plans, actionSpace, sleep) {
|
|
234
232
|
const flow = [];
|
|
235
233
|
for (const plan of plans){
|
|
236
234
|
const verb = plan.type;
|
|
@@ -247,6 +245,9 @@ function buildYamlFlowFromPlans(plans, actionSpace) {
|
|
|
247
245
|
};
|
|
248
246
|
flow.push(flowItem);
|
|
249
247
|
}
|
|
248
|
+
if (sleep) flow.push({
|
|
249
|
+
sleep
|
|
250
|
+
});
|
|
250
251
|
return flow;
|
|
251
252
|
}
|
|
252
253
|
const PointSchema = external_zod_namespaceObject.z.object({
|
|
@@ -331,6 +332,7 @@ const findAllMidsceneLocatorField = (zodType, requiredOnly)=>{
|
|
|
331
332
|
return [];
|
|
332
333
|
};
|
|
333
334
|
const dumpActionParam = (jsonObject, zodSchema)=>{
|
|
335
|
+
if (!(0, utils_namespaceObject.isPlainObject)(jsonObject)) return {};
|
|
334
336
|
const locatorFields = findAllMidsceneLocatorField(zodSchema);
|
|
335
337
|
const result = {
|
|
336
338
|
...jsonObject
|
|
@@ -379,18 +381,6 @@ const parseActionParam = (rawParam, zodSchema)=>{
|
|
|
379
381
|
for(const fieldName in locateFieldValues)validated[fieldName] = locateFieldValues[fieldName];
|
|
380
382
|
return validated;
|
|
381
383
|
};
|
|
382
|
-
const finalizeActionName = 'Finalize';
|
|
383
|
-
const getReadableTimeString = (format = 'YYYY-MM-DD HH:mm:ss')=>{
|
|
384
|
-
const now = new Date();
|
|
385
|
-
const year = now.getFullYear();
|
|
386
|
-
const month = String(now.getMonth() + 1).padStart(2, '0');
|
|
387
|
-
const day = String(now.getDate()).padStart(2, '0');
|
|
388
|
-
const hours = String(now.getHours()).padStart(2, '0');
|
|
389
|
-
const minutes = String(now.getMinutes()).padStart(2, '0');
|
|
390
|
-
const seconds = String(now.getSeconds()).padStart(2, '0');
|
|
391
|
-
const timeString = format.replace('YYYY', String(year)).replace('MM', month).replace('DD', day).replace('HH', hours).replace('mm', minutes).replace('ss', seconds);
|
|
392
|
-
return `${timeString} (${format})`;
|
|
393
|
-
};
|
|
394
384
|
exports.PointSchema = __webpack_exports__.PointSchema;
|
|
395
385
|
exports.RectSchema = __webpack_exports__.RectSchema;
|
|
396
386
|
exports.SizeSchema = __webpack_exports__.SizeSchema;
|
|
@@ -406,10 +396,8 @@ exports.dumpActionParam = __webpack_exports__.dumpActionParam;
|
|
|
406
396
|
exports.dumpMidsceneLocatorField = __webpack_exports__.dumpMidsceneLocatorField;
|
|
407
397
|
exports.expandSearchArea = __webpack_exports__.expandSearchArea;
|
|
408
398
|
exports.fillBboxParam = __webpack_exports__.fillBboxParam;
|
|
409
|
-
exports.finalizeActionName = __webpack_exports__.finalizeActionName;
|
|
410
399
|
exports.findAllMidsceneLocatorField = __webpack_exports__.findAllMidsceneLocatorField;
|
|
411
400
|
exports.getMidsceneLocationSchema = __webpack_exports__.getMidsceneLocationSchema;
|
|
412
|
-
exports.getReadableTimeString = __webpack_exports__.getReadableTimeString;
|
|
413
401
|
exports.ifMidsceneLocatorField = __webpack_exports__.ifMidsceneLocatorField;
|
|
414
402
|
exports.loadActionParam = __webpack_exports__.loadActionParam;
|
|
415
403
|
exports.markupImageForLLM = __webpack_exports__.markupImageForLLM;
|
|
@@ -432,10 +420,8 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
|
432
420
|
"dumpMidsceneLocatorField",
|
|
433
421
|
"expandSearchArea",
|
|
434
422
|
"fillBboxParam",
|
|
435
|
-
"finalizeActionName",
|
|
436
423
|
"findAllMidsceneLocatorField",
|
|
437
424
|
"getMidsceneLocationSchema",
|
|
438
|
-
"getReadableTimeString",
|
|
439
425
|
"ifMidsceneLocatorField",
|
|
440
426
|
"loadActionParam",
|
|
441
427
|
"markupImageForLLM",
|