@midscene/core 0.26.7-beta-20250818035341.0 → 0.26.7-beta-20250818081955.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/action-executor.mjs +0 -8
- package/dist/es/ai-model/action-executor.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs +16 -37
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +4 -4
- package/dist/es/ai-model/inspect.mjs +2 -51
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs +1 -25
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +6 -3
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/index.mjs +2 -2
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +1 -36
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/insight/utils.mjs +1 -3
- package/dist/es/insight/utils.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/ai-model/action-executor.js +0 -8
- package/dist/lib/ai-model/action-executor.js.map +1 -1
- package/dist/lib/ai-model/common.js +18 -36
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +7 -4
- package/dist/lib/ai-model/inspect.js +2 -54
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +1 -1
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -29
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +6 -3
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/index.js +0 -3
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +0 -35
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/insight/utils.js +1 -3
- package/dist/lib/insight/utils.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/common.d.ts +3 -2
- package/dist/types/ai-model/index.d.ts +3 -2
- package/dist/types/ai-model/inspect.d.ts +1 -8
- package/dist/types/ai-model/prompt/assertion.d.ts +0 -3
- package/dist/types/index.d.ts +1 -1
- package/dist/types/insight/index.d.ts +1 -2
- package/dist/types/types.d.ts +4 -2
- package/dist/types/yaml.d.ts +6 -6
- package/package.json +3 -3
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["webpack://@midscene/core/./src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport { Anthropic } from '@anthropic-ai/sdk';\nimport {\n DefaultAzureCredential,\n getBearerTokenProvider,\n} from '@azure/identity';\nimport {\n ANTHROPIC_API_KEY,\n AZURE_OPENAI_API_VERSION,\n AZURE_OPENAI_DEPLOYMENT,\n AZURE_OPENAI_ENDPOINT,\n AZURE_OPENAI_KEY,\n MIDSCENE_API_TYPE,\n MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,\n MIDSCENE_AZURE_OPENAI_SCOPE,\n MIDSCENE_DEBUG_AI_PROFILE,\n MIDSCENE_DEBUG_AI_RESPONSE,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_NAME,\n MIDSCENE_OPENAI_HTTP_PROXY,\n MIDSCENE_OPENAI_INIT_CONFIG_JSON,\n MIDSCENE_OPENAI_SOCKS_PROXY,\n MIDSCENE_USE_ANTHROPIC_SDK,\n MIDSCENE_USE_AZURE_OPENAI,\n OPENAI_API_KEY,\n OPENAI_BASE_URL,\n OPENAI_MAX_TOKENS,\n OPENAI_USE_AZURE,\n getAIConfig,\n getAIConfigInBoolean,\n getAIConfigInJson,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { parseBase64 } from '@midscene/shared/img';\nimport { enableDebug, getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI, { AzureOpenAI } from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { locatorSchema } from '../prompt/llm-locator';\nimport { planSchema } from '../prompt/llm-planning';\n\nexport function checkAIConfig() {\n const openaiKey = getAIConfig(OPENAI_API_KEY);\n const azureConfig = getAIConfig(MIDSCENE_USE_AZURE_OPENAI);\n const anthropicKey = getAIConfig(ANTHROPIC_API_KEY);\n const initConfigJson = getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON);\n\n if (openaiKey) return true;\n if (azureConfig) return true;\n if (anthropicKey) return true;\n\n return Boolean(initConfigJson);\n}\n\n// if debug config is initialized\nlet debugConfigInitialized = false;\n\nfunction initDebugConfig() {\n // if debug config is initialized, return\n if (debugConfigInitialized) return;\n\n const shouldPrintTiming = getAIConfigInBoolean(MIDSCENE_DEBUG_AI_PROFILE);\n let debugConfig = '';\n if (shouldPrintTiming) {\n console.warn(\n 'MIDSCENE_DEBUG_AI_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',\n );\n debugConfig = 'ai:profile';\n }\n const shouldPrintAIResponse = getAIConfigInBoolean(\n MIDSCENE_DEBUG_AI_RESPONSE,\n );\n if (shouldPrintAIResponse) {\n console.warn(\n 'MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',\n );\n if (debugConfig) {\n debugConfig = 'ai:*';\n } else {\n debugConfig = 'ai:call';\n }\n }\n if (debugConfig) {\n enableDebug(debugConfig);\n }\n\n // mark as initialized\n debugConfigInitialized = true;\n}\n\n// default model\nconst defaultModel = 'gpt-4o';\nexport function getModelName() {\n let modelName = defaultModel;\n const nameInConfig = getAIConfig(MIDSCENE_MODEL_NAME);\n if (nameInConfig) {\n modelName = nameInConfig;\n }\n return modelName;\n}\n\nasync function createChatClient({\n AIActionTypeValue,\n}: {\n AIActionTypeValue: AIActionType;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n style: 'openai' | 'anthropic';\n}> {\n initDebugConfig();\n let openai: OpenAI | AzureOpenAI | undefined;\n const extraConfig = getAIConfigInJson(MIDSCENE_OPENAI_INIT_CONFIG_JSON);\n\n const socksProxy = getAIConfig(MIDSCENE_OPENAI_SOCKS_PROXY);\n const httpProxy = getAIConfig(MIDSCENE_OPENAI_HTTP_PROXY);\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n if (getAIConfig(OPENAI_USE_AZURE)) {\n // this is deprecated\n openai = new AzureOpenAI({\n baseURL: getAIConfig(OPENAI_BASE_URL),\n apiKey: getAIConfig(OPENAI_API_KEY),\n httpAgent: proxyAgent,\n ...extraConfig,\n dangerouslyAllowBrowser: true,\n }) as OpenAI;\n } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {\n const extraAzureConfig = getAIConfigInJson(\n MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,\n );\n\n // https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=bash%2Cjavascript-key%2Ctypescript-keyless%2Cpython&pivots=programming-language-javascript#rest-api\n // keyless authentication\n const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);\n let tokenProvider: any = undefined;\n if (scope) {\n assert(\n !ifInBrowser,\n 'Azure OpenAI is not supported in browser with Midscene.',\n );\n const credential = new DefaultAzureCredential();\n\n assert(scope, 'MIDSCENE_AZURE_OPENAI_SCOPE is required');\n tokenProvider = getBearerTokenProvider(credential, scope);\n\n openai = new AzureOpenAI({\n azureADTokenProvider: tokenProvider,\n endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),\n apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),\n deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),\n ...extraConfig,\n ...extraAzureConfig,\n });\n } else {\n // endpoint, apiKey, apiVersion, deployment\n openai = new AzureOpenAI({\n apiKey: getAIConfig(AZURE_OPENAI_KEY),\n endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),\n apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),\n deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),\n dangerouslyAllowBrowser: true,\n ...extraConfig,\n ...extraAzureConfig,\n });\n }\n } else if (!getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {\n const baseURL = getAIConfig(OPENAI_BASE_URL);\n if (typeof baseURL === 'string') {\n if (!/^https?:\\/\\//.test(baseURL)) {\n throw new Error(\n `OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}\\nPlease check your config.`,\n );\n }\n }\n\n openai = new OpenAI({\n baseURL: getAIConfig(OPENAI_BASE_URL),\n apiKey: getAIConfig(OPENAI_API_KEY),\n httpAgent: proxyAgent,\n ...extraConfig,\n defaultHeaders: {\n ...(extraConfig?.defaultHeaders || {}),\n [MIDSCENE_API_TYPE]: AIActionTypeValue.toString(),\n },\n dangerouslyAllowBrowser: true,\n });\n }\n\n if (openai && getAIConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n const { wrapOpenAI } = await import('langsmith/wrappers');\n openai = wrapOpenAI(openai);\n }\n\n if (typeof openai !== 'undefined') {\n return {\n completion: openai.chat.completions,\n style: 'openai',\n };\n }\n\n // Anthropic\n if (getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {\n const apiKey = getAIConfig(ANTHROPIC_API_KEY);\n assert(apiKey, 'ANTHROPIC_API_KEY is required');\n openai = new Anthropic({\n apiKey,\n httpAgent: proxyAgent,\n dangerouslyAllowBrowser: true,\n }) as any;\n }\n\n if (typeof openai !== 'undefined' && (openai as any).messages) {\n return {\n completion: (openai as any).messages,\n style: 'anthropic',\n };\n }\n\n throw new Error('Openai SDK or Anthropic SDK is not initialized');\n}\n\nexport async function call(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n responseFormat?:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n assert(\n checkAIConfig(),\n 'Cannot find config for AI model service. If you are using a self-hosted model without validating the API key, please set `OPENAI_API_KEY` to any non-null value. https://midscenejs.com/model-provider.html',\n );\n\n const { completion, style } = await createChatClient({\n AIActionTypeValue,\n });\n\n const maxTokens = getAIConfig(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const model = getModelName();\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const commonConfig = {\n temperature: vlLocateMode() === 'vlm-ui-tars' ? 0.0 : 0.1,\n stream: !!isStreaming,\n max_tokens:\n typeof maxTokens === 'number'\n ? maxTokens\n : Number.parseInt(maxTokens || '2048', 10),\n ...(vlLocateMode() === 'qwen-vl' // qwen specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n if (style === 'openai') {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${model}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n },\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${model}, mode, ${vlLocateMode() || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${model}, mode, ${vlLocateMode() || 'default'}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n } else if (style === 'anthropic') {\n const convertImageContent = (content: any) => {\n if (content.type === 'image_url') {\n const imgBase64 = content.image_url.url;\n assert(imgBase64, 'image_url is required');\n const { mimeType, body } = parseBase64(content.image_url.url);\n return {\n source: {\n type: 'base64',\n media_type: mimeType,\n data: body,\n },\n type: 'image',\n };\n }\n return content;\n };\n\n if (isStreaming) {\n const stream = (await completion.create({\n model,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any)) as any;\n\n for await (const chunk of stream) {\n const content = chunk.delta?.text || '';\n if (content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n accumulated,\n reasoning_content: '',\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.type === 'message_stop') {\n timeCost = Date.now() - startTime;\n const anthropicUsage = chunk.usage;\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: anthropicUsage\n ? {\n prompt_tokens: anthropicUsage.input_tokens ?? 0,\n completion_tokens: anthropicUsage.output_tokens ?? 0,\n total_tokens:\n (anthropicUsage.input_tokens ?? 0) +\n (anthropicUsage.output_tokens ?? 0),\n time_cost: timeCost ?? 0,\n }\n : undefined,\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n } else {\n const result = await completion.create({\n model,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n content = (result as any).content[0].text as string;\n usage = result.usage;\n }\n\n assert(content, 'empty content');\n }\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n return {\n content: content || '',\n usage: usage\n ? {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n }\n : undefined,\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callToGetJSONObject<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n let responseFormat:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject\n | undefined;\n\n const model = getModelName();\n\n if (model.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.INSPECT_ELEMENT:\n responseFormat = locatorSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n if (model === 'gpt-4o-2024-05-13') {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n const response = await call(messages, AIActionTypeValue, responseFormat);\n assert(response, 'empty response');\n const jsonContent = safeParseJson(response.content);\n return { content: jsonContent, usage: response.usage };\n}\n\nexport async function callAiFnWithStringResponse<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await call(msgs, AIActionTypeValue);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function safeParseJson(input: string) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n try {\n return JSON.parse(cleanJsonString);\n } catch {}\n try {\n return JSON.parse(jsonrepair(cleanJsonString));\n } catch (e) {}\n\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n return JSON.parse(jsonrepair(jsonString));\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["checkAIConfig","openaiKey","getAIConfig","OPENAI_API_KEY","azureConfig","MIDSCENE_USE_AZURE_OPENAI","anthropicKey","ANTHROPIC_API_KEY","initConfigJson","MIDSCENE_OPENAI_INIT_CONFIG_JSON","Boolean","debugConfigInitialized","initDebugConfig","shouldPrintTiming","getAIConfigInBoolean","MIDSCENE_DEBUG_AI_PROFILE","debugConfig","console","shouldPrintAIResponse","MIDSCENE_DEBUG_AI_RESPONSE","enableDebug","defaultModel","getModelName","modelName","nameInConfig","MIDSCENE_MODEL_NAME","createChatClient","AIActionTypeValue","openai","extraConfig","getAIConfigInJson","socksProxy","MIDSCENE_OPENAI_SOCKS_PROXY","httpProxy","MIDSCENE_OPENAI_HTTP_PROXY","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","OPENAI_USE_AZURE","AzureOpenAI","OPENAI_BASE_URL","extraAzureConfig","MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON","scope","MIDSCENE_AZURE_OPENAI_SCOPE","tokenProvider","assert","ifInBrowser","credential","DefaultAzureCredential","getBearerTokenProvider","AZURE_OPENAI_ENDPOINT","AZURE_OPENAI_API_VERSION","AZURE_OPENAI_DEPLOYMENT","AZURE_OPENAI_KEY","MIDSCENE_USE_ANTHROPIC_SDK","baseURL","Error","OpenAI","MIDSCENE_API_TYPE","MIDSCENE_LANGSMITH_DEBUG","wrapOpenAI","apiKey","Anthropic","call","messages","responseFormat","options","completion","style","maxTokens","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","model","isStreaming","content","accumulated","usage","timeCost","commonConfig","vlLocateMode","Number","stream","chunk","_chunk_choices__delta","_chunk_choices__delta1","_chunk_choices_2","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","_result_usage","_result_usage1","_result_usage2","result","uiTarsModelVersion","JSON","convertImageContent","imgBase64","mimeType","body","parseBase64","m","Array","_chunk_delta","anthropicUsage","e","newError","callToGetJSONObject","AIActionType","assertSchema","locatorSchema","planSchema","AIResponseFormat","response","jsonContent","safeParseJson","callAiFnWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","cleanJsonString","_cleanJsonString_match","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;;;;;AAkDO,SAASA;IACd,MAAMC,YAAYC,YAAYC;IAC9B,MAAMC,cAAcF,YAAYG;IAChC,MAAMC,eAAeJ,YAAYK;IACjC,MAAMC,iBAAiBN,YAAYO;IAEnC,IAAIR,WAAW,OAAO;IACtB,IAAIG,aAAa,OAAO;IACxB,IAAIE,cAAc,OAAO;IAEzB,OAAOI,QAAQF;AACjB;AAGA,IAAIG,yBAAyB;AAE7B,SAASC;IAEP,IAAID,wBAAwB;IAE5B,MAAME,oBAAoBC,qBAAqBC;IAC/C,IAAIC,cAAc;IAClB,IAAIH,mBAAmB;QACrBI,QAAQ,IAAI,CACV;QAEFD,cAAc;IAChB;IACA,MAAME,wBAAwBJ,qBAC5BK;IAEF,IAAID,uBAAuB;QACzBD,QAAQ,IAAI,CACV;QAGAD,cADEA,cACY,SAEA;IAElB;IACA,IAAIA,aACFI,YAAYJ;IAIdL,yBAAyB;AAC3B;AAGA,MAAMU,eAAe;AACd,SAASC;IACd,IAAIC,YAAYF;IAChB,MAAMG,eAAetB,YAAYuB;IACjC,IAAID,cACFD,YAAYC;IAEd,OAAOD;AACT;AAEA,eAAeG,iBAAiB,EAC9BC,iBAAiB,EAGlB;IAICf;IACA,IAAIgB;IACJ,MAAMC,cAAcC,kBAAkBrB;IAEtC,MAAMsB,aAAa7B,YAAY8B;IAC/B,MAAMC,YAAY/B,YAAYgC;IAE9B,IAAIC;IACJ,MAAMC,aAAaC,SAAS;IAC5B,IAAIJ,WAAW;QACbG,WAAW,oBAAoBH;QAC/BE,aAAa,IAAIG,gBAAgBL;IACnC,OAAO,IAAIF,YAAY;QACrBK,WAAW,qBAAqBL;QAChCI,aAAa,IAAII,gBAAgBR;IACnC;IAEA,IAAI7B,YAAYsC,mBAEdZ,SAAS,IAAIa,YAAY;QACvB,SAASvC,YAAYwC;QACrB,QAAQxC,YAAYC;QACpB,WAAWgC;QACX,GAAGN,WAAW;QACd,yBAAyB;IAC3B;SACK,IAAI3B,YAAYG,4BAA4B;QACjD,MAAMsC,mBAAmBb,kBACvBc;QAKF,MAAMC,QAAQ3C,YAAY4C;QAC1B,IAAIC;QACJ,IAAIF,OAAO;YACTG,OACE,CAACC,aACD;YAEF,MAAMC,aAAa,IAAIC;YAEvBH,OAAOH,OAAO;YACdE,gBAAgBK,uBAAuBF,YAAYL;YAEnDjB,SAAS,IAAIa,YAAY;gBACvB,sBAAsBM;gBACtB,UAAU7C,YAAYmD;gBACtB,YAAYnD,YAAYoD;gBACxB,YAAYpD,YAAYqD;gBACxB,GAAG1B,WAAW;gBACd,GAAGc,gBAAgB;YACrB;QACF,OAEEf,SAAS,IAAIa,YAAY;YACvB,QAAQvC,YAAYsD;YACpB,UAAUtD,YAAYmD;YACtB,YAAYnD,YAAYoD;YACxB,YAAYpD,YAAYqD;YACxB,yBAAyB;YACzB,GAAG1B,WAAW;YACd,GAAGc,gBAAgB;QACrB;IAEJ,OAAO,IAAI,CAACzC,YAAYuD,6BAA6B;QACnD,MAAMC,UAAUxD,YAAYwC;QAC5B,IAAI,AAAmB,YAAnB,OAAOgB,SACT;YAAA,IAAI,CAAC,eAAe,IAAI,CAACA,UACvB,MAAM,IAAIC,MACR,CAAC,gFAAgF,EAAED,QAAQ,2BAA2B,CAAC;QAE3H;QAGF9B,SAAS,IAAIgC,SAAO;YAClB,SAAS1D,YAAYwC;YACrB,QAAQxC,YAAYC;YACpB,WAAWgC;YACX,GAAGN,WAAW;YACd,gBAAgB;gBACd,GAAIA,AAAAA,CAAAA,QAAAA,cAAAA,KAAAA,IAAAA,YAAa,cAAc,AAAD,KAAK,CAAC,CAAC;gBACrC,CAACgC,kBAAkB,EAAElC,kBAAkB,QAAQ;YACjD;YACA,yBAAyB;QAC3B;IACF;IAEA,IAAIC,UAAUd,qBAAqBgD,2BAA2B;QAC5D,IAAIb,aACF,MAAM,IAAIU,MAAM;QAElB1C,QAAQ,GAAG,CAAC;QACZ,MAAM,EAAE8C,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC;QACpCnC,SAASmC,WAAWnC;IACtB;IAEA,IAAI,AAAkB,WAAXA,QACT,OAAO;QACL,YAAYA,OAAO,IAAI,CAAC,WAAW;QACnC,OAAO;IACT;IAIF,IAAI1B,YAAYuD,6BAA6B;QAC3C,MAAMO,SAAS9D,YAAYK;QAC3ByC,OAAOgB,QAAQ;QACfpC,SAAS,IAAIqC,UAAU;YACrBD;YACA,WAAW7B;YACX,yBAAyB;QAC3B;IACF;IAEA,IAAI,AAAkB,WAAXP,UAA2BA,OAAe,QAAQ,EAC3D,OAAO;QACL,YAAaA,OAAe,QAAQ;QACpC,OAAO;IACT;IAGF,MAAM,IAAI+B,MAAM;AAClB;AAEO,eAAeO,KACpBC,QAAsC,EACtCxC,iBAA+B,EAC/ByC,cAEmC,EACnCC,OAGC;IAEDrB,OACEhD,iBACA;IAGF,MAAM,EAAEsE,UAAU,EAAEC,KAAK,EAAE,GAAG,MAAM7C,iBAAiB;QACnDC;IACF;IAEA,MAAM6C,YAAYtE,YAAYuE;IAC9B,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,oBAAoBtC,SAAS;IACnC,MAAMuC,qBAAqBvC,SAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,QAAQzD;IACd,MAAM0D,cAAcX,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,MAAM,AAAD,KAAKA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;IACtD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,eAAe;QACnB,aAAaC,AAAmB,kBAAnBA,iBAAmC,MAAM;QACtD,QAAQ,CAAC,CAACN;QACV,YACE,AAAqB,YAArB,OAAOR,YACHA,YACAe,OAAO,QAAQ,CAACf,aAAa,QAAQ;QAC3C,GAAIc,AAAmB,cAAnBA,iBACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACF,IAAIf,AAAU,aAAVA,OAAoB;YACtBG,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAED,OAAO;YAGjE,IAAIC,aAAa;gBACf,MAAMQ,SAAU,MAAMlB,WAAW,MAAM,CACrC;oBACES;oBACAZ;oBACA,iBAAiBC;oBACjB,GAAGiB,YAAY;gBACjB,GACA;oBACE,QAAQ;gBACV;gBAKF,WAAW,MAAMI,SAASD,OAAQ;wBAChBE,uBAAAA,iBAAAA,gBAEbC,wBAAAA,kBAAAA,iBAoBCC,kBAAAA;oBAtBJ,MAAMX,UAAUS,AAAAA,SAAAA,CAAAA,iBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,cAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,wBAAAA,gBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,sBAA2B,OAAO,AAAD,KAAK;oBACtD,MAAMG,oBACJ,AAAC,SAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,yBAAAA,iBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,uBAAmC,iBAAiB,AAAD,KAAK;oBAG3D,IAAIF,MAAM,KAAK,EACbN,QAAQM,MAAM,KAAK;oBAGrB,IAAIR,WAAWY,mBAAmB;wBAChCX,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAY;4BACAX;4BACA,YAAY;4BACZ,OAAOa;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAI,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,iBAAoB,aAAa,EAAE;wBACrCR,WAAWN,KAAK,GAAG,KAAKD;wBAGxB,IAAI,CAACM,OAAO;4BAEV,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACf,YAAY,MAAM,GAAG;4BAElCC,QAAQ;gCACN,eAAea;gCACf,mBAAmBA;gCACnB,cAAcA,AAAkB,IAAlBA;4BAChB;wBACF;wBAGA,MAAME,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO;gCACL,eAAeC,MAAM,aAAa,IAAI;gCACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gCAC9C,cAAcA,MAAM,YAAY,IAAI;gCACpC,WAAWC,YAAY;4BACzB;wBACF;wBACAf,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;gBACVP,kBACE,CAAC,iBAAiB,EAAEI,MAAM,QAAQ,EAAEO,kBAAkB,UAAU,WAAW,EAAEF,UAAU;YAE3F,OAAO;oBAUgHe,eAAyDC,gBAAwDC;gBATtO,MAAMC,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrCS;oBACAZ;oBACA,iBAAiBC;oBACjB,GAAGiB,YAAY;gBACjB;gBACAD,WAAWN,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAEI,MAAM,QAAQ,EAAEO,kBAAkB,UAAU,mBAAmB,EAAEiB,qBAAqB,iBAAiB,EAAEJ,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,aAAa,AAAD,KAAK,GAAG,qBAAqB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,iBAAiB,AAAD,KAAK,GAAG,gBAAgB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,YAAY,AAAD,KAAK,GAAG,WAAW,EAAEjB,SAAS,aAAa,EAAEkB,OAAO,WAAW,IAAI,IAAI;gBAGtU1B,mBACE,CAAC,oBAAoB,EAAE4B,KAAK,SAAS,CAACF,OAAO,KAAK,GAAG;gBAGvDtD,OACEsD,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEE,KAAK,SAAS,CAACF,SAAS;gBAEhErB,UAAUqB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3CnB,QAAQmB,OAAO,KAAK;YACtB;YAEA5B,UAAU,CAAC,UAAU,EAAEO,SAAS;YAChCjC,OAAOiC,SAAS;QAClB,OAAO,IAAIV,AAAU,gBAAVA,OAAuB;YAChC,MAAMkC,sBAAsB,CAACxB;gBAC3B,IAAIA,AAAiB,gBAAjBA,QAAQ,IAAI,EAAkB;oBAChC,MAAMyB,YAAYzB,QAAQ,SAAS,CAAC,GAAG;oBACvCjC,OAAO0D,WAAW;oBAClB,MAAM,EAAEC,QAAQ,EAAEC,IAAI,EAAE,GAAGC,YAAY5B,QAAQ,SAAS,CAAC,GAAG;oBAC5D,OAAO;wBACL,QAAQ;4BACN,MAAM;4BACN,YAAY0B;4BACZ,MAAMC;wBACR;wBACA,MAAM;oBACR;gBACF;gBACA,OAAO3B;YACT;YAEA,IAAID,aAAa;gBACf,MAAMQ,SAAU,MAAMlB,WAAW,MAAM,CAAC;oBACtCS;oBACA,QAAQ;oBACR,UAAUZ,SAAS,GAAG,CAAC,CAAC2C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiB1C;oBACjB,GAAGiB,YAAY;gBACjB;gBAEA,WAAW,MAAMI,SAASD,OAAQ;wBAChBwB;oBAAhB,MAAM/B,UAAU+B,AAAAA,SAAAA,CAAAA,eAAAA,MAAM,KAAK,AAAD,IAAVA,KAAAA,IAAAA,aAAa,IAAI,AAAD,KAAK;oBACrC,IAAI/B,SAAS;wBACXC,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAC;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAOa;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAIL,AAAe,mBAAfA,MAAM,IAAI,EAAqB;wBACjCL,WAAWN,KAAK,GAAG,KAAKD;wBACxB,MAAMoC,iBAAiBxB,MAAM,KAAK;wBAGlC,MAAMS,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO+B,iBACH;gCACE,eAAeA,eAAe,YAAY,IAAI;gCAC9C,mBAAmBA,eAAe,aAAa,IAAI;gCACnD,cACGA,AAAAA,CAAAA,eAAe,YAAY,IAAI,KAC/BA,CAAAA,eAAe,aAAa,IAAI;gCACnC,WAAW7B,YAAY;4BACzB,IACAW;wBACN;wBACA1B,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;YACZ,OAAO;gBACL,MAAMoB,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrCS;oBACA,QAAQ;oBACR,UAAUZ,SAAS,GAAG,CAAC,CAAC2C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiB1C;oBACjB,GAAGiB,YAAY;gBACjB;gBACAD,WAAWN,KAAK,GAAG,KAAKD;gBACxBI,UAAWqB,OAAe,OAAO,CAAC,EAAE,CAAC,IAAI;gBACzCnB,QAAQmB,OAAO,KAAK;YACtB;YAEAtD,OAAOiC,SAAS;QAClB;QAEA,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEhB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAea;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASf,WAAW;YACpB,OAAOE,QACH;gBACE,eAAeA,MAAM,aAAa,IAAI;gBACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gBAC9C,cAAcA,MAAM,YAAY,IAAI;gBACpC,WAAWC,YAAY;YACzB,IACAW;YACJ,YAAY,CAAC,CAACf;QAChB;IACF,EAAE,OAAOkC,GAAQ;QACfjG,QAAQ,KAAK,CAAC,kBAAkBiG;QAChC,MAAMC,WAAW,IAAIxD,MACnB,CAAC,eAAe,EAAEqB,cAAc,eAAe,GAAG,kBAAkB,EAAEkC,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,oBACpBjD,QAAsC,EACtCxC,iBAA+B;IAE/B,IAAIyC;IAKJ,MAAMW,QAAQzD;IAEd,IAAIyD,MAAM,QAAQ,CAAC,UACjB,OAAQpD;QACN,KAAK0F,aAAa,MAAM;YACtBjD,iBAAiBkD;YACjB;QACF,KAAKD,aAAa,eAAe;YAC/BjD,iBAAiBmD;YACjB;QACF,KAAKF,aAAa,IAAI;YACpBjD,iBAAiBoD;YACjB;QACF,KAAKH,aAAa,YAAY;QAC9B,KAAKA,aAAa,gBAAgB;YAChCjD,iBAAiB;gBAAE,MAAMqD,iBAAiB,IAAI;YAAC;YAC/C;IACJ;IAIF,IAAI1C,AAAU,wBAAVA,OACFX,iBAAiB;QAAE,MAAMqD,iBAAiB,IAAI;IAAC;IAGjD,MAAMC,WAAW,MAAMxD,KAAKC,UAAUxC,mBAAmByC;IACzDpB,OAAO0E,UAAU;IACjB,MAAMC,cAAcC,cAAcF,SAAS,OAAO;IAClD,OAAO;QAAE,SAASC;QAAa,OAAOD,SAAS,KAAK;IAAC;AACvD;AAEO,eAAeG,2BACpBC,IAAY,EACZnG,iBAA+B;IAE/B,MAAM,EAAEsD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMjB,KAAK4D,MAAMnG;IAC5C,OAAO;QAAEsD;QAASE;IAAM;AAC1B;AAEO,SAAS4C,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASR,cAAcQ,KAAa;IACzC,MAAMC,kBAAkBN,yBAAyBK;IAEjD,IAAIC,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,KAAK,CAAC,oBAAoB;YACtCC;QAAP,OAAO,QAAAA,CAAAA,yBAAAA,gBACJ,KAAK,CAAC,kBAAiB,IADnBA,KAAAA,IAAAA,uBAEH,KAAK,CAAC,GACP,GAAG,CAAC/C;IACT;IACA,IAAI;QACF,OAAOiB,KAAK,KAAK,CAAC6B;IACpB,EAAE,OAAM,CAAC;IACT,IAAI;QACF,OAAO7B,KAAK,KAAK,CAAC+B,WAAWF;IAC/B,EAAE,OAAOnB,GAAG,CAAC;IAEb,IAAI5B,AAAmB,oBAAnBA,kBAAsCA,AAAmB,kBAAnBA,gBAAkC;QAC1E,MAAMkD,aAAaL,yBAAyBE;QAC5C,OAAO7B,KAAK,KAAK,CAAC+B,WAAWC;IAC/B;IACA,MAAM7E,MAAM,CAAC,+BAA+B,EAAEyE,OAAO;AACvD"}
|
|
1
|
+
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["webpack://@midscene/core/./src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport { Anthropic } from '@anthropic-ai/sdk';\nimport {\n DefaultAzureCredential,\n getBearerTokenProvider,\n} from '@azure/identity';\nimport {\n ANTHROPIC_API_KEY,\n AZURE_OPENAI_API_VERSION,\n AZURE_OPENAI_DEPLOYMENT,\n AZURE_OPENAI_ENDPOINT,\n AZURE_OPENAI_KEY,\n MIDSCENE_API_TYPE,\n MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,\n MIDSCENE_AZURE_OPENAI_SCOPE,\n MIDSCENE_DEBUG_AI_PROFILE,\n MIDSCENE_DEBUG_AI_RESPONSE,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_NAME,\n MIDSCENE_OPENAI_HTTP_PROXY,\n MIDSCENE_OPENAI_INIT_CONFIG_JSON,\n MIDSCENE_OPENAI_SOCKS_PROXY,\n MIDSCENE_USE_ANTHROPIC_SDK,\n MIDSCENE_USE_AZURE_OPENAI,\n OPENAI_API_KEY,\n OPENAI_BASE_URL,\n OPENAI_MAX_TOKENS,\n OPENAI_USE_AZURE,\n getAIConfig,\n getAIConfigInBoolean,\n getAIConfigInJson,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { parseBase64 } from '@midscene/shared/img';\nimport { enableDebug, getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI, { AzureOpenAI } from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { locatorSchema } from '../prompt/llm-locator';\nimport { planSchema } from '../prompt/llm-planning';\n\nexport function checkAIConfig() {\n const openaiKey = getAIConfig(OPENAI_API_KEY);\n const azureConfig = getAIConfig(MIDSCENE_USE_AZURE_OPENAI);\n const anthropicKey = getAIConfig(ANTHROPIC_API_KEY);\n const initConfigJson = getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON);\n\n if (openaiKey) return true;\n if (azureConfig) return true;\n if (anthropicKey) return true;\n\n return Boolean(initConfigJson);\n}\n\n// if debug config is initialized\nlet debugConfigInitialized = false;\n\nfunction initDebugConfig() {\n // if debug config is initialized, return\n if (debugConfigInitialized) return;\n\n const shouldPrintTiming = getAIConfigInBoolean(MIDSCENE_DEBUG_AI_PROFILE);\n let debugConfig = '';\n if (shouldPrintTiming) {\n console.warn(\n 'MIDSCENE_DEBUG_AI_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',\n );\n debugConfig = 'ai:profile';\n }\n const shouldPrintAIResponse = getAIConfigInBoolean(\n MIDSCENE_DEBUG_AI_RESPONSE,\n );\n if (shouldPrintAIResponse) {\n console.warn(\n 'MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',\n );\n if (debugConfig) {\n debugConfig = 'ai:*';\n } else {\n debugConfig = 'ai:call';\n }\n }\n if (debugConfig) {\n enableDebug(debugConfig);\n }\n\n // mark as initialized\n debugConfigInitialized = true;\n}\n\n// default model\nconst defaultModel = 'gpt-4o';\nexport function getModelName() {\n let modelName = defaultModel;\n const nameInConfig = getAIConfig(MIDSCENE_MODEL_NAME);\n if (nameInConfig) {\n modelName = nameInConfig;\n }\n return modelName;\n}\n\nasync function createChatClient({\n AIActionTypeValue,\n}: {\n AIActionTypeValue: AIActionType;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n style: 'openai' | 'anthropic';\n}> {\n initDebugConfig();\n let openai: OpenAI | AzureOpenAI | undefined;\n const extraConfig = getAIConfigInJson(MIDSCENE_OPENAI_INIT_CONFIG_JSON);\n\n const socksProxy = getAIConfig(MIDSCENE_OPENAI_SOCKS_PROXY);\n const httpProxy = getAIConfig(MIDSCENE_OPENAI_HTTP_PROXY);\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n if (getAIConfig(OPENAI_USE_AZURE)) {\n // this is deprecated\n openai = new AzureOpenAI({\n baseURL: getAIConfig(OPENAI_BASE_URL),\n apiKey: getAIConfig(OPENAI_API_KEY),\n httpAgent: proxyAgent,\n ...extraConfig,\n dangerouslyAllowBrowser: true,\n }) as OpenAI;\n } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {\n const extraAzureConfig = getAIConfigInJson(\n MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,\n );\n\n // https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=bash%2Cjavascript-key%2Ctypescript-keyless%2Cpython&pivots=programming-language-javascript#rest-api\n // keyless authentication\n const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);\n let tokenProvider: any = undefined;\n if (scope) {\n assert(\n !ifInBrowser,\n 'Azure OpenAI is not supported in browser with Midscene.',\n );\n const credential = new DefaultAzureCredential();\n\n assert(scope, 'MIDSCENE_AZURE_OPENAI_SCOPE is required');\n tokenProvider = getBearerTokenProvider(credential, scope);\n\n openai = new AzureOpenAI({\n azureADTokenProvider: tokenProvider,\n endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),\n apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),\n deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),\n ...extraConfig,\n ...extraAzureConfig,\n });\n } else {\n // endpoint, apiKey, apiVersion, deployment\n openai = new AzureOpenAI({\n apiKey: getAIConfig(AZURE_OPENAI_KEY),\n endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),\n apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),\n deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),\n dangerouslyAllowBrowser: true,\n ...extraConfig,\n ...extraAzureConfig,\n });\n }\n } else if (!getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {\n const baseURL = getAIConfig(OPENAI_BASE_URL);\n if (typeof baseURL === 'string') {\n if (!/^https?:\\/\\//.test(baseURL)) {\n throw new Error(\n `OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}\\nPlease check your config.`,\n );\n }\n }\n\n openai = new OpenAI({\n baseURL: getAIConfig(OPENAI_BASE_URL),\n apiKey: getAIConfig(OPENAI_API_KEY),\n httpAgent: proxyAgent,\n ...extraConfig,\n defaultHeaders: {\n ...(extraConfig?.defaultHeaders || {}),\n [MIDSCENE_API_TYPE]: AIActionTypeValue.toString(),\n },\n dangerouslyAllowBrowser: true,\n });\n }\n\n if (openai && getAIConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n const { wrapOpenAI } = await import('langsmith/wrappers');\n openai = wrapOpenAI(openai);\n }\n\n if (typeof openai !== 'undefined') {\n return {\n completion: openai.chat.completions,\n style: 'openai',\n };\n }\n\n // Anthropic\n if (getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {\n const apiKey = getAIConfig(ANTHROPIC_API_KEY);\n assert(apiKey, 'ANTHROPIC_API_KEY is required');\n openai = new Anthropic({\n apiKey,\n httpAgent: proxyAgent,\n dangerouslyAllowBrowser: true,\n }) as any;\n }\n\n if (typeof openai !== 'undefined' && (openai as any).messages) {\n return {\n completion: (openai as any).messages,\n style: 'anthropic',\n };\n }\n\n throw new Error('Openai SDK or Anthropic SDK is not initialized');\n}\n\nexport async function call(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n responseFormat?:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n assert(\n checkAIConfig(),\n 'Cannot find config for AI model service. If you are using a self-hosted model without validating the API key, please set `OPENAI_API_KEY` to any non-null value. https://midscenejs.com/model-provider.html',\n );\n\n const { completion, style } = await createChatClient({\n AIActionTypeValue,\n });\n\n const maxTokens = getAIConfig(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const model = getModelName();\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const commonConfig = {\n temperature: vlLocateMode() === 'vlm-ui-tars' ? 0.0 : 0.1,\n stream: !!isStreaming,\n max_tokens:\n typeof maxTokens === 'number'\n ? maxTokens\n : Number.parseInt(maxTokens || '2048', 10),\n ...(vlLocateMode() === 'qwen-vl' // qwen specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n if (style === 'openai') {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${model}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: model,\n },\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${model}, mode, ${vlLocateMode() || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${model}, mode, ${vlLocateMode() || 'default'}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n } else if (style === 'anthropic') {\n const convertImageContent = (content: any) => {\n if (content.type === 'image_url') {\n const imgBase64 = content.image_url.url;\n assert(imgBase64, 'image_url is required');\n const { mimeType, body } = parseBase64(content.image_url.url);\n return {\n source: {\n type: 'base64',\n media_type: mimeType,\n data: body,\n },\n type: 'image',\n };\n }\n return content;\n };\n\n if (isStreaming) {\n const stream = (await completion.create({\n model,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any)) as any;\n\n for await (const chunk of stream) {\n const content = chunk.delta?.text || '';\n if (content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n accumulated,\n reasoning_content: '',\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.type === 'message_stop') {\n timeCost = Date.now() - startTime;\n const anthropicUsage = chunk.usage;\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: anthropicUsage\n ? {\n prompt_tokens: anthropicUsage.input_tokens ?? 0,\n completion_tokens: anthropicUsage.output_tokens ?? 0,\n total_tokens:\n (anthropicUsage.input_tokens ?? 0) +\n (anthropicUsage.output_tokens ?? 0),\n time_cost: timeCost ?? 0,\n model_name: model,\n }\n : undefined,\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n } else {\n const result = await completion.create({\n model,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n content = (result as any).content[0].text as string;\n usage = result.usage;\n }\n\n assert(content, 'empty content');\n }\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n return {\n content: content || '',\n usage: usage\n ? {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: model,\n }\n : undefined,\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callToGetJSONObject<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n let responseFormat:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject\n | undefined;\n\n const model = getModelName();\n\n if (model.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.INSPECT_ELEMENT:\n responseFormat = locatorSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n if (model === 'gpt-4o-2024-05-13') {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n const response = await call(messages, AIActionTypeValue, responseFormat);\n assert(response, 'empty response');\n const jsonContent = safeParseJson(response.content);\n return { content: jsonContent, usage: response.usage };\n}\n\nexport async function callAiFnWithStringResponse<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await call(msgs, AIActionTypeValue);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function safeParseJson(input: string) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n try {\n return JSON.parse(cleanJsonString);\n } catch {}\n try {\n return JSON.parse(jsonrepair(cleanJsonString));\n } catch (e) {}\n\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n return JSON.parse(jsonrepair(jsonString));\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["checkAIConfig","openaiKey","getAIConfig","OPENAI_API_KEY","azureConfig","MIDSCENE_USE_AZURE_OPENAI","anthropicKey","ANTHROPIC_API_KEY","initConfigJson","MIDSCENE_OPENAI_INIT_CONFIG_JSON","Boolean","debugConfigInitialized","initDebugConfig","shouldPrintTiming","getAIConfigInBoolean","MIDSCENE_DEBUG_AI_PROFILE","debugConfig","console","shouldPrintAIResponse","MIDSCENE_DEBUG_AI_RESPONSE","enableDebug","defaultModel","getModelName","modelName","nameInConfig","MIDSCENE_MODEL_NAME","createChatClient","AIActionTypeValue","openai","extraConfig","getAIConfigInJson","socksProxy","MIDSCENE_OPENAI_SOCKS_PROXY","httpProxy","MIDSCENE_OPENAI_HTTP_PROXY","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","OPENAI_USE_AZURE","AzureOpenAI","OPENAI_BASE_URL","extraAzureConfig","MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON","scope","MIDSCENE_AZURE_OPENAI_SCOPE","tokenProvider","assert","ifInBrowser","credential","DefaultAzureCredential","getBearerTokenProvider","AZURE_OPENAI_ENDPOINT","AZURE_OPENAI_API_VERSION","AZURE_OPENAI_DEPLOYMENT","AZURE_OPENAI_KEY","MIDSCENE_USE_ANTHROPIC_SDK","baseURL","Error","OpenAI","MIDSCENE_API_TYPE","MIDSCENE_LANGSMITH_DEBUG","wrapOpenAI","apiKey","Anthropic","call","messages","responseFormat","options","completion","style","maxTokens","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","model","isStreaming","content","accumulated","usage","timeCost","commonConfig","vlLocateMode","Number","stream","chunk","_chunk_choices__delta","_chunk_choices__delta1","_chunk_choices_2","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","_result_usage","_result_usage1","_result_usage2","result","uiTarsModelVersion","JSON","convertImageContent","imgBase64","mimeType","body","parseBase64","m","Array","_chunk_delta","anthropicUsage","e","newError","callToGetJSONObject","AIActionType","assertSchema","locatorSchema","planSchema","AIResponseFormat","response","jsonContent","safeParseJson","callAiFnWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","cleanJsonString","_cleanJsonString_match","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;;;;;AAkDO,SAASA;IACd,MAAMC,YAAYC,YAAYC;IAC9B,MAAMC,cAAcF,YAAYG;IAChC,MAAMC,eAAeJ,YAAYK;IACjC,MAAMC,iBAAiBN,YAAYO;IAEnC,IAAIR,WAAW,OAAO;IACtB,IAAIG,aAAa,OAAO;IACxB,IAAIE,cAAc,OAAO;IAEzB,OAAOI,QAAQF;AACjB;AAGA,IAAIG,yBAAyB;AAE7B,SAASC;IAEP,IAAID,wBAAwB;IAE5B,MAAME,oBAAoBC,qBAAqBC;IAC/C,IAAIC,cAAc;IAClB,IAAIH,mBAAmB;QACrBI,QAAQ,IAAI,CACV;QAEFD,cAAc;IAChB;IACA,MAAME,wBAAwBJ,qBAC5BK;IAEF,IAAID,uBAAuB;QACzBD,QAAQ,IAAI,CACV;QAGAD,cADEA,cACY,SAEA;IAElB;IACA,IAAIA,aACFI,YAAYJ;IAIdL,yBAAyB;AAC3B;AAGA,MAAMU,eAAe;AACd,SAASC;IACd,IAAIC,YAAYF;IAChB,MAAMG,eAAetB,YAAYuB;IACjC,IAAID,cACFD,YAAYC;IAEd,OAAOD;AACT;AAEA,eAAeG,iBAAiB,EAC9BC,iBAAiB,EAGlB;IAICf;IACA,IAAIgB;IACJ,MAAMC,cAAcC,kBAAkBrB;IAEtC,MAAMsB,aAAa7B,YAAY8B;IAC/B,MAAMC,YAAY/B,YAAYgC;IAE9B,IAAIC;IACJ,MAAMC,aAAaC,SAAS;IAC5B,IAAIJ,WAAW;QACbG,WAAW,oBAAoBH;QAC/BE,aAAa,IAAIG,gBAAgBL;IACnC,OAAO,IAAIF,YAAY;QACrBK,WAAW,qBAAqBL;QAChCI,aAAa,IAAII,gBAAgBR;IACnC;IAEA,IAAI7B,YAAYsC,mBAEdZ,SAAS,IAAIa,YAAY;QACvB,SAASvC,YAAYwC;QACrB,QAAQxC,YAAYC;QACpB,WAAWgC;QACX,GAAGN,WAAW;QACd,yBAAyB;IAC3B;SACK,IAAI3B,YAAYG,4BAA4B;QACjD,MAAMsC,mBAAmBb,kBACvBc;QAKF,MAAMC,QAAQ3C,YAAY4C;QAC1B,IAAIC;QACJ,IAAIF,OAAO;YACTG,OACE,CAACC,aACD;YAEF,MAAMC,aAAa,IAAIC;YAEvBH,OAAOH,OAAO;YACdE,gBAAgBK,uBAAuBF,YAAYL;YAEnDjB,SAAS,IAAIa,YAAY;gBACvB,sBAAsBM;gBACtB,UAAU7C,YAAYmD;gBACtB,YAAYnD,YAAYoD;gBACxB,YAAYpD,YAAYqD;gBACxB,GAAG1B,WAAW;gBACd,GAAGc,gBAAgB;YACrB;QACF,OAEEf,SAAS,IAAIa,YAAY;YACvB,QAAQvC,YAAYsD;YACpB,UAAUtD,YAAYmD;YACtB,YAAYnD,YAAYoD;YACxB,YAAYpD,YAAYqD;YACxB,yBAAyB;YACzB,GAAG1B,WAAW;YACd,GAAGc,gBAAgB;QACrB;IAEJ,OAAO,IAAI,CAACzC,YAAYuD,6BAA6B;QACnD,MAAMC,UAAUxD,YAAYwC;QAC5B,IAAI,AAAmB,YAAnB,OAAOgB,SACT;YAAA,IAAI,CAAC,eAAe,IAAI,CAACA,UACvB,MAAM,IAAIC,MACR,CAAC,gFAAgF,EAAED,QAAQ,2BAA2B,CAAC;QAE3H;QAGF9B,SAAS,IAAIgC,SAAO;YAClB,SAAS1D,YAAYwC;YACrB,QAAQxC,YAAYC;YACpB,WAAWgC;YACX,GAAGN,WAAW;YACd,gBAAgB;gBACd,GAAIA,AAAAA,CAAAA,QAAAA,cAAAA,KAAAA,IAAAA,YAAa,cAAc,AAAD,KAAK,CAAC,CAAC;gBACrC,CAACgC,kBAAkB,EAAElC,kBAAkB,QAAQ;YACjD;YACA,yBAAyB;QAC3B;IACF;IAEA,IAAIC,UAAUd,qBAAqBgD,2BAA2B;QAC5D,IAAIb,aACF,MAAM,IAAIU,MAAM;QAElB1C,QAAQ,GAAG,CAAC;QACZ,MAAM,EAAE8C,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC;QACpCnC,SAASmC,WAAWnC;IACtB;IAEA,IAAI,AAAkB,WAAXA,QACT,OAAO;QACL,YAAYA,OAAO,IAAI,CAAC,WAAW;QACnC,OAAO;IACT;IAIF,IAAI1B,YAAYuD,6BAA6B;QAC3C,MAAMO,SAAS9D,YAAYK;QAC3ByC,OAAOgB,QAAQ;QACfpC,SAAS,IAAIqC,UAAU;YACrBD;YACA,WAAW7B;YACX,yBAAyB;QAC3B;IACF;IAEA,IAAI,AAAkB,WAAXP,UAA2BA,OAAe,QAAQ,EAC3D,OAAO;QACL,YAAaA,OAAe,QAAQ;QACpC,OAAO;IACT;IAGF,MAAM,IAAI+B,MAAM;AAClB;AAEO,eAAeO,KACpBC,QAAsC,EACtCxC,iBAA+B,EAC/ByC,cAEmC,EACnCC,OAGC;IAEDrB,OACEhD,iBACA;IAGF,MAAM,EAAEsE,UAAU,EAAEC,KAAK,EAAE,GAAG,MAAM7C,iBAAiB;QACnDC;IACF;IAEA,MAAM6C,YAAYtE,YAAYuE;IAC9B,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,oBAAoBtC,SAAS;IACnC,MAAMuC,qBAAqBvC,SAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,QAAQzD;IACd,MAAM0D,cAAcX,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,MAAM,AAAD,KAAKA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;IACtD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,eAAe;QACnB,aAAaC,AAAmB,kBAAnBA,iBAAmC,MAAM;QACtD,QAAQ,CAAC,CAACN;QACV,YACE,AAAqB,YAArB,OAAOR,YACHA,YACAe,OAAO,QAAQ,CAACf,aAAa,QAAQ;QAC3C,GAAIc,AAAmB,cAAnBA,iBACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACF,IAAIf,AAAU,aAAVA,OAAoB;YACtBG,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAED,OAAO;YAGjE,IAAIC,aAAa;gBACf,MAAMQ,SAAU,MAAMlB,WAAW,MAAM,CACrC;oBACES;oBACAZ;oBACA,iBAAiBC;oBACjB,GAAGiB,YAAY;gBACjB,GACA;oBACE,QAAQ;gBACV;gBAKF,WAAW,MAAMI,SAASD,OAAQ;wBAChBE,uBAAAA,iBAAAA,gBAEbC,wBAAAA,kBAAAA,iBAoBCC,kBAAAA;oBAtBJ,MAAMX,UAAUS,AAAAA,SAAAA,CAAAA,iBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,cAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,wBAAAA,gBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,sBAA2B,OAAO,AAAD,KAAK;oBACtD,MAAMG,oBACJ,AAAC,SAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,yBAAAA,iBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,uBAAmC,iBAAiB,AAAD,KAAK;oBAG3D,IAAIF,MAAM,KAAK,EACbN,QAAQM,MAAM,KAAK;oBAGrB,IAAIR,WAAWY,mBAAmB;wBAChCX,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAY;4BACAX;4BACA,YAAY;4BACZ,OAAOa;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAI,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,iBAAoB,aAAa,EAAE;wBACrCR,WAAWN,KAAK,GAAG,KAAKD;wBAGxB,IAAI,CAACM,OAAO;4BAEV,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACf,YAAY,MAAM,GAAG;4BAElCC,QAAQ;gCACN,eAAea;gCACf,mBAAmBA;gCACnB,cAAcA,AAAkB,IAAlBA;4BAChB;wBACF;wBAGA,MAAME,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO;gCACL,eAAeC,MAAM,aAAa,IAAI;gCACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gCAC9C,cAAcA,MAAM,YAAY,IAAI;gCACpC,WAAWC,YAAY;gCACvB,YAAYL;4BACd;wBACF;wBACAV,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;gBACVP,kBACE,CAAC,iBAAiB,EAAEI,MAAM,QAAQ,EAAEO,kBAAkB,UAAU,WAAW,EAAEF,UAAU;YAE3F,OAAO;oBAUgHe,eAAyDC,gBAAwDC;gBATtO,MAAMC,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrCS;oBACAZ;oBACA,iBAAiBC;oBACjB,GAAGiB,YAAY;gBACjB;gBACAD,WAAWN,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAEI,MAAM,QAAQ,EAAEO,kBAAkB,UAAU,mBAAmB,EAAEiB,qBAAqB,iBAAiB,EAAEJ,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,aAAa,AAAD,KAAK,GAAG,qBAAqB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,iBAAiB,AAAD,KAAK,GAAG,gBAAgB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,YAAY,AAAD,KAAK,GAAG,WAAW,EAAEjB,SAAS,aAAa,EAAEkB,OAAO,WAAW,IAAI,IAAI;gBAGtU1B,mBACE,CAAC,oBAAoB,EAAE4B,KAAK,SAAS,CAACF,OAAO,KAAK,GAAG;gBAGvDtD,OACEsD,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEE,KAAK,SAAS,CAACF,SAAS;gBAEhErB,UAAUqB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3CnB,QAAQmB,OAAO,KAAK;YACtB;YAEA5B,UAAU,CAAC,UAAU,EAAEO,SAAS;YAChCjC,OAAOiC,SAAS;QAClB,OAAO,IAAIV,AAAU,gBAAVA,OAAuB;YAChC,MAAMkC,sBAAsB,CAACxB;gBAC3B,IAAIA,AAAiB,gBAAjBA,QAAQ,IAAI,EAAkB;oBAChC,MAAMyB,YAAYzB,QAAQ,SAAS,CAAC,GAAG;oBACvCjC,OAAO0D,WAAW;oBAClB,MAAM,EAAEC,QAAQ,EAAEC,IAAI,EAAE,GAAGC,YAAY5B,QAAQ,SAAS,CAAC,GAAG;oBAC5D,OAAO;wBACL,QAAQ;4BACN,MAAM;4BACN,YAAY0B;4BACZ,MAAMC;wBACR;wBACA,MAAM;oBACR;gBACF;gBACA,OAAO3B;YACT;YAEA,IAAID,aAAa;gBACf,MAAMQ,SAAU,MAAMlB,WAAW,MAAM,CAAC;oBACtCS;oBACA,QAAQ;oBACR,UAAUZ,SAAS,GAAG,CAAC,CAAC2C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiB1C;oBACjB,GAAGiB,YAAY;gBACjB;gBAEA,WAAW,MAAMI,SAASD,OAAQ;wBAChBwB;oBAAhB,MAAM/B,UAAU+B,AAAAA,SAAAA,CAAAA,eAAAA,MAAM,KAAK,AAAD,IAAVA,KAAAA,IAAAA,aAAa,IAAI,AAAD,KAAK;oBACrC,IAAI/B,SAAS;wBACXC,eAAeD;wBACf,MAAMa,YAAiC;4BACrCb;4BACAC;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAOa;wBACT;wBACA1B,QAAQ,OAAO,CAAEyB;oBACnB;oBAGA,IAAIL,AAAe,mBAAfA,MAAM,IAAI,EAAqB;wBACjCL,WAAWN,KAAK,GAAG,KAAKD;wBACxB,MAAMoC,iBAAiBxB,MAAM,KAAK;wBAGlC,MAAMS,aAAkC;4BACtC,SAAS;4BACThB;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAO+B,iBACH;gCACE,eAAeA,eAAe,YAAY,IAAI;gCAC9C,mBAAmBA,eAAe,aAAa,IAAI;gCACnD,cACGA,AAAAA,CAAAA,eAAe,YAAY,IAAI,KAC/BA,CAAAA,eAAe,aAAa,IAAI;gCACnC,WAAW7B,YAAY;gCACvB,YAAYL;4BACd,IACAgB;wBACN;wBACA1B,QAAQ,OAAO,CAAE6B;wBACjB;oBACF;gBACF;gBACAjB,UAAUC;YACZ,OAAO;gBACL,MAAMoB,SAAS,MAAMhC,WAAW,MAAM,CAAC;oBACrCS;oBACA,QAAQ;oBACR,UAAUZ,SAAS,GAAG,CAAC,CAAC2C,IAAO;4BAC7B,MAAM;4BACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;wBACf;oBACA,iBAAiB1C;oBACjB,GAAGiB,YAAY;gBACjB;gBACAD,WAAWN,KAAK,GAAG,KAAKD;gBACxBI,UAAWqB,OAAe,OAAO,CAAC,EAAE,CAAC,IAAI;gBACzCnB,QAAQmB,OAAO,KAAK;YACtB;YAEAtD,OAAOiC,SAAS;QAClB;QAEA,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMa,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEhB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAea;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASf,WAAW;YACpB,OAAOE,QACH;gBACE,eAAeA,MAAM,aAAa,IAAI;gBACtC,mBAAmBA,MAAM,iBAAiB,IAAI;gBAC9C,cAAcA,MAAM,YAAY,IAAI;gBACpC,WAAWC,YAAY;gBACvB,YAAYL;YACd,IACAgB;YACJ,YAAY,CAAC,CAACf;QAChB;IACF,EAAE,OAAOkC,GAAQ;QACfjG,QAAQ,KAAK,CAAC,kBAAkBiG;QAChC,MAAMC,WAAW,IAAIxD,MACnB,CAAC,eAAe,EAAEqB,cAAc,eAAe,GAAG,kBAAkB,EAAEkC,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,oBACpBjD,QAAsC,EACtCxC,iBAA+B;IAE/B,IAAIyC;IAKJ,MAAMW,QAAQzD;IAEd,IAAIyD,MAAM,QAAQ,CAAC,UACjB,OAAQpD;QACN,KAAK0F,aAAa,MAAM;YACtBjD,iBAAiBkD;YACjB;QACF,KAAKD,aAAa,eAAe;YAC/BjD,iBAAiBmD;YACjB;QACF,KAAKF,aAAa,IAAI;YACpBjD,iBAAiBoD;YACjB;QACF,KAAKH,aAAa,YAAY;QAC9B,KAAKA,aAAa,gBAAgB;YAChCjD,iBAAiB;gBAAE,MAAMqD,iBAAiB,IAAI;YAAC;YAC/C;IACJ;IAIF,IAAI1C,AAAU,wBAAVA,OACFX,iBAAiB;QAAE,MAAMqD,iBAAiB,IAAI;IAAC;IAGjD,MAAMC,WAAW,MAAMxD,KAAKC,UAAUxC,mBAAmByC;IACzDpB,OAAO0E,UAAU;IACjB,MAAMC,cAAcC,cAAcF,SAAS,OAAO;IAClD,OAAO;QAAE,SAASC;QAAa,OAAOD,SAAS,KAAK;IAAC;AACvD;AAEO,eAAeG,2BACpBC,IAAY,EACZnG,iBAA+B;IAE/B,MAAM,EAAEsD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMjB,KAAK4D,MAAMnG;IAC5C,OAAO;QAAEsD;QAASE;IAAM;AAC1B;AAEO,SAAS4C,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASR,cAAcQ,KAAa;IACzC,MAAMC,kBAAkBN,yBAAyBK;IAEjD,IAAIC,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,KAAK,CAAC,oBAAoB;YACtCC;QAAP,OAAO,QAAAA,CAAAA,yBAAAA,gBACJ,KAAK,CAAC,kBAAiB,IADnBA,KAAAA,IAAAA,uBAEH,KAAK,CAAC,GACP,GAAG,CAAC/C;IACT;IACA,IAAI;QACF,OAAOiB,KAAK,KAAK,CAAC6B;IACpB,EAAE,OAAM,CAAC;IACT,IAAI;QACF,OAAO7B,KAAK,KAAK,CAAC+B,WAAWF;IAC/B,EAAE,OAAOnB,GAAG,CAAC;IAEb,IAAI5B,AAAmB,oBAAnBA,kBAAsCA,AAAmB,kBAAnBA,gBAAkC;QAC1E,MAAMkD,aAAaL,yBAAyBE;QAC5C,OAAO7B,KAAK,KAAK,CAAC+B,WAAWC;IAC/B;IACA,MAAM7E,MAAM,CAAC,+BAA+B,EAAEyE,OAAO;AACvD"}
|
package/dist/es/index.mjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { Executor } from "./ai-model/action-executor.mjs";
|
|
2
2
|
import insight from "./insight/index.mjs";
|
|
3
3
|
import { getVersion } from "./utils.mjs";
|
|
4
|
-
import {
|
|
4
|
+
import { AiLocateElement, describeUserPage, plan } from "./ai-model/index.mjs";
|
|
5
5
|
import { MIDSCENE_MODEL_NAME, getAIConfig } from "@midscene/shared/env";
|
|
6
6
|
const src = insight;
|
|
7
|
-
export {
|
|
7
|
+
export { AiLocateElement, Executor, insight as Insight, MIDSCENE_MODEL_NAME, src as default, describeUserPage, getAIConfig, getVersion, plan };
|
|
8
8
|
|
|
9
9
|
//# sourceMappingURL=index.mjs.map
|
package/dist/es/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","sources":["webpack://@midscene/core/./src/index.ts"],"sourcesContent":["import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n
|
|
1
|
+
{"version":3,"file":"index.mjs","sources":["webpack://@midscene/core/./src/index.ts"],"sourcesContent":["import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n MidsceneYamlFlowItemAIRightClick,\n MidsceneYamlConfigResult,\n LocateOption,\n DetailedLocateParam,\n} from './yaml';\n"],"names":["Insight"],"mappings":";;;;;AAaA,YAAeA"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { AIActionType, callAiFn, expandSearchArea } from "../ai-model/common.mjs";
|
|
2
2
|
import { AiExtractElementInfo, AiLocateElement, callToGetJSONObject } from "../ai-model/index.mjs";
|
|
3
|
-
import {
|
|
3
|
+
import { AiLocateSection } from "../ai-model/inspect.mjs";
|
|
4
4
|
import { elementDescriberInstruction } from "../ai-model/prompt/describe.mjs";
|
|
5
5
|
import { MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_USE_QWEN_VL, getAIConfigInBoolean, vlLocateMode } from "@midscene/shared/env";
|
|
6
6
|
import { compositeElementInfoImg, cropByRect } from "@midscene/shared/img";
|
|
@@ -156,41 +156,6 @@ class Insight {
|
|
|
156
156
|
usage
|
|
157
157
|
};
|
|
158
158
|
}
|
|
159
|
-
async assert(assertion) {
|
|
160
|
-
const dumpSubscriber = this.onceDumpUpdatedFn;
|
|
161
|
-
this.onceDumpUpdatedFn = void 0;
|
|
162
|
-
const context = await this.contextRetrieverFn('assert');
|
|
163
|
-
const startTime = Date.now();
|
|
164
|
-
const assertResult = await AiAssert({
|
|
165
|
-
assertion,
|
|
166
|
-
context
|
|
167
|
-
});
|
|
168
|
-
const timeCost = Date.now() - startTime;
|
|
169
|
-
const taskInfo = {
|
|
170
|
-
...this.taskInfo ? this.taskInfo : {},
|
|
171
|
-
durationMs: timeCost,
|
|
172
|
-
rawResponse: JSON.stringify(assertResult.content)
|
|
173
|
-
};
|
|
174
|
-
const { thought, pass } = assertResult.content;
|
|
175
|
-
const dumpData = {
|
|
176
|
-
type: 'assert',
|
|
177
|
-
userQuery: {
|
|
178
|
-
assertion
|
|
179
|
-
},
|
|
180
|
-
matchedElement: [],
|
|
181
|
-
data: null,
|
|
182
|
-
taskInfo,
|
|
183
|
-
assertionPass: pass,
|
|
184
|
-
assertionThought: thought,
|
|
185
|
-
error: pass ? void 0 : thought
|
|
186
|
-
};
|
|
187
|
-
emitInsightDump(dumpData, dumpSubscriber);
|
|
188
|
-
return {
|
|
189
|
-
pass,
|
|
190
|
-
thought,
|
|
191
|
-
usage: assertResult.usage
|
|
192
|
-
};
|
|
193
|
-
}
|
|
194
159
|
async describe(target, opt) {
|
|
195
160
|
assert(target, 'target is required for insight.describe');
|
|
196
161
|
const context = await this.contextRetrieverFn('describe');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"insight/index.mjs","sources":["webpack://@midscene/core/./src/insight/index.ts"],"sourcesContent":["import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<{\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n }> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n thought,\n usage,\n };\n }\n\n async assert(assertion: TUserPrompt): Promise<InsightAssertionResponse> {\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Insight","query","opt","_parseResult_errors","callAI","queryPrompt","assert","dumpSubscriber","undefined","globalDeepThinkSwitch","getAIConfigInBoolean","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","vlLocateMode","console","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","elementById","rawResponse","usage","isOrderSensitive","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","item","element","emitInsightDump","Error","dataDemand","multimodalPrompt","AiExtractElementInfo","data","thought","assertion","assertResult","AiAssert","pass","target","screenshotBase64","size","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","cropByRect","MIDSCENE_USE_QWEN_VL","msgs","callAIFn","callToGetJSONObject","res","AIActionType","content","callAiFn","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;AAqDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAmCnB,MAAM,OACJC,KAA0B,EAC1BC,GAAgB,EACO;YA+EnBC;QA9EJ,MAAM,EAAEC,MAAM,EAAE,GAAGF,OAAO,CAAC;QAC3B,MAAMG,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QACpB,MAAME,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzBF,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMQ,wBAAwBC,qBAC5BC;QAEF,IAAIF,uBACFX,MAAM,yBAAyBW;QAEjC,IAAIG;QACJ,IAAIX,MAAM,SAAS,IAAIQ,uBACrBG,mBAAmBX,MAAM,MAAM;QAGjC,IAAIW,oBAAoB,CAACC,gBAAgB;YACvCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBJ;QACrB;QAEA,MAAMO,UAAUb,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,OAAO,AAAD,KAAM,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE/D,IAAIc;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIP,kBAAkB;YACpBO,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBH;YACtB;YACAN,OACEa,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAEP,iBAAiB,CAAC,EAChDO,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EACJC,WAAW,EACXC,IAAI,EACJC,WAAW,EACXC,WAAW,EACXC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,gBAAgB;YACxB,QAAQzB,UAAU,IAAI,CAAC,UAAU;YACjCW;YACA,0BAA0BV;YAC1B,cAAcc;QAChB;QAEA,MAAMW,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACT;YAC/BI;YACAX;YACAC;YACAC;QACF;QAEA,IAAIe;QACJ,IAAI,QAAA9B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B8B,WAAW,CAAC,6BAA6B,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG5E,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS7B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAamB;YACb,MAAM;YACNO;YACA,WAAW,CAAC,CAACf;YACb,OAAOiB;QACT;QAEA,MAAME,WAA0B,EAAE;QACjCZ,CAAAA,YAAY,QAAQ,IAAI,EAAC,EAAG,OAAO,CAAC,CAACa;YACpC,IAAI,QAAQA,MAAM;gBAChB,MAAMC,UAAUZ,YAAYW,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,EAAE;gBAEpC,IAAI,CAACC,SAAS,YACZvB,QAAQ,IAAI,CACV,CAAC,+BAA+B,EAAEsB,KAAK,EAAE,CAAC,0CAA0C,CAAC;gBAIzFD,SAAS,IAAI,CAACE;YAChB;QACF;QAEAC,gBACE;YACE,GAAGJ,QAAQ;YACX,gBAAgBC;QAClB,GACA5B;QAGF,IAAI0B,UACF,MAAM,IAAIM,MAAMN;QAGlB3B,OACE6B,SAAS,MAAM,IAAI,GACnB,CAAC,0CAA0C,EAAEA,SAAS,MAAM,EAAE;QAGhE,IAAIA,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,IAAIA,QAAQ,CAAC,EAAE,CAAE,EAAE;gBACnB,SAASA,QAAQ,CAAC,EAAE,CAAE,OAAO;gBAC7B,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM,IAAI,EAAE;gBACjC,YAAYA,QAAQ,CAAC,EAAE,CAAE,UAAU;gBACnCP;YACF;YACAJ;QACF;QAEF,OAAO;YACL,SAAS;YACTA;QACF;IACF;IAEA,MAAM,QACJgB,UAA+B,EAC/BtC,GAA0B,EAC1BuC,gBAAoC,EAKnC;YA0BGtC;QAzBJG,OACE,AAAsB,YAAtB,OAAOkC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMjC,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzB,MAAMO,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE9C,MAAMM,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEI,KAAK,EAAE,GAAG,MAAMe,qBAAwB;YAC3D3B;YACA,WAAWyB;YACXC;YACA,eAAevC;QACjB;QAEA,MAAM4B,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACT;QAC9B;QAEA,IAAIU;QACJ,IAAI,QAAA9B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B8B,WAAW,CAAC,qBAAqB,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTM;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNT;YACA,OAAOE;QACT;QAEA,MAAM,EAAEU,IAAI,EAAEC,OAAO,EAAE,GAAGrB,eAAe,CAAC;QAG1Ce,gBACE;YACE,GAAGJ,QAAQ;YACXS;QACF,GACApC;QAGF,IAAI0B,YAAY,CAACU,MACf,MAAM,IAAIJ,MAAMN;QAGlB,OAAO;YACLU;YACAC;YACAjB;QACF;IACF;IAEA,MAAM,OAAOkB,SAAsB,EAAqC;QACtE,MAAMtC,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzB,MAAMO,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAC9C,MAAMM,YAAYC,KAAK,GAAG;QAC1B,MAAMwB,eAAe,MAAMC,SAAS;YAClCF;YACA9B;QACF;QAEA,MAAMe,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACc,aAAa,OAAO;QAClD;QAEA,MAAM,EAAEF,OAAO,EAAEI,IAAI,EAAE,GAAGF,aAAa,OAAO;QAC9C,MAAMZ,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTW;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNd;YACA,eAAeiB;YACf,kBAAkBJ;YAClB,OAAOI,OAAOxC,SAAYoC;QAC5B;QACAN,gBAAgBJ,UAAU3B;QAE1B,OAAO;YACLyC;YACAJ;YACA,OAAOE,aAAa,KAAK;QAC3B;IACF;IACA,MAAM,SACJG,MAA+B,EAC/B/C,GAEC,EACwD;QACzDI,OAAO2C,QAAQ;QACf,MAAMlC,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAC9C,MAAM,EAAEmC,gBAAgB,EAAEC,IAAI,EAAE,GAAGpC;QACnCT,OAAO4C,kBAAkB;QAEzB,MAAME,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBT;YAChBC;YACA,sBAAsB;gBACpB;oBACE,MAAMI;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIrD,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,SAAS,EAAE;YAClB,MAAMc,aAAa4C,iBAAiBL,YAAYxC,QAAQ,IAAI;YAC5DjB,MAAM,4BAA4BkB;YAClC0C,eAAe,MAAMG,WACnBH,cACA1C,YACAN,qBAAqBoD;QAEzB;QAEA,MAAMC,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WACJ,IAAI,CAAC,UAAU,IAAIC;QAErB,MAAMC,MAAM,MAAMF,SAASD,MAAMI,aAAa,gBAAgB;QAE9D,MAAM,EAAEC,OAAO,EAAE,GAAGF;QACpB5D,OAAO,CAAC8D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D9D,OAAO8D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAhWA,YACErD,OAEmE,EACnEb,GAAoB,CACpB;QAfF;QAIA,qCAAoDmE;QAEpD;QAEA;QAQE/D,OAAOS,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMuD,QAAQ,OAAO,CAACvD;QAGlD,IAAI,AAA2B,WAApBb,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,UAAU,AAAD,GACvB,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,QAAQ,AAAD,GACrB,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AA8UF"}
|
|
1
|
+
{"version":3,"file":"insight/index.mjs","sources":["webpack://@midscene/core/./src/insight/index.ts"],"sourcesContent":["import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<{\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n }> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n thought,\n usage,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Insight","query","opt","_parseResult_errors","callAI","queryPrompt","assert","dumpSubscriber","undefined","globalDeepThinkSwitch","getAIConfigInBoolean","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","vlLocateMode","console","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","elementById","rawResponse","usage","isOrderSensitive","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","item","element","emitInsightDump","Error","dataDemand","multimodalPrompt","AiExtractElementInfo","data","thought","target","screenshotBase64","size","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","cropByRect","MIDSCENE_USE_QWEN_VL","msgs","callAIFn","callToGetJSONObject","res","AIActionType","content","callAiFn","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;AAqDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAmCnB,MAAM,OACJC,KAA0B,EAC1BC,GAAgB,EACO;YA+EnBC;QA9EJ,MAAM,EAAEC,MAAM,EAAE,GAAGF,OAAO,CAAC;QAC3B,MAAMG,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QACpB,MAAME,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzBF,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMQ,wBAAwBC,qBAC5BC;QAEF,IAAIF,uBACFX,MAAM,yBAAyBW;QAEjC,IAAIG;QACJ,IAAIX,MAAM,SAAS,IAAIQ,uBACrBG,mBAAmBX,MAAM,MAAM;QAGjC,IAAIW,oBAAoB,CAACC,gBAAgB;YACvCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBJ;QACrB;QAEA,MAAMO,UAAUb,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,OAAO,AAAD,KAAM,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE/D,IAAIc;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIP,kBAAkB;YACpBO,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBH;YACtB;YACAN,OACEa,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAEP,iBAAiB,CAAC,EAChDO,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EACJC,WAAW,EACXC,IAAI,EACJC,WAAW,EACXC,WAAW,EACXC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,gBAAgB;YACxB,QAAQzB,UAAU,IAAI,CAAC,UAAU;YACjCW;YACA,0BAA0BV;YAC1B,cAAcc;QAChB;QAEA,MAAMW,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACT;YAC/BI;YACAX;YACAC;YACAC;QACF;QAEA,IAAIe;QACJ,IAAI,QAAA9B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B8B,WAAW,CAAC,6BAA6B,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG5E,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS7B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAamB;YACb,MAAM;YACNO;YACA,WAAW,CAAC,CAACf;YACb,OAAOiB;QACT;QAEA,MAAME,WAA0B,EAAE;QACjCZ,CAAAA,YAAY,QAAQ,IAAI,EAAC,EAAG,OAAO,CAAC,CAACa;YACpC,IAAI,QAAQA,MAAM;gBAChB,MAAMC,UAAUZ,YAAYW,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,EAAE;gBAEpC,IAAI,CAACC,SAAS,YACZvB,QAAQ,IAAI,CACV,CAAC,+BAA+B,EAAEsB,KAAK,EAAE,CAAC,0CAA0C,CAAC;gBAIzFD,SAAS,IAAI,CAACE;YAChB;QACF;QAEAC,gBACE;YACE,GAAGJ,QAAQ;YACX,gBAAgBC;QAClB,GACA5B;QAGF,IAAI0B,UACF,MAAM,IAAIM,MAAMN;QAGlB3B,OACE6B,SAAS,MAAM,IAAI,GACnB,CAAC,0CAA0C,EAAEA,SAAS,MAAM,EAAE;QAGhE,IAAIA,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,IAAIA,QAAQ,CAAC,EAAE,CAAE,EAAE;gBACnB,SAASA,QAAQ,CAAC,EAAE,CAAE,OAAO;gBAC7B,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM,IAAI,EAAE;gBACjC,YAAYA,QAAQ,CAAC,EAAE,CAAE,UAAU;gBACnCP;YACF;YACAJ;QACF;QAEF,OAAO;YACL,SAAS;YACTA;QACF;IACF;IAEA,MAAM,QACJgB,UAA+B,EAC/BtC,GAA0B,EAC1BuC,gBAAoC,EAKnC;YA0BGtC;QAzBJG,OACE,AAAsB,YAAtB,OAAOkC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMjC,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzB,MAAMO,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE9C,MAAMM,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEI,KAAK,EAAE,GAAG,MAAMe,qBAAwB;YAC3D3B;YACA,WAAWyB;YACXC;YACA,eAAevC;QACjB;QAEA,MAAM4B,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACT;QAC9B;QAEA,IAAIU;QACJ,IAAI,QAAA9B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B8B,WAAW,CAAC,qBAAqB,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTM;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNT;YACA,OAAOE;QACT;QAEA,MAAM,EAAEU,IAAI,EAAEC,OAAO,EAAE,GAAGrB,eAAe,CAAC;QAG1Ce,gBACE;YACE,GAAGJ,QAAQ;YACXS;QACF,GACApC;QAGF,IAAI0B,YAAY,CAACU,MACf,MAAM,IAAIJ,MAAMN;QAGlB,OAAO;YACLU;YACAC;YACAjB;QACF;IACF;IAEA,MAAM,SACJkB,MAA+B,EAC/B3C,GAEC,EACwD;QACzDI,OAAOuC,QAAQ;QACf,MAAM9B,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAC9C,MAAM,EAAE+B,gBAAgB,EAAEC,IAAI,EAAE,GAAGhC;QACnCT,OAAOwC,kBAAkB;QAEzB,MAAME,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBT;YAChBC;YACA,sBAAsB;gBACpB;oBACE,MAAMI;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIjD,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,SAAS,EAAE;YAClB,MAAMc,aAAawC,iBAAiBL,YAAYpC,QAAQ,IAAI;YAC5DjB,MAAM,4BAA4BkB;YAClCsC,eAAe,MAAMG,WACnBH,cACAtC,YACAN,qBAAqBgD;QAEzB;QAEA,MAAMC,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WACJ,IAAI,CAAC,UAAU,IAAIC;QAErB,MAAMC,MAAM,MAAMF,SAASD,MAAMI,aAAa,gBAAgB;QAE9D,MAAM,EAAEC,OAAO,EAAE,GAAGF;QACpBxD,OAAO,CAAC0D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D1D,OAAO0D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAzTA,YACEjD,OAEmE,EACnEb,GAAoB,CACpB;QAfF;QAIA,qCAAoD+D;QAEpD;QAEA;QAQE3D,OAAOS,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMmD,QAAQ,OAAO,CAACnD;QAGlD,IAAI,AAA2B,WAApBb,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,UAAU,AAAD,GACvB,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,QAAQ,AAAD,GACrB,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAuSF"}
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import { getVersion } from "../utils.mjs";
|
|
2
|
-
import { MIDSCENE_MODEL_NAME, getAIConfig } from "@midscene/shared/env";
|
|
3
2
|
import { uuid } from "@midscene/shared/utils";
|
|
4
3
|
function emitInsightDump(data, dumpSubscriber) {
|
|
5
4
|
const baseData = {
|
|
6
5
|
sdkVersion: getVersion(),
|
|
7
|
-
logTime: Date.now()
|
|
8
|
-
model_name: getAIConfig(MIDSCENE_MODEL_NAME) || ''
|
|
6
|
+
logTime: Date.now()
|
|
9
7
|
};
|
|
10
8
|
const finalData = {
|
|
11
9
|
logId: uuid(),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"insight/utils.mjs","sources":["webpack://@midscene/core/./src/insight/utils.ts"],"sourcesContent":["import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {
|
|
1
|
+
{"version":3,"file":"insight/utils.mjs","sources":["webpack://@midscene/core/./src/insight/utils.ts"],"sourcesContent":["import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n"],"names":["emitInsightDump","data","dumpSubscriber","baseData","getVersion","Date","finalData","uuid"],"mappings":";;AASO,SAASA,gBACdC,IAA+B,EAC/BC,cAA+B;IAE/B,MAAMC,WAAqB;QACzB,YAAYC;QACZ,SAASC,KAAK,GAAG;IACnB;IACA,MAAMC,YAAyB;QAC7B,OAAOC;QACP,GAAGJ,QAAQ;QACX,GAAGF,IAAI;IACT;IAEAC,QAAAA,kBAAAA,eAAiBI;AACnB"}
|
package/dist/es/types.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.mjs","sources":["webpack://@midscene/core/./src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n}\n\n/**\n * insight\n */\n\nexport type CallAIFn = <T>(\n messages: ChatCompletionMessageParam[],\n) => Promise<T>;\n\nexport interface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: CallAIFn;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n id: string;\n indexId?: number;\n center: [number, number];\n rect: Rect;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n model_name: string;\n model_description?: string;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type:\n | 'Locate'\n | 'Tap'\n | 'RightClick'\n | 'Hover'\n | 'Drag'\n | 'Input'\n | 'KeyboardPress'\n | 'Scroll'\n | 'Error'\n | 'Assert'\n | 'AssertWithoutThrow'\n | 'Sleep'\n | 'Finished'\n | 'AndroidBackButton'\n | 'AndroidHomeButton'\n | 'AndroidRecentAppsButton'\n | 'AndroidLongPress'\n | 'AndroidPull';\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamAssert {\n assertion: TUserPrompt;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {\n assertion: string;\n};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n ignoreMarker?: boolean;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n pageContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n executions: ExecutionDump[];\n}\n\nexport type PageType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android';\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport type TMultimodalPrompt = {\n /**\n * Support use image to inspect elements.\n * The \"images\" field is an object that uses image name as key and image url as value.\n * The image url can be a local path, a http link , or a base64 string.\n */\n images?: {\n name: string;\n url: string;\n }[];\n /**\n * By default, the image url in the \"images\" filed starts with `https://` or `http://` will be directly sent to the LLM.\n * In case the images are not accessible to the LLM (One common case is that image url is internal network only.), you can enable this option.\n * Then image will be download and convert to base64 format.\n */\n convertHttpImage2Base64?: boolean;\n};\n\nexport type TUserPrompt =\n | string\n | ({\n prompt: string;\n } & Partial<TMultimodalPrompt>);\n\nexport interface DeviceAction<ParamType = any> {\n name: string;\n description?: string;\n paramSchema?: string;\n paramDescription?: string;\n location?: 'required' | 'optional' | false;\n whatToLocate?: string; // what to locate if location is required or optional\n call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;\n}\n"],"names":["AIResponseFormat","UIContext"],"mappings":";AAgCO,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAwFL,MAAeC;AAMtB"}
|
|
1
|
+
{"version":3,"file":"types.mjs","sources":["webpack://@midscene/core/./src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n}\n\n/**\n * insight\n */\n\nexport type CallAIFn = <T>(\n messages: ChatCompletionMessageParam[],\n) => Promise<T>;\n\nexport interface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: CallAIFn;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n id: string;\n indexId?: number;\n center: [number, number];\n rect: Rect;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type:\n | 'Locate'\n | 'Tap'\n | 'RightClick'\n | 'Hover'\n | 'Drag'\n | 'Input'\n | 'KeyboardPress'\n | 'Scroll'\n | 'Error'\n | 'Assert'\n | 'AssertWithoutThrow'\n | 'Sleep'\n | 'Finished'\n | 'AndroidBackButton'\n | 'AndroidHomeButton'\n | 'AndroidRecentAppsButton'\n | 'AndroidLongPress'\n | 'AndroidPull';\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamAssert {\n assertion: TUserPrompt;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {\n assertion: string;\n};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n ignoreMarker?: boolean;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n pageContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n modelName: string;\n modelDescription: string;\n executions: ExecutionDump[];\n}\n\nexport type PageType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android';\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport type TMultimodalPrompt = {\n /**\n * Support use image to inspect elements.\n * The \"images\" field is an object that uses image name as key and image url as value.\n * The image url can be a local path, a http link , or a base64 string.\n */\n images?: {\n name: string;\n url: string;\n }[];\n /**\n * By default, the image url in the \"images\" filed starts with `https://` or `http://` will be directly sent to the LLM.\n * In case the images are not accessible to the LLM (One common case is that image url is internal network only.), you can enable this option.\n * Then image will be download and convert to base64 format.\n */\n convertHttpImage2Base64?: boolean;\n};\n\nexport type TUserPrompt =\n | string\n | ({\n prompt: string;\n } & Partial<TMultimodalPrompt>);\n\nexport interface DeviceAction<ParamType = any> {\n name: string;\n interfaceAlias?: string;\n description?: string;\n paramSchema?: string;\n paramDescription?: string;\n location?: 'required' | 'optional' | false;\n whatToLocate?: string; // what to locate if location is required or optional\n call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;\n}\n"],"names":["AIResponseFormat","UIContext"],"mappings":";AAiCO,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAwFL,MAAeC;AAMtB"}
|