@midscene/core 1.8.7 → 1.8.8-beta-20260601092605.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +4 -0
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs +4 -2
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +1 -1
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/service/index.mjs +2 -2
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/utils.mjs +3 -3
- package/dist/es/utils.mjs.map +1 -1
- package/dist/es/yaml/utils.mjs +45 -6
- package/dist/es/yaml/utils.mjs.map +1 -1
- package/dist/lib/agent/agent.js +4 -0
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/llm-planning.js +4 -2
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +1 -1
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/service/index.js +3 -3
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/utils.js +3 -3
- package/dist/lib/utils.js.map +1 -1
- package/dist/lib/yaml/utils.js +45 -6
- package/dist/lib/yaml/utils.js.map +1 -1
- package/dist/types/ai-model/service-caller/index.d.ts +1 -0
- package/dist/types/device/device-options.d.ts +2 -0
- package/dist/types/device/index.d.ts +1 -0
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import type { AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\n\n// Error class that preserves usage and rawResponse when AI call parsing fails\nexport class AIResponseParseError extends Error {\n usage?: AIUsageInfo;\n rawResponse: string;\n\n constructor(message: string, rawResponse: string, usage?: AIUsageInfo) {\n super(message);\n this.name = 'AIResponseParseError';\n this.rawResponse = rawResponse;\n this.usage = usage;\n }\n}\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n type TModelFamily,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM, isUITars } from '../auto-glm/util';\nimport {\n callAIWithCodexAppServer,\n isCodexAppServerProvider,\n} from './codex-app-server';\nimport { shouldForceOriginalImageDetail } from './image-detail';\nimport {\n buildRequestAbortSignal,\n isHardTimeoutError,\n resolveEffectiveTimeoutMs,\n} from './request-timeout';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsModelVersion?: UITarsModelVersion;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const warnClient = getDebug('ai:call', { console: true });\n const debugProxy = getDebug('ai:call:proxy');\n const warnProxy = getDebug('ai:call:proxy', { console: true });\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n warnProxy(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n warnProxy(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n warnProxy('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const effectiveTimeoutMs = resolveEffectiveTimeoutMs({ timeout });\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n // Midscene already handles retries in callAI(), so disable SDK-level retries\n // to avoid duplicate attempts and duplicated backoff latency.\n maxRetries: 0,\n // When disabled (timeoutMs === null) fall through to the SDK default so\n // only the caller-provided abortSignal can cancel the request.\n ...(effectiveTimeoutMs !== null ? { timeout: effectiveTimeoutMs } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = '@langfuse/openai';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n abortSignal?: AbortSignal;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n if (isCodexAppServerProvider(modelConfig.openaiBaseURL)) {\n if (\n !modelConfig.modelFamily &&\n hasExplicitReasoningConfig({\n reasoningEnabled: modelConfig.reasoningEnabled,\n reasoningEffort: modelConfig.reasoningEffort,\n reasoningBudget: modelConfig.reasoningBudget,\n })\n ) {\n throw new Error(\n 'Reasoning config requires MIDSCENE_MODEL_FAMILY. Set MIDSCENE_MODEL_FAMILY when using MIDSCENE_MODEL_REASONING_ENABLED / MIDSCENE_MODEL_REASONING_EFFORT / MIDSCENE_MODEL_REASONING_BUDGET.',\n );\n }\n\n return callAIWithCodexAppServer(messages, modelConfig, {\n stream: options?.stream,\n onChunk: options?.onChunk,\n reasoningEnabled: modelConfig.reasoningEnabled,\n abortSignal: options?.abortSignal,\n });\n }\n\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n const effectiveTimeoutMs = resolveEffectiveTimeoutMs(modelConfig);\n\n const extraBody = modelConfig.extraBody;\n\n const debugCall = getDebug('ai:call');\n const warnCall = getDebug('ai:call', { console: true });\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const temperature = (() => {\n if (modelFamily === 'gpt-5') {\n debugCall('temperature is ignored for gpt-5');\n return undefined;\n }\n return modelConfig.temperature ?? 0;\n })();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n let requestId: string | null | undefined;\n\n const hasUsableText = (value: string | null | undefined): value is string =>\n typeof value === 'string' && value.trim().length > 0;\n\n const buildUsageInfo = (\n usageData?: OpenAI.CompletionUsage,\n requestId?: string | null,\n ) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n slot: modelConfig.slot,\n intent: undefined,\n request_id: requestId ?? undefined,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n ...(modelFamily === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(modelFamily)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n const {\n config: reasoningEffortConfig,\n debugMessage: reasoningEffortDebugMessage,\n } = resolveReasoningConfig({\n reasoningEnabled: modelConfig.reasoningEnabled,\n reasoningEffort: modelConfig.reasoningEffort,\n reasoningBudget: modelConfig.reasoningBudget,\n modelFamily,\n });\n if (reasoningEffortDebugMessage) {\n debugCall(reasoningEffortDebugMessage);\n }\n\n const shouldUseOriginalImageDetail =\n shouldForceOriginalImageDetail(modelConfig);\n\n // For default-intent GPT-5 calls, request original image detail to preserve\n // screenshot resolution for localization-sensitive tasks.\n const messagesWithImageDetail: ChatCompletionMessageParam[] = (() => {\n if (!shouldUseOriginalImageDetail) {\n return messages;\n }\n\n return messages.map((msg) => {\n if (!Array.isArray(msg.content)) {\n return msg;\n }\n\n const content = msg.content.map((part) => {\n if (part && part.type === 'image_url' && part.image_url?.url) {\n return {\n ...part,\n image_url: {\n ...part.image_url,\n detail: 'original',\n },\n };\n }\n return part;\n });\n\n return {\n ...msg,\n content,\n } as ChatCompletionMessageParam;\n });\n })();\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const { signal: streamSignal, cleanup: cleanupStreamSignal } =\n buildRequestAbortSignal(effectiveTimeoutMs, options?.abortSignal);\n try {\n const stream = (await completion.create(\n {\n model: modelName,\n messages: messagesWithImageDetail,\n ...commonConfig,\n ...reasoningEffortConfig,\n ...extraBody,\n },\n {\n stream: true,\n signal: streamSignal,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n requestId = stream._request_id;\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage, requestId),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n } finally {\n cleanupStreamSignal();\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${modelFamily || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n const { signal: attemptSignal, cleanup: cleanupAttemptSignal } =\n buildRequestAbortSignal(effectiveTimeoutMs, options?.abortSignal);\n try {\n const result = await completion.create(\n {\n model: modelName,\n messages: messagesWithImageDetail,\n ...commonConfig,\n ...reasoningEffortConfig,\n ...extraBody,\n } as any,\n { signal: attemptSignal },\n );\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n requestId = result._request_id;\n\n if (!hasUsableText(content) && hasUsableText(accumulatedReasoning)) {\n warnCall('empty content from AI model, using reasoning content');\n content = accumulatedReasoning;\n }\n\n if (!hasUsableText(content)) {\n throw new AIResponseParseError(\n 'empty content from AI model',\n JSON.stringify(result),\n buildUsageInfo(usage, requestId),\n );\n }\n\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n const wasHardTimeout = isHardTimeoutError(lastError);\n if (wasHardTimeout) {\n warnCall(\n `AI call hit hard timeout (${effectiveTimeoutMs}ms, attempt ${attempt}/${maxAttempts}, model ${modelName}, slot ${modelConfig.slot})`,\n );\n }\n // Do not retry if the request was aborted by the caller\n if (options?.abortSignal?.aborted) {\n break;\n }\n if (attempt < maxAttempts) {\n warnCall(\n `AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n } finally {\n cleanupAttemptSignal();\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage, requestId),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n warnCall('call AI error', e);\n\n if (e instanceof AIResponseParseError) {\n throw e;\n }\n\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n abortSignal?: AbortSignal;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n abortSignal: options?.abortSignal,\n });\n assert(response, 'empty response');\n const modelFamily = modelConfig.modelFamily;\n const jsonContent = safeParseJson(response.content, modelFamily);\n if (typeof jsonContent !== 'object') {\n throw new AIResponseParseError(\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n response.content,\n response.usage,\n );\n }\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n options?: {\n abortSignal?: AbortSignal;\n },\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig, {\n abortSignal: options?.abortSignal,\n });\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nfunction hasExplicitReasoningConfig({\n reasoningEnabled,\n reasoningEffort,\n reasoningBudget,\n}: {\n reasoningEnabled?: boolean;\n reasoningEffort?: string;\n reasoningBudget?: number;\n}): boolean {\n return (\n reasoningEnabled !== undefined ||\n !!reasoningEffort ||\n reasoningBudget !== undefined\n );\n}\n\nconst SUPPORTED_REASONING_FAMILIES = [\n 'qwen3-vl',\n 'qwen3.5',\n 'qwen3.6',\n 'doubao-vision',\n 'doubao-seed',\n 'glm-v',\n] as const satisfies readonly TModelFamily[];\n\ntype SupportedReasoningFamily = (typeof SUPPORTED_REASONING_FAMILIES)[number];\n\nfunction isSupportedReasoningFamily(\n family: TModelFamily | undefined,\n): family is SupportedReasoningFamily {\n return (\n !!family &&\n (SUPPORTED_REASONING_FAMILIES as readonly TModelFamily[]).includes(family)\n );\n}\n\nfunction supportedReasoningFamilyNames(): string {\n return SUPPORTED_REASONING_FAMILIES.join(', ');\n}\n\nexport function resolveReasoningConfig({\n reasoningEnabled,\n reasoningEffort,\n reasoningBudget,\n modelFamily,\n}: {\n reasoningEnabled?: boolean;\n reasoningEffort?: string;\n reasoningBudget?: number;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n} {\n const hasExplicitConfig = hasExplicitReasoningConfig({\n reasoningEnabled,\n reasoningEffort,\n reasoningBudget,\n });\n\n if (hasExplicitConfig) {\n if (!modelFamily) {\n throw new Error(\n `Reasoning config requires MIDSCENE_MODEL_FAMILY. Set MIDSCENE_MODEL_FAMILY to a supported family such as ${supportedReasoningFamilyNames()}, or remove MIDSCENE_MODEL_REASONING_ENABLED / MIDSCENE_MODEL_REASONING_EFFORT / MIDSCENE_MODEL_REASONING_BUDGET.`,\n );\n }\n\n // GPT-5 over Chat Completions is intentionally unsupported here because\n // its reasoning effort compatibility varies by model version.\n if (!isSupportedReasoningFamily(modelFamily)) {\n throw new Error(\n `Reasoning config is not supported for model family \"${modelFamily}\". Use a supported family such as ${supportedReasoningFamilyNames()}, or remove MIDSCENE_MODEL_REASONING_ENABLED / MIDSCENE_MODEL_REASONING_EFFORT / MIDSCENE_MODEL_REASONING_BUDGET.`,\n );\n }\n } else if (!isSupportedReasoningFamily(modelFamily)) {\n return { config: {} };\n }\n\n const effectiveReasoningEnabled = reasoningEnabled ?? false;\n\n const debugMessages: string[] = [];\n const config: Record<string, unknown> = {};\n\n if (\n modelFamily === 'qwen3-vl' ||\n modelFamily === 'qwen3.5' ||\n modelFamily === 'qwen3.6'\n ) {\n // reasoningEnabled → enable_thinking\n config.enable_thinking = effectiveReasoningEnabled;\n debugMessages.push(`enable_thinking=${effectiveReasoningEnabled}`);\n // reasoningBudget → thinking_budget\n if (reasoningBudget !== undefined) {\n config.thinking_budget = reasoningBudget;\n debugMessages.push(`thinking_budget=${reasoningBudget}`);\n }\n // reasoningEffort is ignored for qwen\n } else if (modelFamily === 'doubao-vision' || modelFamily === 'doubao-seed') {\n // reasoningEnabled → thinking.type\n config.thinking = {\n type: effectiveReasoningEnabled ? 'enabled' : 'disabled',\n };\n debugMessages.push(\n `thinking.type=${effectiveReasoningEnabled ? 'enabled' : 'disabled'}`,\n );\n // reasoningEffort → reasoning_effort\n if (reasoningEffort) {\n config.reasoning_effort = reasoningEffort;\n debugMessages.push(`reasoning_effort=\"${reasoningEffort}\"`);\n }\n // reasoningBudget is ignored for doubao\n } else if (modelFamily === 'glm-v') {\n // reasoningEnabled → thinking.type\n config.thinking = {\n type: effectiveReasoningEnabled ? 'enabled' : 'disabled',\n };\n debugMessages.push(\n `thinking.type=${effectiveReasoningEnabled ? 'enabled' : 'disabled'}`,\n );\n // reasoningEffort and reasoningBudget are ignored for glm-v\n }\n\n return {\n config,\n debugMessage: debugMessages.length\n ? `reasoning config for ${modelFamily}: ${debugMessages.join(', ')}`\n : undefined,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(\n input: string,\n modelFamily: TModelFamily | undefined,\n) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (\n modelFamily === 'doubao-vision' ||\n modelFamily === 'doubao-seed' ||\n isUITars(modelFamily)\n ) {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["AIResponseParseError","Error","message","rawResponse","usage","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsModelVersion","modelFamily","createOpenAIClient","timeout","proxyAgent","warnClient","getDebug","debugProxy","warnProxy","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","moduleName","ProxyAgent","socksDispatcher","proxyUrl","port","Number","protocol","socksType","decodeURIComponent","error","effectiveTimeoutMs","resolveEffectiveTimeoutMs","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","isCodexAppServerProvider","hasExplicitReasoningConfig","callAIWithCodexAppServer","completion","extraBody","debugCall","warnCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","timeCost","requestId","hasUsableText","value","buildUsageInfo","usageData","cachedInputTokens","undefined","commonConfig","isAutoGLM","reasoningEffortConfig","reasoningEffortDebugMessage","resolveReasoningConfig","shouldUseOriginalImageDetail","shouldForceOriginalImageDetail","messagesWithImageDetail","msg","Array","part","streamSignal","cleanupStreamSignal","buildRequestAbortSignal","stream","chunk","reasoning_content","chunkData","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","attemptSignal","cleanupAttemptSignal","result","JSON","wasHardTimeout","isHardTimeoutError","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","reasoningEnabled","reasoningEffort","reasoningBudget","SUPPORTED_REASONING_FAMILIES","isSupportedReasoningFamily","family","supportedReasoningFamilyNames","hasExplicitConfig","effectiveReasoningEnabled","debugMessages","config","normalizeJsonObject","obj","item","normalized","key","Object","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","isUITars","jsonString","String"],"mappings":";;;;;;;;;;;;;;;;;;;AAIO,MAAMA,6BAA6BC;IAIxC,YAAYC,OAAe,EAAEC,WAAmB,EAAEC,KAAmB,CAAE;QACrE,KAAK,CAACF,UAJR,yCACA;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,WAAW,GAAGC;QACnB,IAAI,CAAC,KAAK,GAAGC;IACf;AACF;AA6BA,eAAeC,iBAAiB,EAC9BC,WAAW,EAGZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS,WAAW;QAAE,SAAS;IAAK;IACvD,MAAMC,aAAaD,SAAS;IAC5B,MAAME,YAAYF,SAAS,iBAAiB;QAAE,SAAS;IAAK;IAI5D,MAAMG,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAIhB,WAAW;QACba,WAAW,oBAAoBE,iBAAiBf;QAChD,IAAImB,aACFL,UACE;aAEG;YAEL,MAAMM,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCV,aAAa,IAAIW,WAAW;gBAC1B,KAAKrB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBc,WAAW,qBAAqBE,iBAAiBhB;QACjD,IAAIoB,aACFL,UACE;aAGF,IAAI;YAEF,MAAMM,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIL,IAAInB;YAGzB,IAAI,CAACwB,SAAS,QAAQ,EACpB,MAAM,IAAI9B,MAAM;YAIlB,MAAM+B,OAAOC,OAAO,QAAQ,CAACF,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIE,OAAO,KAAK,CAACD,OACjC,MAAM,IAAI/B,MAAM;YAIlB,MAAMiC,WAAWH,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMI,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaY,gBAAgB;gBAC3B,MAAMK;gBACN,MAAMJ,SAAS,QAAQ;gBACvBC;gBACA,GAAID,SAAS,QAAQ,GACjB;oBACE,QAAQK,mBAAmBL,SAAS,QAAQ;oBAC5C,UAAUK,mBAAmBL,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAV,WAAW,uCAAuC;gBAChD,MAAMc;gBACN,MAAMJ,SAAS,QAAQ;gBACvB,MAAMC;YACR;QACF,EAAE,OAAOK,OAAO;YACdf,UAAU,oCAAoCe;YAC9C,MAAM,IAAIpC,MACR,CAAC,yBAAyB,EAAEM,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,qBAAqBC,0BAA0B;QAAEtB;IAAQ;IAC/D,MAAMuB,gBAAgB;QACpB,SAAS9B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,iBAAiB;QAGpB,YAAY;QAGZ,GAAI0B,AAAuB,SAAvBA,qBAA8B;YAAE,SAASA;QAAmB,IAAI,CAAC,CAAC;QACtE,yBAAyB;IAC3B;IAEA,MAAMG,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIlB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAM2B,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACI,0BAC1C;QACA,IAAIrB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAM8B,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAI3B,oBAAoB;QACtB,MAAMmC,gBAAgB,MAAMnC,mBAAmByB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnClC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAeqC,OACpBC,QAAsC,EACtC/C,WAAyB,EACzBgD,OAIC;IAOD,IAAIC,yBAAyBjD,YAAY,aAAa,GAAG;QACvD,IACE,CAACA,YAAY,WAAW,IACxBkD,2BAA2B;YACzB,kBAAkBlD,YAAY,gBAAgB;YAC9C,iBAAiBA,YAAY,eAAe;YAC5C,iBAAiBA,YAAY,eAAe;QAC9C,IAEA,MAAM,IAAIL,MACR;QAIJ,OAAOwD,yBAAyBJ,UAAU/C,aAAa;YACrD,QAAQgD,SAAS;YACjB,SAASA,SAAS;YAClB,kBAAkBhD,YAAY,gBAAgB;YAC9C,aAAagD,SAAS;QACxB;IACF;IAEA,MAAM,EACJI,UAAU,EACVjD,SAAS,EACTI,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACZ,GAAG,MAAMV,iBAAiB;QACzBC;IACF;IACA,MAAMgC,qBAAqBC,0BAA0BjC;IAErD,MAAMqD,YAAYrD,YAAY,SAAS;IAEvC,MAAMsD,YAAYxC,SAAS;IAC3B,MAAMyC,WAAWzC,SAAS,WAAW;QAAE,SAAS;IAAK;IACrD,MAAM0C,oBAAoB1C,SAAS;IACnC,MAAM2C,qBAAqB3C,SAAS;IAEpC,MAAM4C,YAAYC,KAAK,GAAG;IAE1B,MAAMC,cAAe,AAAC;QACpB,IAAInD,AAAgB,YAAhBA,aAAyB,YAC3B6C,UAAU;QAGZ,OAAOtD,YAAY,WAAW,IAAI;IACpC;IAEA,MAAM6D,cAAcb,SAAS,UAAUA,SAAS;IAChD,IAAIc;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIlE;IACJ,IAAImE;IACJ,IAAIC;IAEJ,MAAMC,gBAAgB,CAACC,QACrB,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,IAAI,GAAG,MAAM,GAAG;IAErD,MAAMC,iBAAiB,CACrBC,WACAJ;QAEA,IAAI,CAACI,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWN,YAAY;YACvB,YAAY9D;YACZ,mBAAmBI;YACnB,MAAMP,YAAY,IAAI;YACtB,QAAQwE;YACR,YAAYN,aAAaM;QAC3B;IACF;IAEA,MAAMC,eAAe;QACnBb;QACA,QAAQ,CAAC,CAACC;QACV,GAAIpD,AAAgB,iBAAhBA,cACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAIiE,UAAUjE,cAAc;QACzBgE,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAEA,MAAM,EACJ,QAAQE,qBAAqB,EAC7B,cAAcC,2BAA2B,EAC1C,GAAGC,uBAAuB;QACzB,kBAAkB7E,YAAY,gBAAgB;QAC9C,iBAAiBA,YAAY,eAAe;QAC5C,iBAAiBA,YAAY,eAAe;QAC5CS;IACF;IACA,IAAImE,6BACFtB,UAAUsB;IAGZ,MAAME,+BACJC,+BAA+B/E;IAIjC,MAAMgF,0BAAyD,AAAC;QAC9D,IAAI,CAACF,8BACH,OAAO/B;QAGT,OAAOA,SAAS,GAAG,CAAC,CAACkC;YACnB,IAAI,CAACC,MAAM,OAAO,CAACD,IAAI,OAAO,GAC5B,OAAOA;YAGT,MAAMnB,UAAUmB,IAAI,OAAO,CAAC,GAAG,CAAC,CAACE;gBAC/B,IAAIA,QAAQA,AAAc,gBAAdA,KAAK,IAAI,IAAoBA,KAAK,SAAS,EAAE,KACvD,OAAO;oBACL,GAAGA,IAAI;oBACP,WAAW;wBACT,GAAGA,KAAK,SAAS;wBACjB,QAAQ;oBACV;gBACF;gBAEF,OAAOA;YACT;YAEA,OAAO;gBACL,GAAGF,GAAG;gBACNnB;YACF;QACF;IACF;IAEA,IAAI;QACFR,UACE,CAAC,QAAQ,EAAEO,cAAc,eAAe,GAAG,WAAW,EAAE1D,WAAW;QAGrE,IAAI0D,aAAa;YACf,MAAM,EAAE,QAAQuB,YAAY,EAAE,SAASC,mBAAmB,EAAE,GAC1DC,wBAAwBtD,oBAAoBgB,SAAS;YACvD,IAAI;gBACF,MAAMuC,SAAU,MAAMnC,WAAW,MAAM,CACrC;oBACE,OAAOjD;oBACP,UAAU6E;oBACV,GAAGP,YAAY;oBACf,GAAGE,qBAAqB;oBACxB,GAAGtB,SAAS;gBACd,GACA;oBACE,QAAQ;oBACR,QAAQ+B;gBACV;gBAKFlB,YAAYqB,OAAO,WAAW;gBAE9B,WAAW,MAAMC,SAASD,OAAQ;oBAChC,MAAMzB,UAAU0B,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;oBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;oBAG3D,IAAIA,MAAM,KAAK,EACb1F,QAAQ0F,MAAM,KAAK;oBAGrB,IAAI1B,WAAW2B,mBAAmB;wBAChC1B,eAAeD;wBACfE,wBAAwByB;wBACxB,MAAMC,YAAiC;4BACrC5B;4BACA2B;4BACA1B;4BACA,YAAY;4BACZ,OAAOS;wBACT;wBACAxB,QAAQ,OAAO,CAAE0C;oBACnB;oBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;wBACrCvB,WAAWN,KAAK,GAAG,KAAKD;wBAGxB,IAAI,CAAC5D,OAAO;4BAEV,MAAM6F,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAC7B,YAAY,MAAM,GAAG;4BAElCjE,QAAQ;gCACN,eAAe6F;gCACf,mBAAmBA;gCACnB,cAAcA,AAAkB,IAAlBA;4BAChB;wBACF;wBAGA,MAAME,aAAkC;4BACtC,SAAS;4BACT9B;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAOM,eAAevE,OAAOoE;wBAC/B;wBACAlB,QAAQ,OAAO,CAAE6C;wBACjB;oBACF;gBACF;YACF,SAAU;gBACRR;YACF;YACAvB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAErD,UAAU,QAAQ,EAAEM,eAAe,UAAU,WAAW,EAAEwD,SAAS,eAAe,EAAEL,eAAe,IAAI;QAE/H,OAAO;YAEL,MAAMkC,aAAa9F,YAAY,UAAU,IAAI;YAC7C,MAAM+F,gBAAgB/F,YAAY,aAAa,IAAI;YACnD,MAAMgG,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAAW;gBACvD,MAAM,EAAE,QAAQC,aAAa,EAAE,SAASC,oBAAoB,EAAE,GAC5Dd,wBAAwBtD,oBAAoBgB,SAAS;gBACvD,IAAI;oBACF,MAAMqD,SAAS,MAAMjD,WAAW,MAAM,CACpC;wBACE,OAAOjD;wBACP,UAAU6E;wBACV,GAAGP,YAAY;wBACf,GAAGE,qBAAqB;wBACxB,GAAGtB,SAAS;oBACd,GACA;wBAAE,QAAQ8C;oBAAc;oBAG1BlC,WAAWN,KAAK,GAAG,KAAKD;oBAExBF,kBACE,CAAC,OAAO,EAAErD,UAAU,QAAQ,EAAEM,eAAe,UAAU,mBAAmB,EAAED,mBAAmB,iBAAiB,EAAE6F,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEpC,SAAS,aAAa,EAAEoC,OAAO,WAAW,IAAI,GAAG,eAAe,EAAEzC,eAAe,IAAI;oBAGxWH,mBACE,CAAC,oBAAoB,EAAE6C,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;oBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI1G,MACR,CAAC,mCAAmC,EAAE2G,KAAK,SAAS,CAACD,SAAS;oBAIlEvC,UAAUuC,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;oBAC3CrC,uBACGqC,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;oBAC3DvG,QAAQuG,OAAO,KAAK;oBACpBnC,YAAYmC,OAAO,WAAW;oBAE9B,IAAI,CAAClC,cAAcL,YAAYK,cAAcH,uBAAuB;wBAClET,SAAS;wBACTO,UAAUE;oBACZ;oBAEA,IAAI,CAACG,cAAcL,UACjB,MAAM,IAAIpE,qBACR,+BACA4G,KAAK,SAAS,CAACD,SACfhC,eAAevE,OAAOoE;oBAI1B;gBACF,EAAE,OAAOnC,OAAO;oBACdkE,YAAYlE;oBACZ,MAAMwE,iBAAiBC,mBAAmBP;oBAC1C,IAAIM,gBACFhD,SACE,CAAC,0BAA0B,EAAEvB,mBAAmB,YAAY,EAAEkE,QAAQ,CAAC,EAAEF,YAAY,QAAQ,EAAE7F,UAAU,OAAO,EAAEH,YAAY,IAAI,CAAC,CAAC,CAAC;oBAIzI,IAAIgD,SAAS,aAAa,SACxB;oBAEF,IAAIkD,UAAUF,aAAa;wBACzBzC,SACE,CAAC,wBAAwB,EAAE2C,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;wBAErH,MAAM,IAAIQ,QAAQ,CAACC,UAAYC,WAAWD,SAASX;oBACrD;gBACF,SAAU;oBACRK;gBACF;YACF;YAEA,IAAI,CAACtC,SACH,MAAMmC;QAEV;QAEA3C,UAAU,CAAC,4BAA4B,EAAEU,sBAAsB;QAC/DV,UAAU,CAAC,kBAAkB,EAAEQ,SAAS;QAGxC,IAAID,eAAe,CAAC/D,OAAO;YAEzB,MAAM6F,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAE9B,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtChE,QAAQ;gBACN,eAAe6F;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAAS7B,WAAW;YACpB,mBAAmBE,wBAAwBQ;YAC3C,OAAOH,eAAevE,OAAOoE;YAC7B,YAAY,CAAC,CAACL;QAChB;IACF,EAAE,OAAO+C,GAAQ;QACfrD,SAAS,iBAAiBqD;QAE1B,IAAIA,aAAalH,sBACf,MAAMkH;QAGR,MAAMC,WAAW,IAAIlH,MACnB,CAAC,eAAe,EAAEkE,cAAc,eAAe,GAAG,kBAAkB,EAAE1D,UAAU,GAAG,EAAEyG,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpB/D,QAAsC,EACtC/C,WAAyB,EACzBgD,OAEC;IAOD,MAAM+D,WAAW,MAAMjE,OAAOC,UAAU/C,aAAa;QACnD,aAAagD,SAAS;IACxB;IACAgE,OAAOD,UAAU;IACjB,MAAMtG,cAAcT,YAAY,WAAW;IAC3C,MAAMiH,cAAcC,cAAcH,SAAS,OAAO,EAAEtG;IACpD,IAAI,AAAuB,YAAvB,OAAOwG,aACT,MAAM,IAAIvH,qBACR,CAAC,0CAA0C,EAAEM,YAAY,SAAS,CAAC,GAAG,EAAE+G,SAAS,OAAO,EAAE,EAC1FA,SAAS,OAAO,EAChBA,SAAS,KAAK;IAGlB,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZpH,WAAyB,EACzBgD,OAEC;IAED,MAAM,EAAEc,OAAO,EAAEhE,KAAK,EAAE,GAAG,MAAMgD,OAAOsE,MAAMpH,aAAa;QACzD,aAAagD,SAAS;IACxB;IACA,OAAO;QAAEc;QAAShE;IAAM;AAC1B;AAEO,SAASuH,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEA,SAASxE,2BAA2B,EAClCyE,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EAKhB;IACC,OACEF,AAAqBnD,WAArBmD,oBACA,CAAC,CAACC,mBACFC,AAAoBrD,WAApBqD;AAEJ;AAEA,MAAMC,+BAA+B;IACnC;IACA;IACA;IACA;IACA;IACA;CACD;AAID,SAASC,2BACPC,MAAgC;IAEhC,OACE,CAAC,CAACA,UACDF,6BAAyD,QAAQ,CAACE;AAEvE;AAEA,SAASC;IACP,OAAOH,6BAA6B,IAAI,CAAC;AAC3C;AAEO,SAASjD,uBAAuB,EACrC8C,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EACfpH,WAAW,EAMZ;IAIC,MAAMyH,oBAAoBhF,2BAA2B;QACnDyE;QACAC;QACAC;IACF;IAEA,IAAIK,mBAAmB;QACrB,IAAI,CAACzH,aACH,MAAM,IAAId,MACR,CAAC,yGAAyG,EAAEsI,gCAAgC,iHAAiH,CAAC;QAMlQ,IAAI,CAACF,2BAA2BtH,cAC9B,MAAM,IAAId,MACR,CAAC,oDAAoD,EAAEc,YAAY,kCAAkC,EAAEwH,gCAAgC,iHAAiH,CAAC;IAG/P,OAAO,IAAI,CAACF,2BAA2BtH,cACrC,OAAO;QAAE,QAAQ,CAAC;IAAE;IAGtB,MAAM0H,4BAA4BR,oBAAoB;IAEtD,MAAMS,gBAA0B,EAAE;IAClC,MAAMC,SAAkC,CAAC;IAEzC,IACE5H,AAAgB,eAAhBA,eACAA,AAAgB,cAAhBA,eACAA,AAAgB,cAAhBA,aACA;QAEA4H,OAAO,eAAe,GAAGF;QACzBC,cAAc,IAAI,CAAC,CAAC,gBAAgB,EAAED,2BAA2B;QAEjE,IAAIN,AAAoBrD,WAApBqD,iBAA+B;YACjCQ,OAAO,eAAe,GAAGR;YACzBO,cAAc,IAAI,CAAC,CAAC,gBAAgB,EAAEP,iBAAiB;QACzD;IAEF,OAAO,IAAIpH,AAAgB,oBAAhBA,eAAmCA,AAAgB,kBAAhBA,aAA+B;QAE3E4H,OAAO,QAAQ,GAAG;YAChB,MAAMF,4BAA4B,YAAY;QAChD;QACAC,cAAc,IAAI,CAChB,CAAC,cAAc,EAAED,4BAA4B,YAAY,YAAY;QAGvE,IAAIP,iBAAiB;YACnBS,OAAO,gBAAgB,GAAGT;YAC1BQ,cAAc,IAAI,CAAC,CAAC,kBAAkB,EAAER,gBAAgB,CAAC,CAAC;QAC5D;IAEF,OAAO,IAAInH,AAAgB,YAAhBA,aAAyB;QAElC4H,OAAO,QAAQ,GAAG;YAChB,MAAMF,4BAA4B,YAAY;QAChD;QACAC,cAAc,IAAI,CAChB,CAAC,cAAc,EAAED,4BAA4B,YAAY,YAAY;IAGzE;IAEA,OAAO;QACLE;QACA,cAAcD,cAAc,MAAM,GAC9B,CAAC,qBAAqB,EAAE3H,YAAY,EAAE,EAAE2H,cAAc,IAAI,CAAC,OAAO,GAClE5D;IACN;AACF;AAQA,SAAS8D,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIrD,MAAM,OAAO,CAACqD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACC,OAASF,oBAAoBE;IAI/C,IAAI,AAAe,YAAf,OAAOD,KAAkB;QAC3B,MAAME,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKtE,MAAM,IAAIuE,OAAO,OAAO,CAACJ,KAAM;YAE9C,MAAMK,aAAaF,IAAI,IAAI;YAG3B,IAAIG,kBAAkBP,oBAAoBlE;YAG1C,IAAI,AAA2B,YAA3B,OAAOyE,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCJ,UAAU,CAACG,WAAW,GAAGC;QAC3B;QAEA,OAAOJ;IACT;IAGA,IAAI,AAAe,YAAf,OAAOF,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASrB,cACdQ,KAAa,EACbjH,WAAqC;IAErC,MAAMqI,kBAAkBzB,yBAAyBK;IAEjD,IAAIoB,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAInH;IAGT,IAAIR;IACJ,IAAI8E;IACJ,IAAI;QACF9E,SAASmF,KAAK,KAAK,CAACwC;QACpB,OAAOR,oBAAoBnH;IAC7B,EAAE,OAAOY,OAAO;QACdkE,YAAYlE;IACd;IACA,IAAI;QACFZ,SAASmF,KAAK,KAAK,CAACyC,WAAWD;QAC/B,OAAOR,oBAAoBnH;IAC7B,EAAE,OAAOY,OAAO;QACdkE,YAAYlE;IACd;IAEA,IACEtB,AAAgB,oBAAhBA,eACAA,AAAgB,kBAAhBA,eACAuI,SAASvI,cACT;QACA,MAAMwI,aAAaxB,yBAAyBqB;QAC5C,IAAI;YACF3H,SAASmF,KAAK,KAAK,CAACyC,WAAWE;YAC/B,OAAOX,oBAAoBnH;QAC7B,EAAE,OAAOY,OAAO;YACdkE,YAAYlE;QACd;IACF;IACA,MAAMpC,MACJ,CAAC,gDAAgD,EAAEuJ,OACjDjD,aAAa,iBACb,gBAAgB,EAAEyB,OAAO;AAE/B"}
|
|
1
|
+
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import type { AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\n\n// Error class that preserves usage and rawResponse when AI call parsing fails\nexport class AIResponseParseError extends Error {\n usage?: AIUsageInfo;\n rawResponse: string;\n\n constructor(message: string, rawResponse: string, usage?: AIUsageInfo) {\n super(message);\n this.name = 'AIResponseParseError';\n this.rawResponse = rawResponse;\n this.usage = usage;\n }\n}\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n type TModelFamily,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM, isUITars } from '../auto-glm/util';\nimport {\n callAIWithCodexAppServer,\n isCodexAppServerProvider,\n} from './codex-app-server';\nimport { shouldForceOriginalImageDetail } from './image-detail';\nimport {\n buildRequestAbortSignal,\n isHardTimeoutError,\n resolveEffectiveTimeoutMs,\n} from './request-timeout';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsModelVersion?: UITarsModelVersion;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const warnClient = getDebug('ai:call', { console: true });\n const debugProxy = getDebug('ai:call:proxy');\n const warnProxy = getDebug('ai:call:proxy', { console: true });\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n warnProxy(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n warnProxy(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n warnProxy('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const effectiveTimeoutMs = resolveEffectiveTimeoutMs({ timeout });\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n // Midscene already handles retries in callAI(), so disable SDK-level retries\n // to avoid duplicate attempts and duplicated backoff latency.\n maxRetries: 0,\n // When disabled (timeoutMs === null) fall through to the SDK default so\n // only the caller-provided abortSignal can cancel the request.\n ...(effectiveTimeoutMs !== null ? { timeout: effectiveTimeoutMs } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = '@langfuse/openai';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n abortSignal?: AbortSignal;\n forceOriginalImageDetail?: boolean;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n if (isCodexAppServerProvider(modelConfig.openaiBaseURL)) {\n if (\n !modelConfig.modelFamily &&\n hasExplicitReasoningConfig({\n reasoningEnabled: modelConfig.reasoningEnabled,\n reasoningEffort: modelConfig.reasoningEffort,\n reasoningBudget: modelConfig.reasoningBudget,\n })\n ) {\n throw new Error(\n 'Reasoning config requires MIDSCENE_MODEL_FAMILY. Set MIDSCENE_MODEL_FAMILY when using MIDSCENE_MODEL_REASONING_ENABLED / MIDSCENE_MODEL_REASONING_EFFORT / MIDSCENE_MODEL_REASONING_BUDGET.',\n );\n }\n\n return callAIWithCodexAppServer(messages, modelConfig, {\n stream: options?.stream,\n onChunk: options?.onChunk,\n reasoningEnabled: modelConfig.reasoningEnabled,\n abortSignal: options?.abortSignal,\n });\n }\n\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n const effectiveTimeoutMs = resolveEffectiveTimeoutMs(modelConfig);\n\n const extraBody = modelConfig.extraBody;\n\n const debugCall = getDebug('ai:call');\n const warnCall = getDebug('ai:call', { console: true });\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const temperature = (() => {\n if (modelFamily === 'gpt-5') {\n debugCall('temperature is ignored for gpt-5');\n return undefined;\n }\n return modelConfig.temperature ?? 0;\n })();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n let requestId: string | null | undefined;\n\n const hasUsableText = (value: string | null | undefined): value is string =>\n typeof value === 'string' && value.trim().length > 0;\n\n const buildUsageInfo = (\n usageData?: OpenAI.CompletionUsage,\n requestId?: string | null,\n ) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n slot: modelConfig.slot,\n intent: undefined,\n request_id: requestId ?? undefined,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n ...(modelFamily === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(modelFamily)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n const {\n config: reasoningEffortConfig,\n debugMessage: reasoningEffortDebugMessage,\n } = resolveReasoningConfig({\n reasoningEnabled: modelConfig.reasoningEnabled,\n reasoningEffort: modelConfig.reasoningEffort,\n reasoningBudget: modelConfig.reasoningBudget,\n modelFamily,\n });\n if (reasoningEffortDebugMessage) {\n debugCall(reasoningEffortDebugMessage);\n }\n\n const shouldUseOriginalImageDetail =\n options?.forceOriginalImageDetail ||\n shouldForceOriginalImageDetail(modelConfig);\n\n // For default-intent GPT-5 calls, request original image detail to preserve\n // screenshot resolution for localization-sensitive tasks.\n const messagesWithImageDetail: ChatCompletionMessageParam[] = (() => {\n if (!shouldUseOriginalImageDetail) {\n return messages;\n }\n\n return messages.map((msg) => {\n if (!Array.isArray(msg.content)) {\n return msg;\n }\n\n const content = msg.content.map((part) => {\n if (part && part.type === 'image_url' && part.image_url?.url) {\n return {\n ...part,\n image_url: {\n ...part.image_url,\n detail: 'original',\n },\n };\n }\n return part;\n });\n\n return {\n ...msg,\n content,\n } as ChatCompletionMessageParam;\n });\n })();\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const { signal: streamSignal, cleanup: cleanupStreamSignal } =\n buildRequestAbortSignal(effectiveTimeoutMs, options?.abortSignal);\n try {\n const stream = (await completion.create(\n {\n model: modelName,\n messages: messagesWithImageDetail,\n ...commonConfig,\n ...reasoningEffortConfig,\n ...extraBody,\n },\n {\n stream: true,\n signal: streamSignal,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n requestId = stream._request_id;\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage, requestId),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n } finally {\n cleanupStreamSignal();\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${modelFamily || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n const { signal: attemptSignal, cleanup: cleanupAttemptSignal } =\n buildRequestAbortSignal(effectiveTimeoutMs, options?.abortSignal);\n try {\n const result = await completion.create(\n {\n model: modelName,\n messages: messagesWithImageDetail,\n ...commonConfig,\n ...reasoningEffortConfig,\n ...extraBody,\n } as any,\n { signal: attemptSignal },\n );\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n requestId = result._request_id;\n\n if (!hasUsableText(content) && hasUsableText(accumulatedReasoning)) {\n warnCall('empty content from AI model, using reasoning content');\n content = accumulatedReasoning;\n }\n\n if (!hasUsableText(content)) {\n throw new AIResponseParseError(\n 'empty content from AI model',\n JSON.stringify(result),\n buildUsageInfo(usage, requestId),\n );\n }\n\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n const wasHardTimeout = isHardTimeoutError(lastError);\n if (wasHardTimeout) {\n warnCall(\n `AI call hit hard timeout (${effectiveTimeoutMs}ms, attempt ${attempt}/${maxAttempts}, model ${modelName}, slot ${modelConfig.slot})`,\n );\n }\n // Do not retry if the request was aborted by the caller\n if (options?.abortSignal?.aborted) {\n break;\n }\n if (attempt < maxAttempts) {\n warnCall(\n `AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n } finally {\n cleanupAttemptSignal();\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage, requestId),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n warnCall('call AI error', e);\n\n if (e instanceof AIResponseParseError) {\n throw e;\n }\n\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n abortSignal?: AbortSignal;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n abortSignal: options?.abortSignal,\n });\n assert(response, 'empty response');\n const modelFamily = modelConfig.modelFamily;\n const jsonContent = safeParseJson(response.content, modelFamily);\n if (typeof jsonContent !== 'object') {\n throw new AIResponseParseError(\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n response.content,\n response.usage,\n );\n }\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n options?: {\n abortSignal?: AbortSignal;\n },\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig, {\n abortSignal: options?.abortSignal,\n });\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nfunction hasExplicitReasoningConfig({\n reasoningEnabled,\n reasoningEffort,\n reasoningBudget,\n}: {\n reasoningEnabled?: boolean;\n reasoningEffort?: string;\n reasoningBudget?: number;\n}): boolean {\n return (\n reasoningEnabled !== undefined ||\n !!reasoningEffort ||\n reasoningBudget !== undefined\n );\n}\n\nconst SUPPORTED_REASONING_FAMILIES = [\n 'qwen3-vl',\n 'qwen3.5',\n 'qwen3.6',\n 'doubao-vision',\n 'doubao-seed',\n 'glm-v',\n] as const satisfies readonly TModelFamily[];\n\ntype SupportedReasoningFamily = (typeof SUPPORTED_REASONING_FAMILIES)[number];\n\nfunction isSupportedReasoningFamily(\n family: TModelFamily | undefined,\n): family is SupportedReasoningFamily {\n return (\n !!family &&\n (SUPPORTED_REASONING_FAMILIES as readonly TModelFamily[]).includes(family)\n );\n}\n\nfunction supportedReasoningFamilyNames(): string {\n return SUPPORTED_REASONING_FAMILIES.join(', ');\n}\n\nexport function resolveReasoningConfig({\n reasoningEnabled,\n reasoningEffort,\n reasoningBudget,\n modelFamily,\n}: {\n reasoningEnabled?: boolean;\n reasoningEffort?: string;\n reasoningBudget?: number;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n} {\n const hasExplicitConfig = hasExplicitReasoningConfig({\n reasoningEnabled,\n reasoningEffort,\n reasoningBudget,\n });\n\n if (hasExplicitConfig) {\n if (!modelFamily) {\n throw new Error(\n `Reasoning config requires MIDSCENE_MODEL_FAMILY. Set MIDSCENE_MODEL_FAMILY to a supported family such as ${supportedReasoningFamilyNames()}, or remove MIDSCENE_MODEL_REASONING_ENABLED / MIDSCENE_MODEL_REASONING_EFFORT / MIDSCENE_MODEL_REASONING_BUDGET.`,\n );\n }\n\n // GPT-5 over Chat Completions is intentionally unsupported here because\n // its reasoning effort compatibility varies by model version.\n if (!isSupportedReasoningFamily(modelFamily)) {\n throw new Error(\n `Reasoning config is not supported for model family \"${modelFamily}\". Use a supported family such as ${supportedReasoningFamilyNames()}, or remove MIDSCENE_MODEL_REASONING_ENABLED / MIDSCENE_MODEL_REASONING_EFFORT / MIDSCENE_MODEL_REASONING_BUDGET.`,\n );\n }\n } else if (!isSupportedReasoningFamily(modelFamily)) {\n return { config: {} };\n }\n\n const effectiveReasoningEnabled = reasoningEnabled ?? false;\n\n const debugMessages: string[] = [];\n const config: Record<string, unknown> = {};\n\n if (\n modelFamily === 'qwen3-vl' ||\n modelFamily === 'qwen3.5' ||\n modelFamily === 'qwen3.6'\n ) {\n // reasoningEnabled → enable_thinking\n config.enable_thinking = effectiveReasoningEnabled;\n debugMessages.push(`enable_thinking=${effectiveReasoningEnabled}`);\n // reasoningBudget → thinking_budget\n if (reasoningBudget !== undefined) {\n config.thinking_budget = reasoningBudget;\n debugMessages.push(`thinking_budget=${reasoningBudget}`);\n }\n // reasoningEffort is ignored for qwen\n } else if (modelFamily === 'doubao-vision' || modelFamily === 'doubao-seed') {\n // reasoningEnabled → thinking.type\n config.thinking = {\n type: effectiveReasoningEnabled ? 'enabled' : 'disabled',\n };\n debugMessages.push(\n `thinking.type=${effectiveReasoningEnabled ? 'enabled' : 'disabled'}`,\n );\n // reasoningEffort → reasoning_effort\n if (reasoningEffort) {\n config.reasoning_effort = reasoningEffort;\n debugMessages.push(`reasoning_effort=\"${reasoningEffort}\"`);\n }\n // reasoningBudget is ignored for doubao\n } else if (modelFamily === 'glm-v') {\n // reasoningEnabled → thinking.type\n config.thinking = {\n type: effectiveReasoningEnabled ? 'enabled' : 'disabled',\n };\n debugMessages.push(\n `thinking.type=${effectiveReasoningEnabled ? 'enabled' : 'disabled'}`,\n );\n // reasoningEffort and reasoningBudget are ignored for glm-v\n }\n\n return {\n config,\n debugMessage: debugMessages.length\n ? `reasoning config for ${modelFamily}: ${debugMessages.join(', ')}`\n : undefined,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(\n input: string,\n modelFamily: TModelFamily | undefined,\n) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (\n modelFamily === 'doubao-vision' ||\n modelFamily === 'doubao-seed' ||\n isUITars(modelFamily)\n ) {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["AIResponseParseError","Error","message","rawResponse","usage","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsModelVersion","modelFamily","createOpenAIClient","timeout","proxyAgent","warnClient","getDebug","debugProxy","warnProxy","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","moduleName","ProxyAgent","socksDispatcher","proxyUrl","port","Number","protocol","socksType","decodeURIComponent","error","effectiveTimeoutMs","resolveEffectiveTimeoutMs","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","isCodexAppServerProvider","hasExplicitReasoningConfig","callAIWithCodexAppServer","completion","extraBody","debugCall","warnCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","timeCost","requestId","hasUsableText","value","buildUsageInfo","usageData","cachedInputTokens","undefined","commonConfig","isAutoGLM","reasoningEffortConfig","reasoningEffortDebugMessage","resolveReasoningConfig","shouldUseOriginalImageDetail","shouldForceOriginalImageDetail","messagesWithImageDetail","msg","Array","part","streamSignal","cleanupStreamSignal","buildRequestAbortSignal","stream","chunk","reasoning_content","chunkData","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","attemptSignal","cleanupAttemptSignal","result","JSON","wasHardTimeout","isHardTimeoutError","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","reasoningEnabled","reasoningEffort","reasoningBudget","SUPPORTED_REASONING_FAMILIES","isSupportedReasoningFamily","family","supportedReasoningFamilyNames","hasExplicitConfig","effectiveReasoningEnabled","debugMessages","config","normalizeJsonObject","obj","item","normalized","key","Object","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","isUITars","jsonString","String"],"mappings":";;;;;;;;;;;;;;;;;;;AAIO,MAAMA,6BAA6BC;IAIxC,YAAYC,OAAe,EAAEC,WAAmB,EAAEC,KAAmB,CAAE;QACrE,KAAK,CAACF,UAJR,yCACA;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,WAAW,GAAGC;QACnB,IAAI,CAAC,KAAK,GAAGC;IACf;AACF;AA6BA,eAAeC,iBAAiB,EAC9BC,WAAW,EAGZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS,WAAW;QAAE,SAAS;IAAK;IACvD,MAAMC,aAAaD,SAAS;IAC5B,MAAME,YAAYF,SAAS,iBAAiB;QAAE,SAAS;IAAK;IAI5D,MAAMG,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAIhB,WAAW;QACba,WAAW,oBAAoBE,iBAAiBf;QAChD,IAAImB,aACFL,UACE;aAEG;YAEL,MAAMM,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCV,aAAa,IAAIW,WAAW;gBAC1B,KAAKrB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBc,WAAW,qBAAqBE,iBAAiBhB;QACjD,IAAIoB,aACFL,UACE;aAGF,IAAI;YAEF,MAAMM,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIL,IAAInB;YAGzB,IAAI,CAACwB,SAAS,QAAQ,EACpB,MAAM,IAAI9B,MAAM;YAIlB,MAAM+B,OAAOC,OAAO,QAAQ,CAACF,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIE,OAAO,KAAK,CAACD,OACjC,MAAM,IAAI/B,MAAM;YAIlB,MAAMiC,WAAWH,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMI,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaY,gBAAgB;gBAC3B,MAAMK;gBACN,MAAMJ,SAAS,QAAQ;gBACvBC;gBACA,GAAID,SAAS,QAAQ,GACjB;oBACE,QAAQK,mBAAmBL,SAAS,QAAQ;oBAC5C,UAAUK,mBAAmBL,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAV,WAAW,uCAAuC;gBAChD,MAAMc;gBACN,MAAMJ,SAAS,QAAQ;gBACvB,MAAMC;YACR;QACF,EAAE,OAAOK,OAAO;YACdf,UAAU,oCAAoCe;YAC9C,MAAM,IAAIpC,MACR,CAAC,yBAAyB,EAAEM,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,qBAAqBC,0BAA0B;QAAEtB;IAAQ;IAC/D,MAAMuB,gBAAgB;QACpB,SAAS9B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,iBAAiB;QAGpB,YAAY;QAGZ,GAAI0B,AAAuB,SAAvBA,qBAA8B;YAAE,SAASA;QAAmB,IAAI,CAAC,CAAC;QACtE,yBAAyB;IAC3B;IAEA,MAAMG,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIlB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAM2B,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACI,0BAC1C;QACA,IAAIrB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAM8B,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAI3B,oBAAoB;QACtB,MAAMmC,gBAAgB,MAAMnC,mBAAmByB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnClC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAeqC,OACpBC,QAAsC,EACtC/C,WAAyB,EACzBgD,OAKC;IAOD,IAAIC,yBAAyBjD,YAAY,aAAa,GAAG;QACvD,IACE,CAACA,YAAY,WAAW,IACxBkD,2BAA2B;YACzB,kBAAkBlD,YAAY,gBAAgB;YAC9C,iBAAiBA,YAAY,eAAe;YAC5C,iBAAiBA,YAAY,eAAe;QAC9C,IAEA,MAAM,IAAIL,MACR;QAIJ,OAAOwD,yBAAyBJ,UAAU/C,aAAa;YACrD,QAAQgD,SAAS;YACjB,SAASA,SAAS;YAClB,kBAAkBhD,YAAY,gBAAgB;YAC9C,aAAagD,SAAS;QACxB;IACF;IAEA,MAAM,EACJI,UAAU,EACVjD,SAAS,EACTI,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACZ,GAAG,MAAMV,iBAAiB;QACzBC;IACF;IACA,MAAMgC,qBAAqBC,0BAA0BjC;IAErD,MAAMqD,YAAYrD,YAAY,SAAS;IAEvC,MAAMsD,YAAYxC,SAAS;IAC3B,MAAMyC,WAAWzC,SAAS,WAAW;QAAE,SAAS;IAAK;IACrD,MAAM0C,oBAAoB1C,SAAS;IACnC,MAAM2C,qBAAqB3C,SAAS;IAEpC,MAAM4C,YAAYC,KAAK,GAAG;IAE1B,MAAMC,cAAe,AAAC;QACpB,IAAInD,AAAgB,YAAhBA,aAAyB,YAC3B6C,UAAU;QAGZ,OAAOtD,YAAY,WAAW,IAAI;IACpC;IAEA,MAAM6D,cAAcb,SAAS,UAAUA,SAAS;IAChD,IAAIc;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIlE;IACJ,IAAImE;IACJ,IAAIC;IAEJ,MAAMC,gBAAgB,CAACC,QACrB,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,IAAI,GAAG,MAAM,GAAG;IAErD,MAAMC,iBAAiB,CACrBC,WACAJ;QAEA,IAAI,CAACI,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWN,YAAY;YACvB,YAAY9D;YACZ,mBAAmBI;YACnB,MAAMP,YAAY,IAAI;YACtB,QAAQwE;YACR,YAAYN,aAAaM;QAC3B;IACF;IAEA,MAAMC,eAAe;QACnBb;QACA,QAAQ,CAAC,CAACC;QACV,GAAIpD,AAAgB,iBAAhBA,cACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAIiE,UAAUjE,cAAc;QACzBgE,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAEA,MAAM,EACJ,QAAQE,qBAAqB,EAC7B,cAAcC,2BAA2B,EAC1C,GAAGC,uBAAuB;QACzB,kBAAkB7E,YAAY,gBAAgB;QAC9C,iBAAiBA,YAAY,eAAe;QAC5C,iBAAiBA,YAAY,eAAe;QAC5CS;IACF;IACA,IAAImE,6BACFtB,UAAUsB;IAGZ,MAAME,+BACJ9B,SAAS,4BACT+B,+BAA+B/E;IAIjC,MAAMgF,0BAAyD,AAAC;QAC9D,IAAI,CAACF,8BACH,OAAO/B;QAGT,OAAOA,SAAS,GAAG,CAAC,CAACkC;YACnB,IAAI,CAACC,MAAM,OAAO,CAACD,IAAI,OAAO,GAC5B,OAAOA;YAGT,MAAMnB,UAAUmB,IAAI,OAAO,CAAC,GAAG,CAAC,CAACE;gBAC/B,IAAIA,QAAQA,AAAc,gBAAdA,KAAK,IAAI,IAAoBA,KAAK,SAAS,EAAE,KACvD,OAAO;oBACL,GAAGA,IAAI;oBACP,WAAW;wBACT,GAAGA,KAAK,SAAS;wBACjB,QAAQ;oBACV;gBACF;gBAEF,OAAOA;YACT;YAEA,OAAO;gBACL,GAAGF,GAAG;gBACNnB;YACF;QACF;IACF;IAEA,IAAI;QACFR,UACE,CAAC,QAAQ,EAAEO,cAAc,eAAe,GAAG,WAAW,EAAE1D,WAAW;QAGrE,IAAI0D,aAAa;YACf,MAAM,EAAE,QAAQuB,YAAY,EAAE,SAASC,mBAAmB,EAAE,GAC1DC,wBAAwBtD,oBAAoBgB,SAAS;YACvD,IAAI;gBACF,MAAMuC,SAAU,MAAMnC,WAAW,MAAM,CACrC;oBACE,OAAOjD;oBACP,UAAU6E;oBACV,GAAGP,YAAY;oBACf,GAAGE,qBAAqB;oBACxB,GAAGtB,SAAS;gBACd,GACA;oBACE,QAAQ;oBACR,QAAQ+B;gBACV;gBAKFlB,YAAYqB,OAAO,WAAW;gBAE9B,WAAW,MAAMC,SAASD,OAAQ;oBAChC,MAAMzB,UAAU0B,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;oBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;oBAG3D,IAAIA,MAAM,KAAK,EACb1F,QAAQ0F,MAAM,KAAK;oBAGrB,IAAI1B,WAAW2B,mBAAmB;wBAChC1B,eAAeD;wBACfE,wBAAwByB;wBACxB,MAAMC,YAAiC;4BACrC5B;4BACA2B;4BACA1B;4BACA,YAAY;4BACZ,OAAOS;wBACT;wBACAxB,QAAQ,OAAO,CAAE0C;oBACnB;oBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;wBACrCvB,WAAWN,KAAK,GAAG,KAAKD;wBAGxB,IAAI,CAAC5D,OAAO;4BAEV,MAAM6F,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAC7B,YAAY,MAAM,GAAG;4BAElCjE,QAAQ;gCACN,eAAe6F;gCACf,mBAAmBA;gCACnB,cAAcA,AAAkB,IAAlBA;4BAChB;wBACF;wBAGA,MAAME,aAAkC;4BACtC,SAAS;4BACT9B;4BACA,mBAAmB;4BACnB,YAAY;4BACZ,OAAOM,eAAevE,OAAOoE;wBAC/B;wBACAlB,QAAQ,OAAO,CAAE6C;wBACjB;oBACF;gBACF;YACF,SAAU;gBACRR;YACF;YACAvB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAErD,UAAU,QAAQ,EAAEM,eAAe,UAAU,WAAW,EAAEwD,SAAS,eAAe,EAAEL,eAAe,IAAI;QAE/H,OAAO;YAEL,MAAMkC,aAAa9F,YAAY,UAAU,IAAI;YAC7C,MAAM+F,gBAAgB/F,YAAY,aAAa,IAAI;YACnD,MAAMgG,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAAW;gBACvD,MAAM,EAAE,QAAQC,aAAa,EAAE,SAASC,oBAAoB,EAAE,GAC5Dd,wBAAwBtD,oBAAoBgB,SAAS;gBACvD,IAAI;oBACF,MAAMqD,SAAS,MAAMjD,WAAW,MAAM,CACpC;wBACE,OAAOjD;wBACP,UAAU6E;wBACV,GAAGP,YAAY;wBACf,GAAGE,qBAAqB;wBACxB,GAAGtB,SAAS;oBACd,GACA;wBAAE,QAAQ8C;oBAAc;oBAG1BlC,WAAWN,KAAK,GAAG,KAAKD;oBAExBF,kBACE,CAAC,OAAO,EAAErD,UAAU,QAAQ,EAAEM,eAAe,UAAU,mBAAmB,EAAED,mBAAmB,iBAAiB,EAAE6F,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEpC,SAAS,aAAa,EAAEoC,OAAO,WAAW,IAAI,GAAG,eAAe,EAAEzC,eAAe,IAAI;oBAGxWH,mBACE,CAAC,oBAAoB,EAAE6C,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;oBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI1G,MACR,CAAC,mCAAmC,EAAE2G,KAAK,SAAS,CAACD,SAAS;oBAIlEvC,UAAUuC,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;oBAC3CrC,uBACGqC,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;oBAC3DvG,QAAQuG,OAAO,KAAK;oBACpBnC,YAAYmC,OAAO,WAAW;oBAE9B,IAAI,CAAClC,cAAcL,YAAYK,cAAcH,uBAAuB;wBAClET,SAAS;wBACTO,UAAUE;oBACZ;oBAEA,IAAI,CAACG,cAAcL,UACjB,MAAM,IAAIpE,qBACR,+BACA4G,KAAK,SAAS,CAACD,SACfhC,eAAevE,OAAOoE;oBAI1B;gBACF,EAAE,OAAOnC,OAAO;oBACdkE,YAAYlE;oBACZ,MAAMwE,iBAAiBC,mBAAmBP;oBAC1C,IAAIM,gBACFhD,SACE,CAAC,0BAA0B,EAAEvB,mBAAmB,YAAY,EAAEkE,QAAQ,CAAC,EAAEF,YAAY,QAAQ,EAAE7F,UAAU,OAAO,EAAEH,YAAY,IAAI,CAAC,CAAC,CAAC;oBAIzI,IAAIgD,SAAS,aAAa,SACxB;oBAEF,IAAIkD,UAAUF,aAAa;wBACzBzC,SACE,CAAC,wBAAwB,EAAE2C,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;wBAErH,MAAM,IAAIQ,QAAQ,CAACC,UAAYC,WAAWD,SAASX;oBACrD;gBACF,SAAU;oBACRK;gBACF;YACF;YAEA,IAAI,CAACtC,SACH,MAAMmC;QAEV;QAEA3C,UAAU,CAAC,4BAA4B,EAAEU,sBAAsB;QAC/DV,UAAU,CAAC,kBAAkB,EAAEQ,SAAS;QAGxC,IAAID,eAAe,CAAC/D,OAAO;YAEzB,MAAM6F,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAE9B,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtChE,QAAQ;gBACN,eAAe6F;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAAS7B,WAAW;YACpB,mBAAmBE,wBAAwBQ;YAC3C,OAAOH,eAAevE,OAAOoE;YAC7B,YAAY,CAAC,CAACL;QAChB;IACF,EAAE,OAAO+C,GAAQ;QACfrD,SAAS,iBAAiBqD;QAE1B,IAAIA,aAAalH,sBACf,MAAMkH;QAGR,MAAMC,WAAW,IAAIlH,MACnB,CAAC,eAAe,EAAEkE,cAAc,eAAe,GAAG,kBAAkB,EAAE1D,UAAU,GAAG,EAAEyG,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpB/D,QAAsC,EACtC/C,WAAyB,EACzBgD,OAEC;IAOD,MAAM+D,WAAW,MAAMjE,OAAOC,UAAU/C,aAAa;QACnD,aAAagD,SAAS;IACxB;IACAgE,OAAOD,UAAU;IACjB,MAAMtG,cAAcT,YAAY,WAAW;IAC3C,MAAMiH,cAAcC,cAAcH,SAAS,OAAO,EAAEtG;IACpD,IAAI,AAAuB,YAAvB,OAAOwG,aACT,MAAM,IAAIvH,qBACR,CAAC,0CAA0C,EAAEM,YAAY,SAAS,CAAC,GAAG,EAAE+G,SAAS,OAAO,EAAE,EAC1FA,SAAS,OAAO,EAChBA,SAAS,KAAK;IAGlB,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZpH,WAAyB,EACzBgD,OAEC;IAED,MAAM,EAAEc,OAAO,EAAEhE,KAAK,EAAE,GAAG,MAAMgD,OAAOsE,MAAMpH,aAAa;QACzD,aAAagD,SAAS;IACxB;IACA,OAAO;QAAEc;QAAShE;IAAM;AAC1B;AAEO,SAASuH,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEA,SAASxE,2BAA2B,EAClCyE,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EAKhB;IACC,OACEF,AAAqBnD,WAArBmD,oBACA,CAAC,CAACC,mBACFC,AAAoBrD,WAApBqD;AAEJ;AAEA,MAAMC,+BAA+B;IACnC;IACA;IACA;IACA;IACA;IACA;CACD;AAID,SAASC,2BACPC,MAAgC;IAEhC,OACE,CAAC,CAACA,UACDF,6BAAyD,QAAQ,CAACE;AAEvE;AAEA,SAASC;IACP,OAAOH,6BAA6B,IAAI,CAAC;AAC3C;AAEO,SAASjD,uBAAuB,EACrC8C,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EACfpH,WAAW,EAMZ;IAIC,MAAMyH,oBAAoBhF,2BAA2B;QACnDyE;QACAC;QACAC;IACF;IAEA,IAAIK,mBAAmB;QACrB,IAAI,CAACzH,aACH,MAAM,IAAId,MACR,CAAC,yGAAyG,EAAEsI,gCAAgC,iHAAiH,CAAC;QAMlQ,IAAI,CAACF,2BAA2BtH,cAC9B,MAAM,IAAId,MACR,CAAC,oDAAoD,EAAEc,YAAY,kCAAkC,EAAEwH,gCAAgC,iHAAiH,CAAC;IAG/P,OAAO,IAAI,CAACF,2BAA2BtH,cACrC,OAAO;QAAE,QAAQ,CAAC;IAAE;IAGtB,MAAM0H,4BAA4BR,oBAAoB;IAEtD,MAAMS,gBAA0B,EAAE;IAClC,MAAMC,SAAkC,CAAC;IAEzC,IACE5H,AAAgB,eAAhBA,eACAA,AAAgB,cAAhBA,eACAA,AAAgB,cAAhBA,aACA;QAEA4H,OAAO,eAAe,GAAGF;QACzBC,cAAc,IAAI,CAAC,CAAC,gBAAgB,EAAED,2BAA2B;QAEjE,IAAIN,AAAoBrD,WAApBqD,iBAA+B;YACjCQ,OAAO,eAAe,GAAGR;YACzBO,cAAc,IAAI,CAAC,CAAC,gBAAgB,EAAEP,iBAAiB;QACzD;IAEF,OAAO,IAAIpH,AAAgB,oBAAhBA,eAAmCA,AAAgB,kBAAhBA,aAA+B;QAE3E4H,OAAO,QAAQ,GAAG;YAChB,MAAMF,4BAA4B,YAAY;QAChD;QACAC,cAAc,IAAI,CAChB,CAAC,cAAc,EAAED,4BAA4B,YAAY,YAAY;QAGvE,IAAIP,iBAAiB;YACnBS,OAAO,gBAAgB,GAAGT;YAC1BQ,cAAc,IAAI,CAAC,CAAC,kBAAkB,EAAER,gBAAgB,CAAC,CAAC;QAC5D;IAEF,OAAO,IAAInH,AAAgB,YAAhBA,aAAyB;QAElC4H,OAAO,QAAQ,GAAG;YAChB,MAAMF,4BAA4B,YAAY;QAChD;QACAC,cAAc,IAAI,CAChB,CAAC,cAAc,EAAED,4BAA4B,YAAY,YAAY;IAGzE;IAEA,OAAO;QACLE;QACA,cAAcD,cAAc,MAAM,GAC9B,CAAC,qBAAqB,EAAE3H,YAAY,EAAE,EAAE2H,cAAc,IAAI,CAAC,OAAO,GAClE5D;IACN;AACF;AAQA,SAAS8D,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIrD,MAAM,OAAO,CAACqD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACC,OAASF,oBAAoBE;IAI/C,IAAI,AAAe,YAAf,OAAOD,KAAkB;QAC3B,MAAME,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKtE,MAAM,IAAIuE,OAAO,OAAO,CAACJ,KAAM;YAE9C,MAAMK,aAAaF,IAAI,IAAI;YAG3B,IAAIG,kBAAkBP,oBAAoBlE;YAG1C,IAAI,AAA2B,YAA3B,OAAOyE,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCJ,UAAU,CAACG,WAAW,GAAGC;QAC3B;QAEA,OAAOJ;IACT;IAGA,IAAI,AAAe,YAAf,OAAOF,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASrB,cACdQ,KAAa,EACbjH,WAAqC;IAErC,MAAMqI,kBAAkBzB,yBAAyBK;IAEjD,IAAIoB,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAInH;IAGT,IAAIR;IACJ,IAAI8E;IACJ,IAAI;QACF9E,SAASmF,KAAK,KAAK,CAACwC;QACpB,OAAOR,oBAAoBnH;IAC7B,EAAE,OAAOY,OAAO;QACdkE,YAAYlE;IACd;IACA,IAAI;QACFZ,SAASmF,KAAK,KAAK,CAACyC,WAAWD;QAC/B,OAAOR,oBAAoBnH;IAC7B,EAAE,OAAOY,OAAO;QACdkE,YAAYlE;IACd;IAEA,IACEtB,AAAgB,oBAAhBA,eACAA,AAAgB,kBAAhBA,eACAuI,SAASvI,cACT;QACA,MAAMwI,aAAaxB,yBAAyBqB;QAC5C,IAAI;YACF3H,SAASmF,KAAK,KAAK,CAACyC,WAAWE;YAC/B,OAAOX,oBAAoBnH;QAC7B,EAAE,OAAOY,OAAO;YACdkE,YAAYlE;QACd;IACF;IACA,MAAMpC,MACJ,CAAC,gDAAgD,EAAEuJ,OACjDjD,aAAa,iBACb,gBAAgB,EAAEyB,OAAO;AAE/B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import { getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport interface MjpegStreamFrame {\n /** Raw base64-encoded image bytes WITHOUT a `data:image/...;base64,` prefix. */\n data: string;\n contentType?: string;\n}\n\nexport interface MjpegStreamHandle {\n stop(): void | Promise<void>;\n}\n\nexport interface MjpegStreamOptions {\n signal?: AbortSignal;\n onFrame(frame: MjpegStreamFrame): void;\n onError?(error: unknown): void;\n}\n\n/** A point in device-pixel coordinates on the screen. */\nexport interface PointerPoint {\n x: number;\n y: number;\n}\n\nexport interface PointerInputPrimitives {\n tap(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n doubleClick?(p: PointerPoint): Promise<void>;\n rightClick?(p: PointerPoint): Promise<void>;\n hover?(p: PointerPoint): Promise<void>;\n longPress?(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop?(from: PointerPoint, to: PointerPoint): Promise<void>;\n}\n\nexport interface TouchInputPrimitives {\n swipe(\n start: PointerPoint,\n end: PointerPoint,\n opts?: { duration?: number; repeat?: number },\n ): Promise<void>;\n pinch?(\n center: PointerPoint,\n opts: { startDistance: number; endDistance: number; duration: number },\n ): Promise<void>;\n}\n\nexport interface KeyboardInputPrimitives {\n keyboardPress(keyName: string, opts?: { target?: unknown }): Promise<void>;\n cursorMove?(direction: 'left' | 'right', times?: number): Promise<void>;\n typeText(\n value: string,\n opts?: {\n autoDismissKeyboard?: boolean;\n target?: unknown;\n replace?: boolean;\n focusOnly?: boolean;\n },\n ): Promise<void>;\n clearInput(target?: unknown): Promise<void>;\n}\n\nexport interface ScrollInputPrimitives {\n scroll(param: ActionScrollParam): Promise<void>;\n}\n\nexport interface SystemInputPrimitives {\n backButton?(): Promise<void>;\n homeButton?(): Promise<void>;\n recentAppsButton?(): Promise<void>;\n}\n\nexport interface InputPrimitives {\n pointer?: PointerInputPrimitives;\n keyboard?: KeyboardInputPrimitives;\n touch?: TouchInputPrimitives;\n scroll?: ScrollInputPrimitives;\n system?: SystemInputPrimitives;\n}\n\nexport interface MobileInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface BrowserInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface ComputerInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForPoint?(\n center: [number, number],\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n /**\n * Get the current device-local time as a formatted string.\n * Prefer this for user-visible time because timestamps alone do not preserve\n * the target device's timezone when formatted on the host machine.\n */\n getDeviceLocalTimeString?(format?: string): Promise<string>;\n\n /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */\n mjpegStreamUrl?: string;\n\n /**\n * Optional in-process MJPEG frame producer. Implementations can push raw\n * base64 frames here when there is no standalone native MJPEG URL, e.g.\n * Chromium CDP Page.startScreencast for web previews.\n */\n startMjpegStream?(\n options: MjpegStreamOptions,\n ): MjpegStreamHandle | undefined | Promise<MjpegStreamHandle | undefined>;\n\n /**\n * Optional hook used after keyboard-only actions to force a fresh frame on\n * the active MJPEG stream. Implementations should be a no-op when no stream\n * is active.\n */\n flushPendingVisualUpdate?(): Promise<void>;\n\n /**\n * Optional navigation state probe for browser-like interfaces, used to drive\n * loading indicators in playground UIs. Returning `undefined` means the\n * interface does not expose this concept.\n */\n navigationState?(): Promise<{ isLoading: boolean }>;\n\n /**\n * Low-level device input surface. Platform implementations expose transport\n * primitives here; higher-level AI actions and manual pointer dispatch should\n * adapt to this instead of duplicating platform gesture logic.\n */\n inputPrimitives?: InputPrimitives;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\nfunction pointFromLocate(\n locate: LocateResultElement | undefined,\n missingMessage: string,\n): PointerPoint {\n if (!locate) {\n throw new Error(missingMessage);\n }\n return { x: locate.center[0], y: locate.center[1] };\n}\n\nfunction defineLocatedPointAction<\n TSchema extends z.ZodType,\n TParam extends { locate: LocateResultElement },\n>(config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema: TSchema;\n sample: DeviceAction<TParam>['sample'];\n missingLocateMessage: string;\n call: (point: PointerPoint, param: TParam) => Promise<void>;\n}): DeviceAction<TParam> {\n return defineAction<TSchema, TParam>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n paramSchema: config.paramSchema,\n sample: config.sample,\n call: async (param) => {\n await config.call(\n pointFromLocate(param.locate, config.missingLocateMessage),\n param,\n );\n },\n });\n}\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n tap: PointerInputPrimitives['tap'],\n): DeviceAction<ActionTapParam> => {\n return defineLocatedPointAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n sample: {\n locate: { prompt: 'the \"Submit\" button' },\n },\n missingLocateMessage: 'Element not found, cannot tap',\n call: async (point) => {\n await tap(point);\n },\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n rightClick: NonNullable<PointerInputPrimitives['rightClick']>,\n): DeviceAction<ActionRightClickParam> => {\n return defineLocatedPointAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n sample: {\n locate: { prompt: 'the file icon on the desktop' },\n },\n missingLocateMessage: 'Element not found, cannot right click',\n call: async (point) => {\n await rightClick(point);\n },\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n doubleClick: NonNullable<PointerInputPrimitives['doubleClick']>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineLocatedPointAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n sample: {\n locate: { prompt: 'the folder icon' },\n },\n missingLocateMessage: 'Element not found, cannot double click',\n call: async (point) => {\n await doubleClick(point);\n },\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n hover: NonNullable<PointerInputPrimitives['hover']>,\n): DeviceAction<ActionHoverParam> => {\n return defineLocatedPointAction<\n typeof actionHoverParamSchema,\n ActionHoverParam\n >({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n sample: {\n locate: { prompt: 'the navigation menu item \"Products\"' },\n },\n missingLocateMessage: 'Element not found, cannot hover',\n call: async (point) => {\n await hover(point);\n },\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'typeOnly'])\n .default('replace')\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"typeOnly\" - type the value directly without clearing the field first; \"clear\" - clear the field without inputting new text.',\n ),\n autoDismissKeyboard: z\n .boolean()\n .optional()\n .describe(\n 'If true, the keyboard will be dismissed after the input is completed. Do not set it unless the user asks you to do so.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'typeOnly' | 'append';\n autoDismissKeyboard?: boolean;\n};\n\nexport const defineActionInput = (\n keyboard: KeyboardInputPrimitives,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n sample: {\n value: 'test@example.com',\n locate: { prompt: 'the email input field' },\n },\n call: async (param) => {\n // backward compat: convert deprecated 'append' to 'typeOnly'\n if ((param.mode as string) === 'append') {\n param.mode = 'typeOnly';\n }\n\n if (param.mode === 'clear') {\n await keyboard.clearInput(param.locate);\n return;\n }\n\n if (!param || !param.value) {\n return;\n }\n\n await keyboard.typeText(param.value, {\n target: param.locate,\n replace: param.mode !== 'typeOnly',\n autoDismissKeyboard: param.autoDismissKeyboard,\n });\n },\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n keyboardPress: KeyboardInputPrimitives['keyboardPress'],\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n sample: {\n keyName: 'Enter',\n },\n call: async (param) => {\n await keyboardPress(param.keyName, {\n target: param.locate,\n });\n },\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling all the way to the bottom by rapidly scrolling 5-10 times (skipping intermediate content until reaching the bottom), \"scrollToTop\" for scrolling all the way to the top by rapidly scrolling 5-10 times (skipping intermediate content until reaching the top), \"scrollToRight\" for scrolling all the way to the right by rapidly scrolling multiple times, \"scrollToLeft\" for scrolling all the way to the left by rapidly scrolling multiple times',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n scroll: ScrollInputPrimitives['scroll'],\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or a scrollable element to browse content. This is the preferred way to scroll on all platforms, including mobile. Supports scrollToBottom/scrollToTop for boundary navigation. Default: direction `down`, scrollType `singleAction`, distance `null`.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n sample: {\n direction: 'down',\n scrollType: 'singleAction',\n locate: { prompt: 'the center of the product list area' },\n },\n call: async (param) => {\n await scroll(param);\n },\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n dragAndDrop: NonNullable<PointerInputPrimitives['dragAndDrop']>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Pick up a specific UI element and move it to a new position (e.g., reorder a card, move a file into a folder, sort list items). The element itself moves with your finger/mouse.',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n sample: {\n from: { prompt: 'the \"report.pdf\" file icon' },\n to: { prompt: 'the upload drop zone' },\n },\n call: async (param) => {\n const from = param.from;\n const to = param.to;\n if (!from) {\n throw new Error('missing \"from\" param for drag and drop');\n }\n if (!to) {\n throw new Error('missing \"to\" param for drag and drop');\n }\n await dragAndDrop(\n { x: from.center[0], y: from.center[1] },\n { x: to.center[0], y: to.center[1] },\n );\n },\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n longPress: NonNullable<PointerInputPrimitives['longPress']>,\n): DeviceAction<ActionLongPressParam> => {\n return defineLocatedPointAction<\n typeof ActionLongPressParamSchema,\n ActionLongPressParam\n >({\n name: 'LongPress',\n description: 'Long press the element',\n interfaceAlias: 'aiLongPress',\n paramSchema: ActionLongPressParamSchema,\n sample: {\n locate: { prompt: 'the message bubble' },\n },\n missingLocateMessage: 'LongPress requires an element to be located',\n call: async (point, param) => {\n await longPress(point, { duration: param.duration });\n },\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport function normalizeMobileSwipeParam(\n param: ActionSwipeParam,\n screenSize: { width: number; height: number },\n): {\n startPoint: { x: number; y: number };\n endPoint: { x: number; y: number };\n duration: number;\n repeatCount: number;\n} {\n const { width, height } = screenSize;\n const { start, end } = param;\n\n const startPoint = start\n ? { x: start.center[0], y: start.center[1] }\n : { x: width / 2, y: height / 2 };\n\n let endPoint: { x: number; y: number };\n\n if (end) {\n endPoint = { x: end.center[0], y: end.center[1] };\n } else if (param.distance) {\n const direction = param.direction;\n if (!direction) {\n throw new Error('direction is required for swipe gesture');\n }\n endPoint = {\n x:\n startPoint.x +\n (direction === 'right'\n ? param.distance\n : direction === 'left'\n ? -param.distance\n : 0),\n y:\n startPoint.y +\n (direction === 'down'\n ? param.distance\n : direction === 'up'\n ? -param.distance\n : 0),\n };\n } else {\n throw new Error(\n 'Either end or distance must be specified for swipe gesture',\n );\n }\n\n endPoint.x = Math.max(0, Math.min(endPoint.x, width));\n endPoint.y = Math.max(0, Math.min(endPoint.y, height));\n\n const duration = param.duration ?? 300;\n\n let repeatCount = typeof param.repeat === 'number' ? param.repeat : 1;\n if (repeatCount === 0) {\n repeatCount = 10;\n }\n\n return { startPoint, endPoint, duration, repeatCount };\n}\n\nexport const defineActionSwipe = (config: {\n swipe: TouchInputPrimitives['swipe'];\n size(): Promise<Size>;\n}): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a touch gesture for interactions beyond regular scrolling (e.g., flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use \"distance\" + \"direction\" for relative movement, or \"end\" for precise endpoint.',\n paramSchema: ActionSwipeParamSchema,\n sample: {\n start: { prompt: 'center of the notification' },\n end: { prompt: 'upper edge of the screen' },\n },\n call: async (param) => {\n const { startPoint, endPoint, duration, repeatCount } =\n normalizeMobileSwipeParam(param, await config.size());\n for (let i = 0; i < repeatCount; i++) {\n await config.swipe(startPoint, endPoint, { duration });\n }\n },\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n clearInput: KeyboardInputPrimitives['clearInput'],\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n sample: {\n locate: { prompt: 'the search input field' },\n },\n call: async (param) => {\n await clearInput(param.locate);\n },\n });\n};\n\n// CursorMove\nexport const actionCursorMoveParamSchema = z.object({\n direction: z\n .enum(['left', 'right'])\n .describe('The direction to move the cursor'),\n times: z\n .number()\n .int()\n .min(1)\n .default(1)\n .describe(\n 'The number of times to move the cursor in the specified direction',\n ),\n});\nexport type ActionCursorMoveParam = {\n direction: 'left' | 'right';\n times?: number;\n};\n\nexport const defineActionCursorMove = (config: {\n keyboard: Pick<KeyboardInputPrimitives, 'keyboardPress' | 'cursorMove'>;\n sleep?(timeMs: number): Promise<void>;\n}): DeviceAction<ActionCursorMoveParam> => {\n return defineAction<\n typeof actionCursorMoveParamSchema,\n ActionCursorMoveParam\n >({\n name: 'CursorMove',\n description:\n 'Move the text cursor (caret) left or right within an input field or text area. Use this to reposition the cursor without selecting text.',\n paramSchema: actionCursorMoveParamSchema,\n sample: {\n direction: 'left',\n times: 3,\n },\n call: async (param) => {\n const times = param.times ?? 1;\n if (config.keyboard.cursorMove) {\n await config.keyboard.cursorMove(param.direction, times);\n return;\n }\n\n const wait =\n config.sleep ??\n ((timeMs: number) =>\n new Promise<void>((resolve) => setTimeout(resolve, timeMs)));\n const arrowKey = param.direction === 'left' ? 'ArrowLeft' : 'ArrowRight';\n for (let i = 0; i < times; i++) {\n await config.keyboard.keyboardPress(arrowKey);\n await wait(100);\n }\n },\n });\n};\n\n// Pinch\nexport const ActionPinchParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'The element to pinch on. If not specified, the center of the screen will be used',\n ),\n direction: z\n .enum(['in', 'out'])\n .describe(\n 'Pinch direction. \"in\" = pinch fingers together (zoom out / shrink), \"out\" = spread fingers apart (zoom in / enlarge).',\n ),\n distance: z\n .number()\n .positive()\n .optional()\n .describe(\n 'How far each finger moves in pixels. Defaults to a quarter of the shorter screen dimension.',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Duration of the pinch gesture in milliseconds'),\n});\n\nexport type ActionPinchParam = {\n locate?: LocateResultElement;\n direction: 'in' | 'out';\n distance?: number;\n duration?: number;\n};\n\nexport const defineActionPinch = (config: {\n pinch: TouchInputPrimitives['pinch'];\n size(): Promise<Size>;\n}): DeviceAction<ActionPinchParam> | undefined => {\n if (!config.pinch) {\n return undefined;\n }\n\n return defineAction<typeof ActionPinchParamSchema, ActionPinchParam>({\n name: 'Pinch',\n description:\n 'Perform a two-finger pinch gesture. Use direction \"in\" to pinch fingers together (zoom out), or \"out\" to spread fingers apart (zoom in). Optionally specify distance for how far each finger moves.',\n interfaceAlias: 'aiPinch',\n paramSchema: ActionPinchParamSchema,\n sample: {\n locate: { prompt: 'the map area' },\n direction: 'out',\n distance: 200,\n },\n call: async (param) => {\n const { centerX, centerY, startDistance, endDistance, duration } =\n normalizePinchParam(param, await config.size());\n await config.pinch?.(\n { x: centerX, y: centerY },\n { startDistance, endDistance, duration },\n );\n },\n });\n};\n\nexport function normalizePinchParam(\n param: ActionPinchParam,\n screenSize: { width: number; height: number },\n): {\n centerX: number;\n centerY: number;\n startDistance: number;\n endDistance: number;\n duration: number;\n} {\n const { width, height } = screenSize;\n const element = param.locate;\n const centerX = element\n ? Math.round(element.center[0])\n : Math.round(width / 2);\n const centerY = element\n ? Math.round(element.center[1])\n : Math.round(height / 2);\n const duration = param.duration ?? 500;\n\n const baseDistance = Math.round(Math.min(width, height) / 4);\n const fingerDistance = param.distance ?? baseDistance;\n\n const startDistance = baseDistance;\n const endDistance =\n param.direction === 'out'\n ? baseDistance + fingerDistance\n : Math.max(10, baseDistance - fingerDistance);\n\n return { centerX, centerY, startDistance, endDistance, duration };\n}\n\nexport interface MobileInputActionContext {\n input: MobileInputPrimitives;\n size(): Promise<Size>;\n sleep?(timeMs: number): Promise<void>;\n getDefaultAutoDismissKeyboard?(): boolean | undefined;\n systemActions?: SystemInputActionOptions;\n}\n\nexport interface SystemInputActionConfig {\n name: string;\n description: string;\n interfaceAlias?: string;\n delayBeforeRunner?: number;\n delayAfterRunner?: number;\n}\n\nexport interface SystemInputActionOptions {\n backButton?: SystemInputActionConfig;\n homeButton?: SystemInputActionConfig;\n recentAppsButton?: SystemInputActionConfig;\n}\n\nexport interface InputPrimitiveActionOptions {\n size?: () => Promise<Size>;\n sleep?: (timeMs: number) => Promise<void>;\n includeSwipe?: boolean;\n includePinch?: boolean;\n systemActions?: SystemInputActionOptions;\n}\n\nfunction defineSystemInputAction(\n config: SystemInputActionConfig,\n call: () => Promise<void>,\n): DeviceAction<undefined, void> {\n return defineAction<undefined, undefined, void>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n delayBeforeRunner: config.delayBeforeRunner,\n delayAfterRunner: config.delayAfterRunner,\n call,\n });\n}\n\nexport function defineActionsFromInputPrimitives(\n input: InputPrimitives,\n options: InputPrimitiveActionOptions = {},\n): DeviceAction<any>[] {\n const actions: Array<DeviceAction<any> | undefined> = [];\n const { pointer, keyboard, scroll, touch, system } = input;\n\n if (pointer) {\n actions.push(defineActionTap(pointer.tap));\n if (pointer.doubleClick) {\n actions.push(defineActionDoubleClick(pointer.doubleClick));\n }\n if (pointer.rightClick) {\n actions.push(defineActionRightClick(pointer.rightClick));\n }\n if (pointer.hover) {\n actions.push(defineActionHover(pointer.hover));\n }\n if (pointer.dragAndDrop) {\n actions.push(defineActionDragAndDrop(pointer.dragAndDrop));\n }\n if (pointer.longPress) {\n actions.push(defineActionLongPress(pointer.longPress));\n }\n }\n\n if (keyboard) {\n actions.push(\n defineActionInput(keyboard),\n defineActionClearInput(keyboard.clearInput),\n defineActionKeyboardPress(keyboard.keyboardPress),\n defineActionCursorMove({ keyboard, sleep: options.sleep }),\n );\n }\n\n if (scroll) {\n actions.push(defineActionScroll(scroll.scroll));\n }\n\n if (touch?.swipe && options.size && options.includeSwipe !== false) {\n actions.push(defineActionSwipe({ swipe: touch.swipe, size: options.size }));\n }\n\n if (touch?.pinch && options.size && options.includePinch !== false) {\n actions.push(defineActionPinch({ pinch: touch.pinch, size: options.size }));\n }\n\n if (system && options.systemActions) {\n const { systemActions } = options;\n if (system.backButton && systemActions.backButton) {\n actions.push(\n defineSystemInputAction(systemActions.backButton, system.backButton),\n );\n }\n if (system.homeButton && systemActions.homeButton) {\n actions.push(\n defineSystemInputAction(systemActions.homeButton, system.homeButton),\n );\n }\n if (system.recentAppsButton && systemActions.recentAppsButton) {\n actions.push(\n defineSystemInputAction(\n systemActions.recentAppsButton,\n system.recentAppsButton,\n ),\n );\n }\n }\n\n return actions.filter((action): action is DeviceAction<any> =>\n Boolean(action),\n );\n}\n\nexport function createDefaultMobileActions(\n context: MobileInputActionContext,\n): DeviceAction<any>[] {\n return defineActionsFromInputPrimitives(context.input, {\n size: context.size,\n sleep: context.sleep,\n systemActions: context.systemActions,\n });\n}\n\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n timeMs: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n timeMs?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n sample: {\n timeMs: 2000,\n },\n call: async (param) => {\n const duration = param?.timeMs ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n HarmonyDeviceOpt,\n HarmonyDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","pointFromLocate","locate","missingMessage","Error","defineLocatedPointAction","param","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","tap","point","actionRightClickParamSchema","defineActionRightClick","rightClick","actionDoubleClickParamSchema","defineActionDoubleClick","doubleClick","actionHoverParamSchema","defineActionHover","hover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","keyboard","actionKeyboardPressParamSchema","defineActionKeyboardPress","keyboardPress","actionScrollParamSchema","defineActionScroll","scroll","actionDragAndDropParamSchema","defineActionDragAndDrop","dragAndDrop","from","to","ActionLongPressParamSchema","defineActionLongPress","longPress","ActionSwipeParamSchema","normalizeMobileSwipeParam","screenSize","width","height","start","end","startPoint","endPoint","direction","Math","duration","repeatCount","defineActionSwipe","i","actionClearInputParamSchema","defineActionClearInput","clearInput","actionCursorMoveParamSchema","defineActionCursorMove","times","wait","timeMs","Promise","resolve","setTimeout","arrowKey","ActionPinchParamSchema","defineActionPinch","centerX","centerY","startDistance","endDistance","normalizePinchParam","element","baseDistance","fingerDistance","defineSystemInputAction","call","defineActionsFromInputPrimitives","input","options","actions","pointer","touch","system","systemActions","action","Boolean","createDefaultMobileActions","context","ActionSleepParamSchema","defineActionSleep","getDebug"],"mappings":";;;;;;;;;;;;;AA+HO,MAAeA;;QA8CpB;QA8BA;;AACF;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAGT,SAASC,gBACPC,MAAuC,EACvCC,cAAsB;IAEtB,IAAI,CAACD,QACH,MAAM,IAAIE,MAAMD;IAElB,OAAO;QAAE,GAAGD,OAAO,MAAM,CAAC,EAAE;QAAE,GAAGA,OAAO,MAAM,CAAC,EAAE;IAAC;AACpD;AAEA,SAASG,yBAGPL,MAQD;IACC,OAAOD,aAA8B;QACnC,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,aAAaA,OAAO,WAAW;QAC/B,QAAQA,OAAO,MAAM;QACrB,MAAM,OAAOM;YACX,MAAMN,OAAO,IAAI,CACfC,gBAAgBK,MAAM,MAAM,EAAEN,OAAO,oBAAoB,GACzDM;QAEJ;IACF;AACF;AAGO,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,MAEON,yBAAsE;QAC3E,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsB;QAC1C;QACA,sBAAsB;QACtB,MAAM,OAAOK;YACX,MAAMD,IAAIC;QACZ;IACF;AAIK,MAAMC,8BAA8BL,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMK,yBAAyB,CACpCC,aAEOV,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaQ;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAA+B;QACnD;QACA,sBAAsB;QACtB,MAAM,OAAOD;YACX,MAAMG,WAAWH;QACnB;IACF;AAIK,MAAMI,+BAA+BR,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMQ,0BAA0B,CACrCC,cAEOb,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAkB;QACtC;QACA,sBAAsB;QACtB,MAAM,OAAOJ;YACX,MAAMM,YAAYN;QACpB;IACF;AAIK,MAAMO,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMW,oBAAoB,CAC/BC,QAEOhB,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,sBAAsB;QACtB,MAAM,OAAOP;YACX,MAAMS,MAAMT;QACd;IACF;AAIF,MAAMU,yBACJ;AACK,MAAMC,yBAAyBf,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACgB,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQf,4BACL,QAAQ,CAACa,wBACT,QAAQ;IACX,MAAMd,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAW,EACrC,OAAO,CAAC,WACR,QAAQ,CACP;IAEJ,qBAAqBA,EAAAA,OACX,GACP,QAAQ,GACR,QAAQ,CACP;AAEN;AAQO,MAAMkB,oBAAoB,CAC/BC,WAEO5B,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAawB;QACb,QAAQ;YACN,OAAO;YACP,QAAQ;gBAAE,QAAQ;YAAwB;QAC5C;QACA,MAAM,OAAOjB;YAEX,IAAKA,AAA0B,aAA1BA,MAAM,IAAI,EACbA,MAAM,IAAI,GAAG;YAGf,IAAIA,AAAe,YAAfA,MAAM,IAAI,EAAc,YAC1B,MAAMqB,SAAS,UAAU,CAACrB,MAAM,MAAM;YAIxC,IAAI,CAACA,SAAS,CAACA,MAAM,KAAK,EACxB;YAGF,MAAMqB,SAAS,QAAQ,CAACrB,MAAM,KAAK,EAAE;gBACnC,QAAQA,MAAM,MAAM;gBACpB,SAASA,AAAe,eAAfA,MAAM,IAAI;gBACnB,qBAAqBA,MAAM,mBAAmB;YAChD;QACF;IACF;AAIK,MAAMsB,iCAAiCpB,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMqB,4BAA4B,CACvCC,gBAEO/B,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAa6B;QACb,QAAQ;YACN,SAAS;QACX;QACA,MAAM,OAAOtB;YACX,MAAMwB,cAAcxB,MAAM,OAAO,EAAE;gBACjC,QAAQA,MAAM,MAAM;YACtB;QACF;IACF;AAIK,MAAMyB,0BAA0BvB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMuB,qBAAqB,CAChCC,SAEOlC,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAagC;QACb,QAAQ;YACN,WAAW;YACX,YAAY;YACZ,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,MAAM,OAAOzB;YACX,MAAM2B,OAAO3B;QACf;IACF;AAIK,MAAM4B,+BAA+B1B,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAM0B,0BAA0B,CACrCC,cAEOrC,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,MAAM;gBAAE,QAAQ;YAA6B;YAC7C,IAAI;gBAAE,QAAQ;YAAuB;QACvC;QACA,MAAM,OAAO5B;YACX,MAAM+B,OAAO/B,MAAM,IAAI;YACvB,MAAMgC,KAAKhC,MAAM,EAAE;YACnB,IAAI,CAAC+B,MACH,MAAM,IAAIjC,MAAM;YAElB,IAAI,CAACkC,IACH,MAAM,IAAIlC,MAAM;YAElB,MAAMgC,YACJ;gBAAE,GAAGC,KAAK,MAAM,CAAC,EAAE;gBAAE,GAAGA,KAAK,MAAM,CAAC,EAAE;YAAC,GACvC;gBAAE,GAAGC,GAAG,MAAM,CAAC,EAAE;gBAAE,GAAGA,GAAG,MAAM,CAAC,EAAE;YAAC;QAEvC;IACF;AAGK,MAAMC,6BAA6B/B,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMgC,wBAAwB,CACnCC,YAEOpC,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAakC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAqB;QACzC;QACA,sBAAsB;QACtB,MAAM,OAAO3B,OAAON;YAClB,MAAMmC,UAAU7B,OAAO;gBAAE,UAAUN,MAAM,QAAQ;YAAC;QACpD;IACF;AAGK,MAAMoC,yBAAyBlC,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,SAASmC,0BACdrC,KAAuB,EACvBsC,UAA6C;IAO7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAM,EAAEG,KAAK,EAAEC,GAAG,EAAE,GAAG1C;IAEvB,MAAM2C,aAAaF,QACf;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;IAAC,IACzC;QAAE,GAAGF,QAAQ;QAAG,GAAGC,SAAS;IAAE;IAElC,IAAII;IAEJ,IAAIF,KACFE,WAAW;QAAE,GAAGF,IAAI,MAAM,CAAC,EAAE;QAAE,GAAGA,IAAI,MAAM,CAAC,EAAE;IAAC;SAC3C,IAAI1C,MAAM,QAAQ,EAAE;QACzB,MAAM6C,YAAY7C,MAAM,SAAS;QACjC,IAAI,CAAC6C,WACH,MAAM,IAAI/C,MAAM;QAElB8C,WAAW;YACT,GACED,WAAW,CAAC,GACXE,CAAAA,AAAc,YAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,WAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;YACR,GACE2C,WAAW,CAAC,GACXE,CAAAA,AAAc,WAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,SAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;QACV;IACF,OACE,MAAM,IAAIF,MACR;IAIJ8C,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEL;IAC9CK,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEJ;IAE9C,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,IAAIgD,cAAc,AAAwB,YAAxB,OAAOhD,MAAM,MAAM,GAAgBA,MAAM,MAAM,GAAG;IACpE,IAAIgD,AAAgB,MAAhBA,aACFA,cAAc;IAGhB,OAAO;QAAEL;QAAYC;QAAUG;QAAUC;IAAY;AACvD;AAEO,MAAMC,oBAAoB,CAACvD,SAIzBD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa2C;QACb,QAAQ;YACN,OAAO;gBAAE,QAAQ;YAA6B;YAC9C,KAAK;gBAAE,QAAQ;YAA2B;QAC5C;QACA,MAAM,OAAOpC;YACX,MAAM,EAAE2C,UAAU,EAAEC,QAAQ,EAAEG,QAAQ,EAAEC,WAAW,EAAE,GACnDX,0BAA0BrC,OAAO,MAAMN,OAAO,IAAI;YACpD,IAAK,IAAIwD,IAAI,GAAGA,IAAIF,aAAaE,IAC/B,MAAMxD,OAAO,KAAK,CAACiD,YAAYC,UAAU;gBAAEG;YAAS;QAExD;IACF;AAIK,MAAMI,8BAA8BjD,EAAE,MAAM,CAAC;IAClD,QAAQC,4BACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMiD,yBAAyB,CACpCC,aAEO5D,aAGL;QACA,MAAM;QACN,aAAauB;QACb,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAyB;QAC7C;QACA,MAAM,OAAOnD;YACX,MAAMqD,WAAWrD,MAAM,MAAM;QAC/B;IACF;AAIK,MAAMsD,8BAA8BpD,EAAE,MAAM,CAAC;IAClD,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;KAAQ,EACtB,QAAQ,CAAC;IACZ,OAAOA,EAAAA,MACE,GACN,GAAG,GACH,GAAG,CAAC,GACJ,OAAO,CAAC,GACR,QAAQ,CACP;AAEN;AAMO,MAAMqD,yBAAyB,CAAC7D,SAI9BD,aAGL;QACA,MAAM;QACN,aACE;QACF,aAAa6D;QACb,QAAQ;YACN,WAAW;YACX,OAAO;QACT;QACA,MAAM,OAAOtD;YACX,MAAMwD,QAAQxD,MAAM,KAAK,IAAI;YAC7B,IAAIN,OAAO,QAAQ,CAAC,UAAU,EAAE,YAC9B,MAAMA,OAAO,QAAQ,CAAC,UAAU,CAACM,MAAM,SAAS,EAAEwD;YAIpD,MAAMC,OACJ/D,OAAO,KAAK,IACV,EAAAgE,SACA,IAAIC,QAAc,CAACC,UAAYC,WAAWD,SAASF,QAAO;YAC9D,MAAMI,WAAW9D,AAAoB,WAApBA,MAAM,SAAS,GAAc,cAAc;YAC5D,IAAK,IAAIkD,IAAI,GAAGA,IAAIM,OAAON,IAAK;gBAC9B,MAAMxD,OAAO,QAAQ,CAAC,aAAa,CAACoE;gBACpC,MAAML,KAAK;YACb;QACF;IACF;AAIK,MAAMM,yBAAyB7D,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;KAAM,EAClB,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AASO,MAAM8D,oBAAoB,CAACtE;IAIhC,IAAI,CAACA,OAAO,KAAK,EACf;IAGF,OAAOD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAe;YACjC,WAAW;YACX,UAAU;QACZ;QACA,MAAM,OAAO/D;YACX,MAAM,EAAEiE,OAAO,EAAEC,OAAO,EAAEC,aAAa,EAAEC,WAAW,EAAErB,QAAQ,EAAE,GAC9DsB,oBAAoBrE,OAAO,MAAMN,OAAO,IAAI;YAC9C,MAAMA,OAAO,KAAK,GAChB;gBAAE,GAAGuE;gBAAS,GAAGC;YAAQ,GACzB;gBAAEC;gBAAeC;gBAAarB;YAAS;QAE3C;IACF;AACF;AAEO,SAASsB,oBACdrE,KAAuB,EACvBsC,UAA6C;IAQ7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAMgC,UAAUtE,MAAM,MAAM;IAC5B,MAAMiE,UAAUK,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACP,QAAQ;IACvB,MAAM2B,UAAUI,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACN,SAAS;IACxB,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,MAAMuE,eAAezB,KAAK,KAAK,CAACA,KAAK,GAAG,CAACP,OAAOC,UAAU;IAC1D,MAAMgC,iBAAiBxE,MAAM,QAAQ,IAAIuE;IAEzC,MAAMJ,gBAAgBI;IACtB,MAAMH,cACJpE,AAAoB,UAApBA,MAAM,SAAS,GACXuE,eAAeC,iBACf1B,KAAK,GAAG,CAAC,IAAIyB,eAAeC;IAElC,OAAO;QAAEP;QAASC;QAASC;QAAeC;QAAarB;IAAS;AAClE;AAgCA,SAAS0B,wBACP/E,MAA+B,EAC/BgF,IAAyB;IAEzB,OAAOjF,aAAyC;QAC9C,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,mBAAmBA,OAAO,iBAAiB;QAC3C,kBAAkBA,OAAO,gBAAgB;QACzCgF;IACF;AACF;AAEO,SAASC,iCACdC,KAAsB,EACtBC,UAAuC,CAAC,CAAC;IAEzC,MAAMC,UAAgD,EAAE;IACxD,MAAM,EAAEC,OAAO,EAAE1D,QAAQ,EAAEM,MAAM,EAAEqD,KAAK,EAAEC,MAAM,EAAE,GAAGL;IAErD,IAAIG,SAAS;QACXD,QAAQ,IAAI,CAAC1E,gBAAgB2E,QAAQ,GAAG;QACxC,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACnE,wBAAwBoE,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,UAAU,EACpBD,QAAQ,IAAI,CAACtE,uBAAuBuE,QAAQ,UAAU;QAExD,IAAIA,QAAQ,KAAK,EACfD,QAAQ,IAAI,CAAChE,kBAAkBiE,QAAQ,KAAK;QAE9C,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACjD,wBAAwBkD,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,SAAS,EACnBD,QAAQ,IAAI,CAAC5C,sBAAsB6C,QAAQ,SAAS;IAExD;IAEA,IAAI1D,UACFyD,QAAQ,IAAI,CACV1D,kBAAkBC,WAClB+B,uBAAuB/B,SAAS,UAAU,GAC1CE,0BAA0BF,SAAS,aAAa,GAChDkC,uBAAuB;QAAElC;QAAU,OAAOwD,QAAQ,KAAK;IAAC;IAI5D,IAAIlD,QACFmD,QAAQ,IAAI,CAACpD,mBAAmBC,OAAO,MAAM;IAG/C,IAAIqD,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAAC7B,kBAAkB;QAAE,OAAO+B,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAIG,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAACd,kBAAkB;QAAE,OAAOgB,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAII,UAAUJ,QAAQ,aAAa,EAAE;QACnC,MAAM,EAAEK,aAAa,EAAE,GAAGL;QAC1B,IAAII,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,gBAAgB,IAAIC,cAAc,gBAAgB,EAC3DJ,QAAQ,IAAI,CACVL,wBACES,cAAc,gBAAgB,EAC9BD,OAAO,gBAAgB;IAI/B;IAEA,OAAOH,QAAQ,MAAM,CAAC,CAACK,SACrBC,QAAQD;AAEZ;AAEO,SAASE,2BACdC,OAAiC;IAEjC,OAAOX,iCAAiCW,QAAQ,KAAK,EAAE;QACrD,MAAMA,QAAQ,IAAI;QAClB,OAAOA,QAAQ,KAAK;QACpB,eAAeA,QAAQ,aAAa;IACtC;AACF;AAGO,MAAMC,yBAAyBrF,EAAE,MAAM,CAAC;IAC7C,QAAQA,EAAAA,MACC,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsF,oBAAoB,IACxB/F,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa8F;QACb,QAAQ;YACN,QAAQ;QACV;QACA,MAAM,OAAOvF;YACX,MAAM+C,WAAW/C,OAAO,UAAU;YAClCyF,SAAS,wBAAwB,CAAC,aAAa,EAAE1C,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIY,QAAQ,CAACC,UAAYC,WAAWD,SAASb;QACrD;IACF"}
|
|
1
|
+
{"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import { getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport interface MjpegStreamFrame {\n /** Raw base64-encoded image bytes WITHOUT a `data:image/...;base64,` prefix. */\n data: string;\n contentType?: string;\n}\n\nexport interface MjpegStreamHandle {\n stop(): void | Promise<void>;\n}\n\nexport interface MjpegStreamOptions {\n signal?: AbortSignal;\n onFrame(frame: MjpegStreamFrame): void;\n onError?(error: unknown): void;\n}\n\n/** A point in device-pixel coordinates on the screen. */\nexport interface PointerPoint {\n x: number;\n y: number;\n}\n\nexport interface PointerInputPrimitives {\n tap(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n doubleClick?(p: PointerPoint): Promise<void>;\n rightClick?(p: PointerPoint): Promise<void>;\n hover?(p: PointerPoint): Promise<void>;\n longPress?(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop?(from: PointerPoint, to: PointerPoint): Promise<void>;\n}\n\nexport interface TouchInputPrimitives {\n swipe(\n start: PointerPoint,\n end: PointerPoint,\n opts?: { duration?: number; repeat?: number },\n ): Promise<void>;\n pinch?(\n center: PointerPoint,\n opts: { startDistance: number; endDistance: number; duration: number },\n ): Promise<void>;\n}\n\nexport interface KeyboardInputPrimitives {\n keyboardPress(keyName: string, opts?: { target?: unknown }): Promise<void>;\n cursorMove?(direction: 'left' | 'right', times?: number): Promise<void>;\n typeText(\n value: string,\n opts?: {\n autoDismissKeyboard?: boolean;\n keyboardDismissStrategy?: 'esc-first' | 'back-first';\n target?: unknown;\n replace?: boolean;\n focusOnly?: boolean;\n },\n ): Promise<void>;\n clearInput(target?: unknown): Promise<void>;\n}\n\nexport interface ScrollInputPrimitives {\n scroll(param: ActionScrollParam): Promise<void>;\n}\n\nexport interface SystemInputPrimitives {\n backButton?(): Promise<void>;\n homeButton?(): Promise<void>;\n recentAppsButton?(): Promise<void>;\n}\n\nexport interface InputPrimitives {\n pointer?: PointerInputPrimitives;\n keyboard?: KeyboardInputPrimitives;\n touch?: TouchInputPrimitives;\n scroll?: ScrollInputPrimitives;\n system?: SystemInputPrimitives;\n}\n\nexport interface MobileInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface BrowserInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n longPress(p: PointerPoint, opts?: { duration?: number }): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n touch: TouchInputPrimitives;\n}\n\nexport interface ComputerInputPrimitives extends InputPrimitives {\n pointer: PointerInputPrimitives & {\n doubleClick(p: PointerPoint): Promise<void>;\n rightClick(p: PointerPoint): Promise<void>;\n hover(p: PointerPoint): Promise<void>;\n dragAndDrop(from: PointerPoint, to: PointerPoint): Promise<void>;\n };\n keyboard: KeyboardInputPrimitives;\n scroll: ScrollInputPrimitives;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForPoint?(\n center: [number, number],\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n /**\n * Get the current device-local time as a formatted string.\n * Prefer this for user-visible time because timestamps alone do not preserve\n * the target device's timezone when formatted on the host machine.\n */\n getDeviceLocalTimeString?(format?: string): Promise<string>;\n\n /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */\n mjpegStreamUrl?: string;\n\n /**\n * Optional in-process MJPEG frame producer. Implementations can push raw\n * base64 frames here when there is no standalone native MJPEG URL, e.g.\n * Chromium CDP Page.startScreencast for web previews.\n */\n startMjpegStream?(\n options: MjpegStreamOptions,\n ): MjpegStreamHandle | undefined | Promise<MjpegStreamHandle | undefined>;\n\n /**\n * Optional hook used after keyboard-only actions to force a fresh frame on\n * the active MJPEG stream. Implementations should be a no-op when no stream\n * is active.\n */\n flushPendingVisualUpdate?(): Promise<void>;\n\n /**\n * Optional navigation state probe for browser-like interfaces, used to drive\n * loading indicators in playground UIs. Returning `undefined` means the\n * interface does not expose this concept.\n */\n navigationState?(): Promise<{ isLoading: boolean }>;\n\n /**\n * Low-level device input surface. Platform implementations expose transport\n * primitives here; higher-level AI actions and manual pointer dispatch should\n * adapt to this instead of duplicating platform gesture logic.\n */\n inputPrimitives?: InputPrimitives;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\nfunction pointFromLocate(\n locate: LocateResultElement | undefined,\n missingMessage: string,\n): PointerPoint {\n if (!locate) {\n throw new Error(missingMessage);\n }\n return { x: locate.center[0], y: locate.center[1] };\n}\n\nfunction defineLocatedPointAction<\n TSchema extends z.ZodType,\n TParam extends { locate: LocateResultElement },\n>(config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema: TSchema;\n sample: DeviceAction<TParam>['sample'];\n missingLocateMessage: string;\n call: (point: PointerPoint, param: TParam) => Promise<void>;\n}): DeviceAction<TParam> {\n return defineAction<TSchema, TParam>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n paramSchema: config.paramSchema,\n sample: config.sample,\n call: async (param) => {\n await config.call(\n pointFromLocate(param.locate, config.missingLocateMessage),\n param,\n );\n },\n });\n}\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n tap: PointerInputPrimitives['tap'],\n): DeviceAction<ActionTapParam> => {\n return defineLocatedPointAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n sample: {\n locate: { prompt: 'the \"Submit\" button' },\n },\n missingLocateMessage: 'Element not found, cannot tap',\n call: async (point) => {\n await tap(point);\n },\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n rightClick: NonNullable<PointerInputPrimitives['rightClick']>,\n): DeviceAction<ActionRightClickParam> => {\n return defineLocatedPointAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n sample: {\n locate: { prompt: 'the file icon on the desktop' },\n },\n missingLocateMessage: 'Element not found, cannot right click',\n call: async (point) => {\n await rightClick(point);\n },\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n doubleClick: NonNullable<PointerInputPrimitives['doubleClick']>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineLocatedPointAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n sample: {\n locate: { prompt: 'the folder icon' },\n },\n missingLocateMessage: 'Element not found, cannot double click',\n call: async (point) => {\n await doubleClick(point);\n },\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n hover: NonNullable<PointerInputPrimitives['hover']>,\n): DeviceAction<ActionHoverParam> => {\n return defineLocatedPointAction<\n typeof actionHoverParamSchema,\n ActionHoverParam\n >({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n sample: {\n locate: { prompt: 'the navigation menu item \"Products\"' },\n },\n missingLocateMessage: 'Element not found, cannot hover',\n call: async (point) => {\n await hover(point);\n },\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'typeOnly'])\n .default('replace')\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"typeOnly\" - type the value directly without clearing the field first; \"clear\" - clear the field without inputting new text.',\n ),\n autoDismissKeyboard: z\n .boolean()\n .optional()\n .describe(\n 'If true, the keyboard will be dismissed after the input is completed. Do not set it unless the user asks you to do so.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'typeOnly' | 'append';\n autoDismissKeyboard?: boolean;\n};\n\nexport const defineActionInput = (\n keyboard: KeyboardInputPrimitives,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n sample: {\n value: 'test@example.com',\n locate: { prompt: 'the email input field' },\n },\n call: async (param) => {\n // backward compat: convert deprecated 'append' to 'typeOnly'\n if ((param.mode as string) === 'append') {\n param.mode = 'typeOnly';\n }\n\n if (param.mode === 'clear') {\n await keyboard.clearInput(param.locate);\n return;\n }\n\n if (!param || !param.value) {\n return;\n }\n\n await keyboard.typeText(param.value, {\n target: param.locate,\n replace: param.mode !== 'typeOnly',\n autoDismissKeyboard: param.autoDismissKeyboard,\n });\n },\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n keyboardPress: KeyboardInputPrimitives['keyboardPress'],\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n sample: {\n keyName: 'Enter',\n },\n call: async (param) => {\n await keyboardPress(param.keyName, {\n target: param.locate,\n });\n },\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling all the way to the bottom by rapidly scrolling 5-10 times (skipping intermediate content until reaching the bottom), \"scrollToTop\" for scrolling all the way to the top by rapidly scrolling 5-10 times (skipping intermediate content until reaching the top), \"scrollToRight\" for scrolling all the way to the right by rapidly scrolling multiple times, \"scrollToLeft\" for scrolling all the way to the left by rapidly scrolling multiple times',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n scroll: ScrollInputPrimitives['scroll'],\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or a scrollable element to browse content. This is the preferred way to scroll on all platforms, including mobile. Supports scrollToBottom/scrollToTop for boundary navigation. Default: direction `down`, scrollType `singleAction`, distance `null`.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n sample: {\n direction: 'down',\n scrollType: 'singleAction',\n locate: { prompt: 'the center of the product list area' },\n },\n call: async (param) => {\n await scroll(param);\n },\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n dragAndDrop: NonNullable<PointerInputPrimitives['dragAndDrop']>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Pick up a specific UI element and move it to a new position (e.g., reorder a card, move a file into a folder, sort list items). The element itself moves with your finger/mouse.',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n sample: {\n from: { prompt: 'the \"report.pdf\" file icon' },\n to: { prompt: 'the upload drop zone' },\n },\n call: async (param) => {\n const from = param.from;\n const to = param.to;\n if (!from) {\n throw new Error('missing \"from\" param for drag and drop');\n }\n if (!to) {\n throw new Error('missing \"to\" param for drag and drop');\n }\n await dragAndDrop(\n { x: from.center[0], y: from.center[1] },\n { x: to.center[0], y: to.center[1] },\n );\n },\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n longPress: NonNullable<PointerInputPrimitives['longPress']>,\n): DeviceAction<ActionLongPressParam> => {\n return defineLocatedPointAction<\n typeof ActionLongPressParamSchema,\n ActionLongPressParam\n >({\n name: 'LongPress',\n description: 'Long press the element',\n interfaceAlias: 'aiLongPress',\n paramSchema: ActionLongPressParamSchema,\n sample: {\n locate: { prompt: 'the message bubble' },\n },\n missingLocateMessage: 'LongPress requires an element to be located',\n call: async (point, param) => {\n await longPress(point, { duration: param.duration });\n },\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport function normalizeMobileSwipeParam(\n param: ActionSwipeParam,\n screenSize: { width: number; height: number },\n): {\n startPoint: { x: number; y: number };\n endPoint: { x: number; y: number };\n duration: number;\n repeatCount: number;\n} {\n const { width, height } = screenSize;\n const { start, end } = param;\n\n const startPoint = start\n ? { x: start.center[0], y: start.center[1] }\n : { x: width / 2, y: height / 2 };\n\n let endPoint: { x: number; y: number };\n\n if (end) {\n endPoint = { x: end.center[0], y: end.center[1] };\n } else if (param.distance) {\n const direction = param.direction;\n if (!direction) {\n throw new Error('direction is required for swipe gesture');\n }\n endPoint = {\n x:\n startPoint.x +\n (direction === 'right'\n ? param.distance\n : direction === 'left'\n ? -param.distance\n : 0),\n y:\n startPoint.y +\n (direction === 'down'\n ? param.distance\n : direction === 'up'\n ? -param.distance\n : 0),\n };\n } else {\n throw new Error(\n 'Either end or distance must be specified for swipe gesture',\n );\n }\n\n endPoint.x = Math.max(0, Math.min(endPoint.x, width));\n endPoint.y = Math.max(0, Math.min(endPoint.y, height));\n\n const duration = param.duration ?? 300;\n\n let repeatCount = typeof param.repeat === 'number' ? param.repeat : 1;\n if (repeatCount === 0) {\n repeatCount = 10;\n }\n\n return { startPoint, endPoint, duration, repeatCount };\n}\n\nexport const defineActionSwipe = (config: {\n swipe: TouchInputPrimitives['swipe'];\n size(): Promise<Size>;\n}): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a touch gesture for interactions beyond regular scrolling (e.g., flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use \"distance\" + \"direction\" for relative movement, or \"end\" for precise endpoint.',\n paramSchema: ActionSwipeParamSchema,\n sample: {\n start: { prompt: 'center of the notification' },\n end: { prompt: 'upper edge of the screen' },\n },\n call: async (param) => {\n const { startPoint, endPoint, duration, repeatCount } =\n normalizeMobileSwipeParam(param, await config.size());\n for (let i = 0; i < repeatCount; i++) {\n await config.swipe(startPoint, endPoint, { duration });\n }\n },\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n clearInput: KeyboardInputPrimitives['clearInput'],\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n sample: {\n locate: { prompt: 'the search input field' },\n },\n call: async (param) => {\n await clearInput(param.locate);\n },\n });\n};\n\n// CursorMove\nexport const actionCursorMoveParamSchema = z.object({\n direction: z\n .enum(['left', 'right'])\n .describe('The direction to move the cursor'),\n times: z\n .number()\n .int()\n .min(1)\n .default(1)\n .describe(\n 'The number of times to move the cursor in the specified direction',\n ),\n});\nexport type ActionCursorMoveParam = {\n direction: 'left' | 'right';\n times?: number;\n};\n\nexport const defineActionCursorMove = (config: {\n keyboard: Pick<KeyboardInputPrimitives, 'keyboardPress' | 'cursorMove'>;\n sleep?(timeMs: number): Promise<void>;\n}): DeviceAction<ActionCursorMoveParam> => {\n return defineAction<\n typeof actionCursorMoveParamSchema,\n ActionCursorMoveParam\n >({\n name: 'CursorMove',\n description:\n 'Move the text cursor (caret) left or right within an input field or text area. Use this to reposition the cursor without selecting text.',\n paramSchema: actionCursorMoveParamSchema,\n sample: {\n direction: 'left',\n times: 3,\n },\n call: async (param) => {\n const times = param.times ?? 1;\n if (config.keyboard.cursorMove) {\n await config.keyboard.cursorMove(param.direction, times);\n return;\n }\n\n const wait =\n config.sleep ??\n ((timeMs: number) =>\n new Promise<void>((resolve) => setTimeout(resolve, timeMs)));\n const arrowKey = param.direction === 'left' ? 'ArrowLeft' : 'ArrowRight';\n for (let i = 0; i < times; i++) {\n await config.keyboard.keyboardPress(arrowKey);\n await wait(100);\n }\n },\n });\n};\n\n// Pinch\nexport const ActionPinchParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'The element to pinch on. If not specified, the center of the screen will be used',\n ),\n direction: z\n .enum(['in', 'out'])\n .describe(\n 'Pinch direction. \"in\" = pinch fingers together (zoom out / shrink), \"out\" = spread fingers apart (zoom in / enlarge).',\n ),\n distance: z\n .number()\n .positive()\n .optional()\n .describe(\n 'How far each finger moves in pixels. Defaults to a quarter of the shorter screen dimension.',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Duration of the pinch gesture in milliseconds'),\n});\n\nexport type ActionPinchParam = {\n locate?: LocateResultElement;\n direction: 'in' | 'out';\n distance?: number;\n duration?: number;\n};\n\nexport const defineActionPinch = (config: {\n pinch: TouchInputPrimitives['pinch'];\n size(): Promise<Size>;\n}): DeviceAction<ActionPinchParam> | undefined => {\n if (!config.pinch) {\n return undefined;\n }\n\n return defineAction<typeof ActionPinchParamSchema, ActionPinchParam>({\n name: 'Pinch',\n description:\n 'Perform a two-finger pinch gesture. Use direction \"in\" to pinch fingers together (zoom out), or \"out\" to spread fingers apart (zoom in). Optionally specify distance for how far each finger moves.',\n interfaceAlias: 'aiPinch',\n paramSchema: ActionPinchParamSchema,\n sample: {\n locate: { prompt: 'the map area' },\n direction: 'out',\n distance: 200,\n },\n call: async (param) => {\n const { centerX, centerY, startDistance, endDistance, duration } =\n normalizePinchParam(param, await config.size());\n await config.pinch?.(\n { x: centerX, y: centerY },\n { startDistance, endDistance, duration },\n );\n },\n });\n};\n\nexport function normalizePinchParam(\n param: ActionPinchParam,\n screenSize: { width: number; height: number },\n): {\n centerX: number;\n centerY: number;\n startDistance: number;\n endDistance: number;\n duration: number;\n} {\n const { width, height } = screenSize;\n const element = param.locate;\n const centerX = element\n ? Math.round(element.center[0])\n : Math.round(width / 2);\n const centerY = element\n ? Math.round(element.center[1])\n : Math.round(height / 2);\n const duration = param.duration ?? 500;\n\n const baseDistance = Math.round(Math.min(width, height) / 4);\n const fingerDistance = param.distance ?? baseDistance;\n\n const startDistance = baseDistance;\n const endDistance =\n param.direction === 'out'\n ? baseDistance + fingerDistance\n : Math.max(10, baseDistance - fingerDistance);\n\n return { centerX, centerY, startDistance, endDistance, duration };\n}\n\nexport interface MobileInputActionContext {\n input: MobileInputPrimitives;\n size(): Promise<Size>;\n sleep?(timeMs: number): Promise<void>;\n getDefaultAutoDismissKeyboard?(): boolean | undefined;\n systemActions?: SystemInputActionOptions;\n}\n\nexport interface SystemInputActionConfig {\n name: string;\n description: string;\n interfaceAlias?: string;\n delayBeforeRunner?: number;\n delayAfterRunner?: number;\n}\n\nexport interface SystemInputActionOptions {\n backButton?: SystemInputActionConfig;\n homeButton?: SystemInputActionConfig;\n recentAppsButton?: SystemInputActionConfig;\n}\n\nexport interface InputPrimitiveActionOptions {\n size?: () => Promise<Size>;\n sleep?: (timeMs: number) => Promise<void>;\n includeSwipe?: boolean;\n includePinch?: boolean;\n systemActions?: SystemInputActionOptions;\n}\n\nfunction defineSystemInputAction(\n config: SystemInputActionConfig,\n call: () => Promise<void>,\n): DeviceAction<undefined, void> {\n return defineAction<undefined, undefined, void>({\n name: config.name,\n description: config.description,\n interfaceAlias: config.interfaceAlias,\n delayBeforeRunner: config.delayBeforeRunner,\n delayAfterRunner: config.delayAfterRunner,\n call,\n });\n}\n\nexport function defineActionsFromInputPrimitives(\n input: InputPrimitives,\n options: InputPrimitiveActionOptions = {},\n): DeviceAction<any>[] {\n const actions: Array<DeviceAction<any> | undefined> = [];\n const { pointer, keyboard, scroll, touch, system } = input;\n\n if (pointer) {\n actions.push(defineActionTap(pointer.tap));\n if (pointer.doubleClick) {\n actions.push(defineActionDoubleClick(pointer.doubleClick));\n }\n if (pointer.rightClick) {\n actions.push(defineActionRightClick(pointer.rightClick));\n }\n if (pointer.hover) {\n actions.push(defineActionHover(pointer.hover));\n }\n if (pointer.dragAndDrop) {\n actions.push(defineActionDragAndDrop(pointer.dragAndDrop));\n }\n if (pointer.longPress) {\n actions.push(defineActionLongPress(pointer.longPress));\n }\n }\n\n if (keyboard) {\n actions.push(\n defineActionInput(keyboard),\n defineActionClearInput(keyboard.clearInput),\n defineActionKeyboardPress(keyboard.keyboardPress),\n defineActionCursorMove({ keyboard, sleep: options.sleep }),\n );\n }\n\n if (scroll) {\n actions.push(defineActionScroll(scroll.scroll));\n }\n\n if (touch?.swipe && options.size && options.includeSwipe !== false) {\n actions.push(defineActionSwipe({ swipe: touch.swipe, size: options.size }));\n }\n\n if (touch?.pinch && options.size && options.includePinch !== false) {\n actions.push(defineActionPinch({ pinch: touch.pinch, size: options.size }));\n }\n\n if (system && options.systemActions) {\n const { systemActions } = options;\n if (system.backButton && systemActions.backButton) {\n actions.push(\n defineSystemInputAction(systemActions.backButton, system.backButton),\n );\n }\n if (system.homeButton && systemActions.homeButton) {\n actions.push(\n defineSystemInputAction(systemActions.homeButton, system.homeButton),\n );\n }\n if (system.recentAppsButton && systemActions.recentAppsButton) {\n actions.push(\n defineSystemInputAction(\n systemActions.recentAppsButton,\n system.recentAppsButton,\n ),\n );\n }\n }\n\n return actions.filter((action): action is DeviceAction<any> =>\n Boolean(action),\n );\n}\n\nexport function createDefaultMobileActions(\n context: MobileInputActionContext,\n): DeviceAction<any>[] {\n return defineActionsFromInputPrimitives(context.input, {\n size: context.size,\n sleep: context.sleep,\n systemActions: context.systemActions,\n });\n}\n\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n timeMs: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n timeMs?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n sample: {\n timeMs: 2000,\n },\n call: async (param) => {\n const duration = param?.timeMs ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n HarmonyDeviceOpt,\n HarmonyDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","pointFromLocate","locate","missingMessage","Error","defineLocatedPointAction","param","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","tap","point","actionRightClickParamSchema","defineActionRightClick","rightClick","actionDoubleClickParamSchema","defineActionDoubleClick","doubleClick","actionHoverParamSchema","defineActionHover","hover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","keyboard","actionKeyboardPressParamSchema","defineActionKeyboardPress","keyboardPress","actionScrollParamSchema","defineActionScroll","scroll","actionDragAndDropParamSchema","defineActionDragAndDrop","dragAndDrop","from","to","ActionLongPressParamSchema","defineActionLongPress","longPress","ActionSwipeParamSchema","normalizeMobileSwipeParam","screenSize","width","height","start","end","startPoint","endPoint","direction","Math","duration","repeatCount","defineActionSwipe","i","actionClearInputParamSchema","defineActionClearInput","clearInput","actionCursorMoveParamSchema","defineActionCursorMove","times","wait","timeMs","Promise","resolve","setTimeout","arrowKey","ActionPinchParamSchema","defineActionPinch","centerX","centerY","startDistance","endDistance","normalizePinchParam","element","baseDistance","fingerDistance","defineSystemInputAction","call","defineActionsFromInputPrimitives","input","options","actions","pointer","touch","system","systemActions","action","Boolean","createDefaultMobileActions","context","ActionSleepParamSchema","defineActionSleep","getDebug"],"mappings":";;;;;;;;;;;;;AAgIO,MAAeA;;QA8CpB;QA8BA;;AACF;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAGT,SAASC,gBACPC,MAAuC,EACvCC,cAAsB;IAEtB,IAAI,CAACD,QACH,MAAM,IAAIE,MAAMD;IAElB,OAAO;QAAE,GAAGD,OAAO,MAAM,CAAC,EAAE;QAAE,GAAGA,OAAO,MAAM,CAAC,EAAE;IAAC;AACpD;AAEA,SAASG,yBAGPL,MAQD;IACC,OAAOD,aAA8B;QACnC,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,aAAaA,OAAO,WAAW;QAC/B,QAAQA,OAAO,MAAM;QACrB,MAAM,OAAOM;YACX,MAAMN,OAAO,IAAI,CACfC,gBAAgBK,MAAM,MAAM,EAAEN,OAAO,oBAAoB,GACzDM;QAEJ;IACF;AACF;AAGO,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,MAEON,yBAAsE;QAC3E,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsB;QAC1C;QACA,sBAAsB;QACtB,MAAM,OAAOK;YACX,MAAMD,IAAIC;QACZ;IACF;AAIK,MAAMC,8BAA8BL,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMK,yBAAyB,CACpCC,aAEOV,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaQ;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAA+B;QACnD;QACA,sBAAsB;QACtB,MAAM,OAAOD;YACX,MAAMG,WAAWH;QACnB;IACF;AAIK,MAAMI,+BAA+BR,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMQ,0BAA0B,CACrCC,cAEOb,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAkB;QACtC;QACA,sBAAsB;QACtB,MAAM,OAAOJ;YACX,MAAMM,YAAYN;QACpB;IACF;AAIK,MAAMO,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMW,oBAAoB,CAC/BC,QAEOhB,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,sBAAsB;QACtB,MAAM,OAAOP;YACX,MAAMS,MAAMT;QACd;IACF;AAIF,MAAMU,yBACJ;AACK,MAAMC,yBAAyBf,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACgB,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQf,4BACL,QAAQ,CAACa,wBACT,QAAQ;IACX,MAAMd,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAW,EACrC,OAAO,CAAC,WACR,QAAQ,CACP;IAEJ,qBAAqBA,EAAAA,OACX,GACP,QAAQ,GACR,QAAQ,CACP;AAEN;AAQO,MAAMkB,oBAAoB,CAC/BC,WAEO5B,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAawB;QACb,QAAQ;YACN,OAAO;YACP,QAAQ;gBAAE,QAAQ;YAAwB;QAC5C;QACA,MAAM,OAAOjB;YAEX,IAAKA,AAA0B,aAA1BA,MAAM,IAAI,EACbA,MAAM,IAAI,GAAG;YAGf,IAAIA,AAAe,YAAfA,MAAM,IAAI,EAAc,YAC1B,MAAMqB,SAAS,UAAU,CAACrB,MAAM,MAAM;YAIxC,IAAI,CAACA,SAAS,CAACA,MAAM,KAAK,EACxB;YAGF,MAAMqB,SAAS,QAAQ,CAACrB,MAAM,KAAK,EAAE;gBACnC,QAAQA,MAAM,MAAM;gBACpB,SAASA,AAAe,eAAfA,MAAM,IAAI;gBACnB,qBAAqBA,MAAM,mBAAmB;YAChD;QACF;IACF;AAIK,MAAMsB,iCAAiCpB,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMqB,4BAA4B,CACvCC,gBAEO/B,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAa6B;QACb,QAAQ;YACN,SAAS;QACX;QACA,MAAM,OAAOtB;YACX,MAAMwB,cAAcxB,MAAM,OAAO,EAAE;gBACjC,QAAQA,MAAM,MAAM;YACtB;QACF;IACF;AAIK,MAAMyB,0BAA0BvB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMuB,qBAAqB,CAChCC,SAEOlC,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAagC;QACb,QAAQ;YACN,WAAW;YACX,YAAY;YACZ,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACA,MAAM,OAAOzB;YACX,MAAM2B,OAAO3B;QACf;IACF;AAIK,MAAM4B,+BAA+B1B,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAM0B,0BAA0B,CACrCC,cAEOrC,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,MAAM;gBAAE,QAAQ;YAA6B;YAC7C,IAAI;gBAAE,QAAQ;YAAuB;QACvC;QACA,MAAM,OAAO5B;YACX,MAAM+B,OAAO/B,MAAM,IAAI;YACvB,MAAMgC,KAAKhC,MAAM,EAAE;YACnB,IAAI,CAAC+B,MACH,MAAM,IAAIjC,MAAM;YAElB,IAAI,CAACkC,IACH,MAAM,IAAIlC,MAAM;YAElB,MAAMgC,YACJ;gBAAE,GAAGC,KAAK,MAAM,CAAC,EAAE;gBAAE,GAAGA,KAAK,MAAM,CAAC,EAAE;YAAC,GACvC;gBAAE,GAAGC,GAAG,MAAM,CAAC,EAAE;gBAAE,GAAGA,GAAG,MAAM,CAAC,EAAE;YAAC;QAEvC;IACF;AAGK,MAAMC,6BAA6B/B,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMgC,wBAAwB,CACnCC,YAEOpC,yBAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAakC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAqB;QACzC;QACA,sBAAsB;QACtB,MAAM,OAAO3B,OAAON;YAClB,MAAMmC,UAAU7B,OAAO;gBAAE,UAAUN,MAAM,QAAQ;YAAC;QACpD;IACF;AAGK,MAAMoC,yBAAyBlC,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,SAASmC,0BACdrC,KAAuB,EACvBsC,UAA6C;IAO7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAM,EAAEG,KAAK,EAAEC,GAAG,EAAE,GAAG1C;IAEvB,MAAM2C,aAAaF,QACf;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;IAAC,IACzC;QAAE,GAAGF,QAAQ;QAAG,GAAGC,SAAS;IAAE;IAElC,IAAII;IAEJ,IAAIF,KACFE,WAAW;QAAE,GAAGF,IAAI,MAAM,CAAC,EAAE;QAAE,GAAGA,IAAI,MAAM,CAAC,EAAE;IAAC;SAC3C,IAAI1C,MAAM,QAAQ,EAAE;QACzB,MAAM6C,YAAY7C,MAAM,SAAS;QACjC,IAAI,CAAC6C,WACH,MAAM,IAAI/C,MAAM;QAElB8C,WAAW;YACT,GACED,WAAW,CAAC,GACXE,CAAAA,AAAc,YAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,WAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;YACR,GACE2C,WAAW,CAAC,GACXE,CAAAA,AAAc,WAAdA,YACG7C,MAAM,QAAQ,GACd6C,AAAc,SAAdA,YACE,CAAC7C,MAAM,QAAQ,GACf;QACV;IACF,OACE,MAAM,IAAIF,MACR;IAIJ8C,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEL;IAC9CK,SAAS,CAAC,GAAGE,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACF,SAAS,CAAC,EAAEJ;IAE9C,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,IAAIgD,cAAc,AAAwB,YAAxB,OAAOhD,MAAM,MAAM,GAAgBA,MAAM,MAAM,GAAG;IACpE,IAAIgD,AAAgB,MAAhBA,aACFA,cAAc;IAGhB,OAAO;QAAEL;QAAYC;QAAUG;QAAUC;IAAY;AACvD;AAEO,MAAMC,oBAAoB,CAACvD,SAIzBD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa2C;QACb,QAAQ;YACN,OAAO;gBAAE,QAAQ;YAA6B;YAC9C,KAAK;gBAAE,QAAQ;YAA2B;QAC5C;QACA,MAAM,OAAOpC;YACX,MAAM,EAAE2C,UAAU,EAAEC,QAAQ,EAAEG,QAAQ,EAAEC,WAAW,EAAE,GACnDX,0BAA0BrC,OAAO,MAAMN,OAAO,IAAI;YACpD,IAAK,IAAIwD,IAAI,GAAGA,IAAIF,aAAaE,IAC/B,MAAMxD,OAAO,KAAK,CAACiD,YAAYC,UAAU;gBAAEG;YAAS;QAExD;IACF;AAIK,MAAMI,8BAA8BjD,EAAE,MAAM,CAAC;IAClD,QAAQC,4BACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMiD,yBAAyB,CACpCC,aAEO5D,aAGL;QACA,MAAM;QACN,aAAauB;QACb,gBAAgB;QAChB,aAAamC;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAyB;QAC7C;QACA,MAAM,OAAOnD;YACX,MAAMqD,WAAWrD,MAAM,MAAM;QAC/B;IACF;AAIK,MAAMsD,8BAA8BpD,EAAE,MAAM,CAAC;IAClD,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;KAAQ,EACtB,QAAQ,CAAC;IACZ,OAAOA,EAAAA,MACE,GACN,GAAG,GACH,GAAG,CAAC,GACJ,OAAO,CAAC,GACR,QAAQ,CACP;AAEN;AAMO,MAAMqD,yBAAyB,CAAC7D,SAI9BD,aAGL;QACA,MAAM;QACN,aACE;QACF,aAAa6D;QACb,QAAQ;YACN,WAAW;YACX,OAAO;QACT;QACA,MAAM,OAAOtD;YACX,MAAMwD,QAAQxD,MAAM,KAAK,IAAI;YAC7B,IAAIN,OAAO,QAAQ,CAAC,UAAU,EAAE,YAC9B,MAAMA,OAAO,QAAQ,CAAC,UAAU,CAACM,MAAM,SAAS,EAAEwD;YAIpD,MAAMC,OACJ/D,OAAO,KAAK,IACV,EAAAgE,SACA,IAAIC,QAAc,CAACC,UAAYC,WAAWD,SAASF,QAAO;YAC9D,MAAMI,WAAW9D,AAAoB,WAApBA,MAAM,SAAS,GAAc,cAAc;YAC5D,IAAK,IAAIkD,IAAI,GAAGA,IAAIM,OAAON,IAAK;gBAC9B,MAAMxD,OAAO,QAAQ,CAAC,aAAa,CAACoE;gBACpC,MAAML,KAAK;YACb;QACF;IACF;AAIK,MAAMM,yBAAyB7D,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;KAAM,EAClB,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AASO,MAAM8D,oBAAoB,CAACtE;IAIhC,IAAI,CAACA,OAAO,KAAK,EACf;IAGF,OAAOD,aAA8D;QACnE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAe;YACjC,WAAW;YACX,UAAU;QACZ;QACA,MAAM,OAAO/D;YACX,MAAM,EAAEiE,OAAO,EAAEC,OAAO,EAAEC,aAAa,EAAEC,WAAW,EAAErB,QAAQ,EAAE,GAC9DsB,oBAAoBrE,OAAO,MAAMN,OAAO,IAAI;YAC9C,MAAMA,OAAO,KAAK,GAChB;gBAAE,GAAGuE;gBAAS,GAAGC;YAAQ,GACzB;gBAAEC;gBAAeC;gBAAarB;YAAS;QAE3C;IACF;AACF;AAEO,SAASsB,oBACdrE,KAAuB,EACvBsC,UAA6C;IAQ7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAMgC,UAAUtE,MAAM,MAAM;IAC5B,MAAMiE,UAAUK,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACP,QAAQ;IACvB,MAAM2B,UAAUI,UACZxB,KAAK,KAAK,CAACwB,QAAQ,MAAM,CAAC,EAAE,IAC5BxB,KAAK,KAAK,CAACN,SAAS;IACxB,MAAMO,WAAW/C,MAAM,QAAQ,IAAI;IAEnC,MAAMuE,eAAezB,KAAK,KAAK,CAACA,KAAK,GAAG,CAACP,OAAOC,UAAU;IAC1D,MAAMgC,iBAAiBxE,MAAM,QAAQ,IAAIuE;IAEzC,MAAMJ,gBAAgBI;IACtB,MAAMH,cACJpE,AAAoB,UAApBA,MAAM,SAAS,GACXuE,eAAeC,iBACf1B,KAAK,GAAG,CAAC,IAAIyB,eAAeC;IAElC,OAAO;QAAEP;QAASC;QAASC;QAAeC;QAAarB;IAAS;AAClE;AAgCA,SAAS0B,wBACP/E,MAA+B,EAC/BgF,IAAyB;IAEzB,OAAOjF,aAAyC;QAC9C,MAAMC,OAAO,IAAI;QACjB,aAAaA,OAAO,WAAW;QAC/B,gBAAgBA,OAAO,cAAc;QACrC,mBAAmBA,OAAO,iBAAiB;QAC3C,kBAAkBA,OAAO,gBAAgB;QACzCgF;IACF;AACF;AAEO,SAASC,iCACdC,KAAsB,EACtBC,UAAuC,CAAC,CAAC;IAEzC,MAAMC,UAAgD,EAAE;IACxD,MAAM,EAAEC,OAAO,EAAE1D,QAAQ,EAAEM,MAAM,EAAEqD,KAAK,EAAEC,MAAM,EAAE,GAAGL;IAErD,IAAIG,SAAS;QACXD,QAAQ,IAAI,CAAC1E,gBAAgB2E,QAAQ,GAAG;QACxC,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACnE,wBAAwBoE,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,UAAU,EACpBD,QAAQ,IAAI,CAACtE,uBAAuBuE,QAAQ,UAAU;QAExD,IAAIA,QAAQ,KAAK,EACfD,QAAQ,IAAI,CAAChE,kBAAkBiE,QAAQ,KAAK;QAE9C,IAAIA,QAAQ,WAAW,EACrBD,QAAQ,IAAI,CAACjD,wBAAwBkD,QAAQ,WAAW;QAE1D,IAAIA,QAAQ,SAAS,EACnBD,QAAQ,IAAI,CAAC5C,sBAAsB6C,QAAQ,SAAS;IAExD;IAEA,IAAI1D,UACFyD,QAAQ,IAAI,CACV1D,kBAAkBC,WAClB+B,uBAAuB/B,SAAS,UAAU,GAC1CE,0BAA0BF,SAAS,aAAa,GAChDkC,uBAAuB;QAAElC;QAAU,OAAOwD,QAAQ,KAAK;IAAC;IAI5D,IAAIlD,QACFmD,QAAQ,IAAI,CAACpD,mBAAmBC,OAAO,MAAM;IAG/C,IAAIqD,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAAC7B,kBAAkB;QAAE,OAAO+B,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAIG,OAAO,SAASH,QAAQ,IAAI,IAAIA,AAAyB,UAAzBA,QAAQ,YAAY,EACtDC,QAAQ,IAAI,CAACd,kBAAkB;QAAE,OAAOgB,MAAM,KAAK;QAAE,MAAMH,QAAQ,IAAI;IAAC;IAG1E,IAAII,UAAUJ,QAAQ,aAAa,EAAE;QACnC,MAAM,EAAEK,aAAa,EAAE,GAAGL;QAC1B,IAAII,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,UAAU,IAAIC,cAAc,UAAU,EAC/CJ,QAAQ,IAAI,CACVL,wBAAwBS,cAAc,UAAU,EAAED,OAAO,UAAU;QAGvE,IAAIA,OAAO,gBAAgB,IAAIC,cAAc,gBAAgB,EAC3DJ,QAAQ,IAAI,CACVL,wBACES,cAAc,gBAAgB,EAC9BD,OAAO,gBAAgB;IAI/B;IAEA,OAAOH,QAAQ,MAAM,CAAC,CAACK,SACrBC,QAAQD;AAEZ;AAEO,SAASE,2BACdC,OAAiC;IAEjC,OAAOX,iCAAiCW,QAAQ,KAAK,EAAE;QACrD,MAAMA,QAAQ,IAAI;QAClB,OAAOA,QAAQ,KAAK;QACpB,eAAeA,QAAQ,aAAa;IACtC;AACF;AAGO,MAAMC,yBAAyBrF,EAAE,MAAM,CAAC;IAC7C,QAAQA,EAAAA,MACC,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsF,oBAAoB,IACxB/F,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa8F;QACb,QAAQ;YACN,QAAQ;QACV;QACA,MAAM,OAAOvF;YACX,MAAM+C,WAAW/C,OAAO,UAAU;YAClCyF,SAAS,wBAAwB,CAAC,aAAa,EAAE1C,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIY,QAAQ,CAACC,UAAYC,WAAWD,SAASb;QACrD;IACF"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { isAutoGLM } from "../ai-model/auto-glm/util.mjs";
|
|
2
|
-
import {
|
|
3
|
-
import { AiLocateSection, buildSearchAreaConfig } from "../ai-model/inspect.mjs";
|
|
2
|
+
import { AiExtractElementInfo, AiLocateElement, AiLocateSection, buildSearchAreaConfig } from "../ai-model/inspect.mjs";
|
|
4
3
|
import { elementDescriberInstruction } from "../ai-model/prompt/describe.mjs";
|
|
4
|
+
import { AIResponseParseError, callAIWithObjectResponse } from "../ai-model/service-caller/index.mjs";
|
|
5
5
|
import { expandSearchArea } from "../common.mjs";
|
|
6
6
|
import { ServiceError } from "../types.mjs";
|
|
7
7
|
import { compositeElementInfoImg, cropByRect } from "@midscene/shared/img";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"service/index.mjs","sources":["../../../src/service/index.ts"],"sourcesContent":["import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n AIResponseParseError,\n AiExtractElementInfo,\n AiLocateElement,\n callAIWithObjectResponse,\n} from '@/ai-model/index';\nimport { AiLocateSection, buildSearchAreaConfig } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport { type AIArgs, expandSearchArea } from '@/common';\nimport type {\n AIDescribeElementResponse,\n AIUsageInfo,\n DetailedLocateParam,\n LocateResultElement,\n LocateResultWithDump,\n PartialServiceDumpFromSDK,\n PlanningLocateParam,\n Rect,\n ServiceExtractOption,\n ServiceExtractParam,\n ServiceExtractResult,\n ServiceTaskInfo,\n UIContext,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../common';\nimport { createServiceDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext;\n planLocatedElement?: LocateResultElement;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\ninterface ServiceOptions {\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n}\n\nconst debug = getDebug('ai:service');\nexport default class Service {\n contextRetrieverFn: () => Promise<UIContext> | UIContext;\n\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n\n constructor(\n context: UIContext | (() => Promise<UIContext> | UIContext),\n opt?: ServiceOptions,\n ) {\n assert(context, 'context is required for Service');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: PlanningLocateParam,\n opt: LocateOpts,\n modelConfig: IModelConfig,\n abortSignal?: AbortSignal,\n ): Promise<LocateResultWithDump> {\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const hasPlanLocatedElement = !!opt?.planLocatedElement?.rect;\n\n let searchAreaPrompt;\n if (query.deepLocate && !hasPlanLocatedElement) {\n searchAreaPrompt = query.prompt;\n }\n\n const { modelFamily } = modelConfig;\n\n if (searchAreaPrompt && !modelFamily) {\n console.warn(\n 'The \"deepLocate\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config',\n );\n searchAreaPrompt = undefined;\n }\n\n if (searchAreaPrompt && isAutoGLM(modelFamily)) {\n console.warn('The \"deepLocate\" feature is not supported with AutoGLM.');\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn());\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (query.deepLocate && hasPlanLocatedElement) {\n const searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: opt.planLocatedElement!.rect,\n modelFamily,\n });\n searchArea = searchAreaConfig.rect;\n\n searchAreaRawResponse = JSON.stringify({\n source: 'plan-located-element',\n rect: opt.planLocatedElement!.rect,\n });\n searchAreaResponse = {\n rect: searchArea,\n imageBase64: searchAreaConfig.imageBase64,\n scale: searchAreaConfig.scale,\n rawResponse: searchAreaRawResponse,\n };\n } else if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n modelConfig,\n abortSignal,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, rawResponse, usage, reasoning_content } =\n await AiLocateElement({\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n modelConfig,\n abortSignal,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `failed to locate element: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepLocate: !!searchArea,\n error: errorLog,\n };\n\n const elements = parseResult.elements || [];\n\n const dump = createServiceDump({\n ...dumpData,\n matchedElement: elements,\n });\n\n if (errorLog) {\n throw new ServiceError(errorLog, dump);\n }\n\n if (elements.length > 1) {\n throw new ServiceError(\n `locate: multiple elements found, length = ${elements.length}`,\n dump,\n );\n }\n\n if (elements.length === 1) {\n return {\n element: {\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n description: elements[0]!.description,\n },\n rect,\n dump,\n };\n }\n\n return {\n element: null,\n rect,\n dump,\n };\n }\n\n async extract<T>(\n dataDemand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n pageDescription?: string,\n multimodalPrompt?: TMultimodalPrompt,\n context?: UIContext,\n ): Promise<ServiceExtractResult<T>> {\n assert(context, 'context is required for extract');\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n\n const startTime = Date.now();\n\n let parseResult: Awaited<\n ReturnType<typeof AiExtractElementInfo<T>>\n >['parseResult'];\n let rawResponse: string;\n let usage: Awaited<ReturnType<typeof AiExtractElementInfo<T>>>['usage'];\n let reasoning_content: string | undefined;\n\n try {\n const result = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelConfig,\n pageDescription,\n });\n parseResult = result.parseResult;\n rawResponse = result.rawResponse;\n usage = result.usage;\n reasoning_content = result.reasoning_content;\n } catch (error) {\n if (error instanceof AIResponseParseError) {\n // Create dump with usage and rawResponse from the error\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: error.rawResponse,\n usage: error.usage,\n };\n const dump = createServiceDump({\n type: 'extract',\n userQuery: { dataDemand },\n matchedElement: [],\n data: null,\n taskInfo,\n error: error.message,\n });\n throw new ServiceError(error.message, dump);\n }\n throw error;\n }\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse,\n formatResponse: JSON.stringify(parseResult),\n usage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n const dump = createServiceDump({\n ...dumpData,\n data,\n });\n\n if (errorLog && !data) {\n throw new ServiceError(errorLog, dump);\n }\n\n return {\n data,\n thought,\n usage,\n reasoning_content,\n dump,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n modelConfig: IModelConfig,\n opt?: {\n deepLocate?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for service.describe');\n const context = await this.contextRetrieverFn();\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n assert(screenshotBase64, 'screenshot is required for service.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const { modelFamily } = modelConfig;\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size: shotSize,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepLocate) {\n const searchArea = expandSearchArea(targetRect, shotSize);\n // Always crop in describe mode. Unlike locate's deepLocate (where\n // cropping too small loses context for finding elements), describe's\n // deepLocate intentionally zooms in so the model produces a more\n // precise description from a focused view. expandSearchArea already\n // guarantees a minimum 400x400 area with surrounding context.\n debug('describe: cropping to searchArea', searchArea);\n const croppedResult = await cropByRect(\n imagePayload,\n searchArea,\n modelFamily === 'qwen2.5-vl',\n );\n imagePayload = croppedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const res = await callAIWithObjectResponse<AIDescribeElementResponse>(\n msgs,\n modelConfig,\n );\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Service","query","opt","modelConfig","abortSignal","queryPrompt","assert","hasPlanLocatedElement","searchAreaPrompt","modelFamily","console","undefined","isAutoGLM","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","searchAreaConfig","buildSearchAreaConfig","JSON","AiLocateSection","startTime","Date","parseResult","rect","rawResponse","usage","reasoning_content","AiLocateElement","timeCost","taskInfo","errorLog","dumpData","elements","dump","createServiceDump","ServiceError","dataDemand","pageDescription","multimodalPrompt","result","AiExtractElementInfo","error","AIResponseParseError","data","thought","target","shotSize","screenshotBase64","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","croppedResult","cropByRect","msgs","res","callAIWithObjectResponse","content","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;;AA8CA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAqBnB,MAAM,OACJC,KAA0B,EAC1BC,GAAe,EACfC,WAAyB,EACzBC,WAAyB,EACM;QAC/B,MAAMC,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QAEpBC,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMM,wBAAwB,CAAC,CAACL,KAAK,oBAAoB;QAEzD,IAAIM;QACJ,IAAIP,MAAM,UAAU,IAAI,CAACM,uBACvBC,mBAAmBP,MAAM,MAAM;QAGjC,MAAM,EAAEQ,WAAW,EAAE,GAAGN;QAExB,IAAIK,oBAAoB,CAACC,aAAa;YACpCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBG;QACrB;QAEA,IAAIH,oBAAoBI,UAAUH,cAAc;YAC9CC,QAAQ,IAAI,CAAC;YACbF,mBAAmBG;QACrB;QAEA,MAAME,UAAUX,KAAK,WAAY,MAAM,IAAI,CAAC,kBAAkB;QAE9D,IAAIY;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIhB,MAAM,UAAU,IAAIM,uBAAuB;YAC7C,MAAMW,mBAAmB,MAAMC,sBAAsB;gBACnDN;gBACA,UAAUX,IAAI,kBAAkB,CAAE,IAAI;gBACtCO;YACF;YACAK,aAAaI,iBAAiB,IAAI;YAElCH,wBAAwBK,KAAK,SAAS,CAAC;gBACrC,QAAQ;gBACR,MAAMlB,IAAI,kBAAkB,CAAE,IAAI;YACpC;YACAe,qBAAqB;gBACnB,MAAMH;gBACN,aAAaI,iBAAiB,WAAW;gBACzC,OAAOA,iBAAiB,KAAK;gBAC7B,aAAaH;YACf;QACF,OAAO,IAAIP,kBAAkB;YAC3BS,qBAAqB,MAAMI,gBAAgB;gBACzCR;gBACA,oBAAoBL;gBACpBL;gBACAC;YACF;YACAE,OACEW,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAET,iBAAiB,CAAC,EAChDS,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAMK,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEC,IAAI,EAAEC,WAAW,EAAEC,KAAK,EAAEC,iBAAiB,EAAE,GAChE,MAAMC,gBAAgB;YACpBhB;YACA,0BAA0BR;YAC1B,cAAcY;YACdd;YACAC;QACF;QAEF,MAAM0B,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaV,KAAK,SAAS,CAACM;YAC5B,gBAAgBN,KAAK,SAAS,CAACI;YAC/BG;YACAb;YACAC;YACAC;YACAY;QACF;QAEA,IAAII;QACJ,IAAIR,YAAY,MAAM,EAAE,QACtBQ,WAAW,CAAC,4BAA4B,EAAER,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG3E,MAAMS,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS5B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAaoB;YACb,MAAM;YACNM;YACA,YAAY,CAAC,CAACjB;YACd,OAAOkB;QACT;QAEA,MAAME,WAAWV,YAAY,QAAQ,IAAI,EAAE;QAE3C,MAAMW,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACX,gBAAgBC;QAClB;QAEA,IAAIF,UACF,MAAM,IAAIK,aAAaL,UAAUG;QAGnC,IAAID,SAAS,MAAM,GAAG,GACpB,MAAM,IAAIG,aACR,CAAC,0CAA0C,EAAEH,SAAS,MAAM,EAAE,EAC9DC;QAIJ,IAAID,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,aAAaA,QAAQ,CAAC,EAAE,CAAE,WAAW;YACvC;YACAT;YACAU;QACF;QAGF,OAAO;YACL,SAAS;YACTV;YACAU;QACF;IACF;IAEA,MAAM,QACJG,UAA+B,EAC/BnC,WAAyB,EACzBD,GAA0B,EAC1BqC,eAAwB,EACxBC,gBAAoC,EACpC3B,OAAmB,EACe;QAClCP,OAAOO,SAAS;QAChBP,OACE,AAAsB,YAAtB,OAAOgC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAGvE,MAAMhB,YAAYC,KAAK,GAAG;QAE1B,IAAIC;QAGJ,IAAIE;QACJ,IAAIC;QACJ,IAAIC;QAEJ,IAAI;YACF,MAAMa,SAAS,MAAMC,qBAAwB;gBAC3C7B;gBACA,WAAWyB;gBACXE;gBACA,eAAetC;gBACfC;gBACAoC;YACF;YACAf,cAAciB,OAAO,WAAW;YAChCf,cAAce,OAAO,WAAW;YAChCd,QAAQc,OAAO,KAAK;YACpBb,oBAAoBa,OAAO,iBAAiB;QAC9C,EAAE,OAAOE,OAAO;YACd,IAAIA,iBAAiBC,sBAAsB;gBAEzC,MAAMd,WAAWP,KAAK,GAAG,KAAKD;gBAC9B,MAAMS,WAA4B;oBAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;oBACtC,YAAYD;oBACZ,aAAaa,MAAM,WAAW;oBAC9B,OAAOA,MAAM,KAAK;gBACpB;gBACA,MAAMR,OAAOC,kBAAkB;oBAC7B,MAAM;oBACN,WAAW;wBAAEE;oBAAW;oBACxB,gBAAgB,EAAE;oBAClB,MAAM;oBACNP;oBACA,OAAOY,MAAM,OAAO;gBACtB;gBACA,MAAM,IAAIN,aAAaM,MAAM,OAAO,EAAER;YACxC;YACA,MAAMQ;QACR;QAEA,MAAMb,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZJ;YACA,gBAAgBN,KAAK,SAAS,CAACI;YAC/BG;YACAC;QACF;QAEA,IAAII;QACJ,IAAIR,YAAY,MAAM,EAAE,QACtBQ,WAAW,CAAC,qBAAqB,EAAER,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMS,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTK;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNP;YACA,OAAOC;QACT;QAEA,MAAM,EAAEa,IAAI,EAAEC,OAAO,EAAE,GAAGtB,eAAe,CAAC;QAE1C,MAAMW,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACXY;QACF;QAEA,IAAIb,YAAY,CAACa,MACf,MAAM,IAAIR,aAAaL,UAAUG;QAGnC,OAAO;YACLU;YACAC;YACAnB;YACAC;YACAO;QACF;IACF;IAEA,MAAM,SACJY,MAA+B,EAC/B5C,WAAyB,EACzBD,GAEC,EACwD;QACzDI,OAAOyC,QAAQ;QACf,MAAMlC,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAC7C,MAAM,EAAEmC,QAAQ,EAAE,GAAGnC;QACrB,MAAMoC,mBAAmBpC,QAAQ,UAAU,CAAC,MAAM;QAClDP,OAAO2C,kBAAkB;QAEzB,MAAM,EAAExC,WAAW,EAAE,GAAGN;QACxB,MAAM+C,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBR;YAChB,MAAMD;YACN,sBAAsB;gBACpB;oBACE,MAAMK;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAInD,KAAK,YAAY;YACnB,MAAMY,aAAa4C,iBAAiBL,YAAYL;YAMhDlD,MAAM,oCAAoCgB;YAC1C,MAAM6C,gBAAgB,MAAMC,WAC1BJ,cACA1C,YACAL,AAAgB,iBAAhBA;YAEF+C,eAAeG,cAAc,WAAW;QAC1C;QAEA,MAAME,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,MAAM,MAAMC,yBAChBF,MACA1D;QAGF,MAAM,EAAE6D,OAAO,EAAE,GAAGF;QACpBxD,OAAO,CAAC0D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D1D,OAAO0D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAlWA,YACEnD,OAA2D,EAC3DX,GAAoB,CACpB;QAPF;QAEA;QAMEI,OAAOO,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMoD,QAAQ,OAAO,CAACpD;QAGlD,IAAI,AAAyB,WAAlBX,KAAK,UACd,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAqVF"}
|
|
1
|
+
{"version":3,"file":"service/index.mjs","sources":["../../../src/service/index.ts"],"sourcesContent":["import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n AiLocateSection,\n buildSearchAreaConfig,\n} from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport {\n AIResponseParseError,\n callAIWithObjectResponse,\n} from '@/ai-model/service-caller';\nimport { type AIArgs, expandSearchArea } from '@/common';\nimport type {\n AIDescribeElementResponse,\n AIUsageInfo,\n DetailedLocateParam,\n LocateResultElement,\n LocateResultWithDump,\n PartialServiceDumpFromSDK,\n PlanningLocateParam,\n Rect,\n ServiceExtractOption,\n ServiceExtractParam,\n ServiceExtractResult,\n ServiceTaskInfo,\n UIContext,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../common';\nimport { createServiceDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext;\n planLocatedElement?: LocateResultElement;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\ninterface ServiceOptions {\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n}\n\nconst debug = getDebug('ai:service');\nexport default class Service {\n contextRetrieverFn: () => Promise<UIContext> | UIContext;\n\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n\n constructor(\n context: UIContext | (() => Promise<UIContext> | UIContext),\n opt?: ServiceOptions,\n ) {\n assert(context, 'context is required for Service');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: PlanningLocateParam,\n opt: LocateOpts,\n modelConfig: IModelConfig,\n abortSignal?: AbortSignal,\n ): Promise<LocateResultWithDump> {\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const hasPlanLocatedElement = !!opt?.planLocatedElement?.rect;\n\n let searchAreaPrompt;\n if (query.deepLocate && !hasPlanLocatedElement) {\n searchAreaPrompt = query.prompt;\n }\n\n const { modelFamily } = modelConfig;\n\n if (searchAreaPrompt && !modelFamily) {\n console.warn(\n 'The \"deepLocate\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config',\n );\n searchAreaPrompt = undefined;\n }\n\n if (searchAreaPrompt && isAutoGLM(modelFamily)) {\n console.warn('The \"deepLocate\" feature is not supported with AutoGLM.');\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn());\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (query.deepLocate && hasPlanLocatedElement) {\n const searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: opt.planLocatedElement!.rect,\n modelFamily,\n });\n searchArea = searchAreaConfig.rect;\n\n searchAreaRawResponse = JSON.stringify({\n source: 'plan-located-element',\n rect: opt.planLocatedElement!.rect,\n });\n searchAreaResponse = {\n rect: searchArea,\n imageBase64: searchAreaConfig.imageBase64,\n scale: searchAreaConfig.scale,\n rawResponse: searchAreaRawResponse,\n };\n } else if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n modelConfig,\n abortSignal,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, rawResponse, usage, reasoning_content } =\n await AiLocateElement({\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n modelConfig,\n abortSignal,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `failed to locate element: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepLocate: !!searchArea,\n error: errorLog,\n };\n\n const elements = parseResult.elements || [];\n\n const dump = createServiceDump({\n ...dumpData,\n matchedElement: elements,\n });\n\n if (errorLog) {\n throw new ServiceError(errorLog, dump);\n }\n\n if (elements.length > 1) {\n throw new ServiceError(\n `locate: multiple elements found, length = ${elements.length}`,\n dump,\n );\n }\n\n if (elements.length === 1) {\n return {\n element: {\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n description: elements[0]!.description,\n },\n rect,\n dump,\n };\n }\n\n return {\n element: null,\n rect,\n dump,\n };\n }\n\n async extract<T>(\n dataDemand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n pageDescription?: string,\n multimodalPrompt?: TMultimodalPrompt,\n context?: UIContext,\n ): Promise<ServiceExtractResult<T>> {\n assert(context, 'context is required for extract');\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n\n const startTime = Date.now();\n\n let parseResult: Awaited<\n ReturnType<typeof AiExtractElementInfo<T>>\n >['parseResult'];\n let rawResponse: string;\n let usage: Awaited<ReturnType<typeof AiExtractElementInfo<T>>>['usage'];\n let reasoning_content: string | undefined;\n\n try {\n const result = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelConfig,\n pageDescription,\n });\n parseResult = result.parseResult;\n rawResponse = result.rawResponse;\n usage = result.usage;\n reasoning_content = result.reasoning_content;\n } catch (error) {\n if (error instanceof AIResponseParseError) {\n // Create dump with usage and rawResponse from the error\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: error.rawResponse,\n usage: error.usage,\n };\n const dump = createServiceDump({\n type: 'extract',\n userQuery: { dataDemand },\n matchedElement: [],\n data: null,\n taskInfo,\n error: error.message,\n });\n throw new ServiceError(error.message, dump);\n }\n throw error;\n }\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse,\n formatResponse: JSON.stringify(parseResult),\n usage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n const dump = createServiceDump({\n ...dumpData,\n data,\n });\n\n if (errorLog && !data) {\n throw new ServiceError(errorLog, dump);\n }\n\n return {\n data,\n thought,\n usage,\n reasoning_content,\n dump,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n modelConfig: IModelConfig,\n opt?: {\n deepLocate?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for service.describe');\n const context = await this.contextRetrieverFn();\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n assert(screenshotBase64, 'screenshot is required for service.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const { modelFamily } = modelConfig;\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size: shotSize,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepLocate) {\n const searchArea = expandSearchArea(targetRect, shotSize);\n // Always crop in describe mode. Unlike locate's deepLocate (where\n // cropping too small loses context for finding elements), describe's\n // deepLocate intentionally zooms in so the model produces a more\n // precise description from a focused view. expandSearchArea already\n // guarantees a minimum 400x400 area with surrounding context.\n debug('describe: cropping to searchArea', searchArea);\n const croppedResult = await cropByRect(\n imagePayload,\n searchArea,\n modelFamily === 'qwen2.5-vl',\n );\n imagePayload = croppedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const res = await callAIWithObjectResponse<AIDescribeElementResponse>(\n msgs,\n modelConfig,\n );\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Service","query","opt","modelConfig","abortSignal","queryPrompt","assert","hasPlanLocatedElement","searchAreaPrompt","modelFamily","console","undefined","isAutoGLM","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","searchAreaConfig","buildSearchAreaConfig","JSON","AiLocateSection","startTime","Date","parseResult","rect","rawResponse","usage","reasoning_content","AiLocateElement","timeCost","taskInfo","errorLog","dumpData","elements","dump","createServiceDump","ServiceError","dataDemand","pageDescription","multimodalPrompt","result","AiExtractElementInfo","error","AIResponseParseError","data","thought","target","shotSize","screenshotBase64","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","croppedResult","cropByRect","msgs","res","callAIWithObjectResponse","content","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;;AAiDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAqBnB,MAAM,OACJC,KAA0B,EAC1BC,GAAe,EACfC,WAAyB,EACzBC,WAAyB,EACM;QAC/B,MAAMC,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QAEpBC,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMM,wBAAwB,CAAC,CAACL,KAAK,oBAAoB;QAEzD,IAAIM;QACJ,IAAIP,MAAM,UAAU,IAAI,CAACM,uBACvBC,mBAAmBP,MAAM,MAAM;QAGjC,MAAM,EAAEQ,WAAW,EAAE,GAAGN;QAExB,IAAIK,oBAAoB,CAACC,aAAa;YACpCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBG;QACrB;QAEA,IAAIH,oBAAoBI,UAAUH,cAAc;YAC9CC,QAAQ,IAAI,CAAC;YACbF,mBAAmBG;QACrB;QAEA,MAAME,UAAUX,KAAK,WAAY,MAAM,IAAI,CAAC,kBAAkB;QAE9D,IAAIY;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIhB,MAAM,UAAU,IAAIM,uBAAuB;YAC7C,MAAMW,mBAAmB,MAAMC,sBAAsB;gBACnDN;gBACA,UAAUX,IAAI,kBAAkB,CAAE,IAAI;gBACtCO;YACF;YACAK,aAAaI,iBAAiB,IAAI;YAElCH,wBAAwBK,KAAK,SAAS,CAAC;gBACrC,QAAQ;gBACR,MAAMlB,IAAI,kBAAkB,CAAE,IAAI;YACpC;YACAe,qBAAqB;gBACnB,MAAMH;gBACN,aAAaI,iBAAiB,WAAW;gBACzC,OAAOA,iBAAiB,KAAK;gBAC7B,aAAaH;YACf;QACF,OAAO,IAAIP,kBAAkB;YAC3BS,qBAAqB,MAAMI,gBAAgB;gBACzCR;gBACA,oBAAoBL;gBACpBL;gBACAC;YACF;YACAE,OACEW,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAET,iBAAiB,CAAC,EAChDS,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAMK,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEC,IAAI,EAAEC,WAAW,EAAEC,KAAK,EAAEC,iBAAiB,EAAE,GAChE,MAAMC,gBAAgB;YACpBhB;YACA,0BAA0BR;YAC1B,cAAcY;YACdd;YACAC;QACF;QAEF,MAAM0B,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaV,KAAK,SAAS,CAACM;YAC5B,gBAAgBN,KAAK,SAAS,CAACI;YAC/BG;YACAb;YACAC;YACAC;YACAY;QACF;QAEA,IAAII;QACJ,IAAIR,YAAY,MAAM,EAAE,QACtBQ,WAAW,CAAC,4BAA4B,EAAER,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG3E,MAAMS,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS5B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAaoB;YACb,MAAM;YACNM;YACA,YAAY,CAAC,CAACjB;YACd,OAAOkB;QACT;QAEA,MAAME,WAAWV,YAAY,QAAQ,IAAI,EAAE;QAE3C,MAAMW,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACX,gBAAgBC;QAClB;QAEA,IAAIF,UACF,MAAM,IAAIK,aAAaL,UAAUG;QAGnC,IAAID,SAAS,MAAM,GAAG,GACpB,MAAM,IAAIG,aACR,CAAC,0CAA0C,EAAEH,SAAS,MAAM,EAAE,EAC9DC;QAIJ,IAAID,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,aAAaA,QAAQ,CAAC,EAAE,CAAE,WAAW;YACvC;YACAT;YACAU;QACF;QAGF,OAAO;YACL,SAAS;YACTV;YACAU;QACF;IACF;IAEA,MAAM,QACJG,UAA+B,EAC/BnC,WAAyB,EACzBD,GAA0B,EAC1BqC,eAAwB,EACxBC,gBAAoC,EACpC3B,OAAmB,EACe;QAClCP,OAAOO,SAAS;QAChBP,OACE,AAAsB,YAAtB,OAAOgC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAGvE,MAAMhB,YAAYC,KAAK,GAAG;QAE1B,IAAIC;QAGJ,IAAIE;QACJ,IAAIC;QACJ,IAAIC;QAEJ,IAAI;YACF,MAAMa,SAAS,MAAMC,qBAAwB;gBAC3C7B;gBACA,WAAWyB;gBACXE;gBACA,eAAetC;gBACfC;gBACAoC;YACF;YACAf,cAAciB,OAAO,WAAW;YAChCf,cAAce,OAAO,WAAW;YAChCd,QAAQc,OAAO,KAAK;YACpBb,oBAAoBa,OAAO,iBAAiB;QAC9C,EAAE,OAAOE,OAAO;YACd,IAAIA,iBAAiBC,sBAAsB;gBAEzC,MAAMd,WAAWP,KAAK,GAAG,KAAKD;gBAC9B,MAAMS,WAA4B;oBAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;oBACtC,YAAYD;oBACZ,aAAaa,MAAM,WAAW;oBAC9B,OAAOA,MAAM,KAAK;gBACpB;gBACA,MAAMR,OAAOC,kBAAkB;oBAC7B,MAAM;oBACN,WAAW;wBAAEE;oBAAW;oBACxB,gBAAgB,EAAE;oBAClB,MAAM;oBACNP;oBACA,OAAOY,MAAM,OAAO;gBACtB;gBACA,MAAM,IAAIN,aAAaM,MAAM,OAAO,EAAER;YACxC;YACA,MAAMQ;QACR;QAEA,MAAMb,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZJ;YACA,gBAAgBN,KAAK,SAAS,CAACI;YAC/BG;YACAC;QACF;QAEA,IAAII;QACJ,IAAIR,YAAY,MAAM,EAAE,QACtBQ,WAAW,CAAC,qBAAqB,EAAER,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMS,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTK;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNP;YACA,OAAOC;QACT;QAEA,MAAM,EAAEa,IAAI,EAAEC,OAAO,EAAE,GAAGtB,eAAe,CAAC;QAE1C,MAAMW,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACXY;QACF;QAEA,IAAIb,YAAY,CAACa,MACf,MAAM,IAAIR,aAAaL,UAAUG;QAGnC,OAAO;YACLU;YACAC;YACAnB;YACAC;YACAO;QACF;IACF;IAEA,MAAM,SACJY,MAA+B,EAC/B5C,WAAyB,EACzBD,GAEC,EACwD;QACzDI,OAAOyC,QAAQ;QACf,MAAMlC,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAC7C,MAAM,EAAEmC,QAAQ,EAAE,GAAGnC;QACrB,MAAMoC,mBAAmBpC,QAAQ,UAAU,CAAC,MAAM;QAClDP,OAAO2C,kBAAkB;QAEzB,MAAM,EAAExC,WAAW,EAAE,GAAGN;QACxB,MAAM+C,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBR;YAChB,MAAMD;YACN,sBAAsB;gBACpB;oBACE,MAAMK;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAInD,KAAK,YAAY;YACnB,MAAMY,aAAa4C,iBAAiBL,YAAYL;YAMhDlD,MAAM,oCAAoCgB;YAC1C,MAAM6C,gBAAgB,MAAMC,WAC1BJ,cACA1C,YACAL,AAAgB,iBAAhBA;YAEF+C,eAAeG,cAAc,WAAW;QAC1C;QAEA,MAAME,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,MAAM,MAAMC,yBAChBF,MACA1D;QAGF,MAAM,EAAE6D,OAAO,EAAE,GAAGF;QACpBxD,OAAO,CAAC0D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D1D,OAAO0D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAlWA,YACEnD,OAA2D,EAC3DX,GAAoB,CACpB;QAPF;QAEA;QAMEI,OAAOO,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMoD,QAAQ,OAAO,CAACpD;QAGlD,IAAI,AAAyB,WAAlBX,KAAK,UACd,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAqVF"}
|