@midscene/core 1.4.5 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/dist/es/agent/agent.mjs +4 -56
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +4 -1
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +91 -18
  6. package/dist/es/agent/utils.mjs.map +1 -1
  7. package/dist/es/ai-model/auto-glm/planning.mjs +1 -1
  8. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -1
  9. package/dist/es/ai-model/inspect.mjs +5 -5
  10. package/dist/es/ai-model/inspect.mjs.map +1 -1
  11. package/dist/es/ai-model/llm-planning.mjs +3 -3
  12. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  13. package/dist/es/ai-model/service-caller/index.mjs +73 -47
  14. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  15. package/dist/es/ai-model/ui-tars-planning.mjs +14 -14
  16. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  17. package/dist/es/common.mjs +2 -15
  18. package/dist/es/common.mjs.map +1 -1
  19. package/dist/es/device/index.mjs.map +1 -1
  20. package/dist/es/dump/html-utils.mjs +2 -1
  21. package/dist/es/dump/html-utils.mjs.map +1 -1
  22. package/dist/es/index.mjs.map +1 -1
  23. package/dist/es/report-generator.mjs +2 -2
  24. package/dist/es/report-generator.mjs.map +1 -1
  25. package/dist/es/report.mjs +39 -7
  26. package/dist/es/report.mjs.map +1 -1
  27. package/dist/es/service/index.mjs +6 -6
  28. package/dist/es/service/index.mjs.map +1 -1
  29. package/dist/es/types.mjs.map +1 -1
  30. package/dist/es/utils.mjs +2 -2
  31. package/dist/lib/agent/agent.js +4 -56
  32. package/dist/lib/agent/agent.js.map +1 -1
  33. package/dist/lib/agent/task-builder.js +4 -1
  34. package/dist/lib/agent/task-builder.js.map +1 -1
  35. package/dist/lib/agent/utils.js +96 -14
  36. package/dist/lib/agent/utils.js.map +1 -1
  37. package/dist/lib/ai-model/auto-glm/planning.js +1 -1
  38. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -1
  39. package/dist/lib/ai-model/inspect.js +5 -5
  40. package/dist/lib/ai-model/inspect.js.map +1 -1
  41. package/dist/lib/ai-model/llm-planning.js +3 -3
  42. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  43. package/dist/lib/ai-model/service-caller/index.js +75 -49
  44. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  45. package/dist/lib/ai-model/ui-tars-planning.js +14 -14
  46. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  47. package/dist/lib/common.js +2 -15
  48. package/dist/lib/common.js.map +1 -1
  49. package/dist/lib/device/index.js.map +1 -1
  50. package/dist/lib/dump/html-utils.js +4 -0
  51. package/dist/lib/dump/html-utils.js.map +1 -1
  52. package/dist/lib/index.js.map +1 -1
  53. package/dist/lib/report-generator.js +1 -1
  54. package/dist/lib/report-generator.js.map +1 -1
  55. package/dist/lib/report.js +36 -4
  56. package/dist/lib/report.js.map +1 -1
  57. package/dist/lib/service/index.js +6 -6
  58. package/dist/lib/service/index.js.map +1 -1
  59. package/dist/lib/types.js.map +1 -1
  60. package/dist/lib/utils.js +2 -2
  61. package/dist/types/agent/agent.d.ts +1 -15
  62. package/dist/types/agent/utils.d.ts +13 -1
  63. package/dist/types/ai-model/index.d.ts +1 -1
  64. package/dist/types/ai-model/service-caller/index.d.ts +4 -2
  65. package/dist/types/common.d.ts +0 -310
  66. package/dist/types/device/index.d.ts +1 -2
  67. package/dist/types/dump/html-utils.d.ts +11 -0
  68. package/dist/types/index.d.ts +1 -1
  69. package/dist/types/report.d.ts +5 -0
  70. package/dist/types/types.d.ts +2 -2
  71. package/package.json +2 -2
@@ -174,11 +174,19 @@ async function callAI(messages, modelConfig, options) {
174
174
  commonConfig.top_p = 0.85;
175
175
  commonConfig.frequency_penalty = 0.2;
176
176
  }
177
- const { config: deepThinkConfig, debugMessage, warningMessage } = resolveDeepThinkConfig({
178
- deepThink: options?.deepThink,
177
+ const mergedEnableReasoning = (()=>{
178
+ const normalizedDeepThink = options?.deepThink === 'unset' ? void 0 : options?.deepThink;
179
+ if (true === normalizedDeepThink) return true;
180
+ if (false === normalizedDeepThink) return false;
181
+ return modelConfig.reasoningEnabled;
182
+ })();
183
+ const { config: reasoningEffortConfig, debugMessage: reasoningEffortDebugMessage, warningMessage } = resolveReasoningConfig({
184
+ reasoningEnabled: mergedEnableReasoning,
185
+ reasoningEffort: modelConfig.reasoningEffort,
186
+ reasoningBudget: modelConfig.reasoningBudget,
179
187
  modelFamily
180
188
  });
181
- if (debugMessage) debugCall(debugMessage);
189
+ if (reasoningEffortDebugMessage) debugCall(reasoningEffortDebugMessage);
182
190
  if (warningMessage) warnCall(warningMessage);
183
191
  try {
184
192
  debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
@@ -187,7 +195,7 @@ async function callAI(messages, modelConfig, options) {
187
195
  model: modelName,
188
196
  messages,
189
197
  ...commonConfig,
190
- ...deepThinkConfig
198
+ ...reasoningEffortConfig
191
199
  }, {
192
200
  stream: true
193
201
  });
@@ -241,7 +249,7 @@ async function callAI(messages, modelConfig, options) {
241
249
  model: modelName,
242
250
  messages,
243
251
  ...commonConfig,
244
- ...deepThinkConfig
252
+ ...reasoningEffortConfig
245
253
  });
246
254
  timeCost = Date.now() - startTime;
247
255
  debugProfileStats(`model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`);
@@ -251,7 +259,7 @@ async function callAI(messages, modelConfig, options) {
251
259
  accumulatedReasoning = result.choices[0].message?.reasoning_content || '';
252
260
  usage = result.usage;
253
261
  requestId = result._request_id;
254
- if (!content && accumulatedReasoning && 'doubao-vision' === modelFamily) {
262
+ if (!content && accumulatedReasoning && ('doubao-vision' === modelFamily || 'doubao-seed' === modelFamily)) {
255
263
  warnCall('empty content from AI model, using reasoning content');
256
264
  content = accumulatedReasoning;
257
265
  }
@@ -327,50 +335,68 @@ function preprocessDoubaoBboxJson(input) {
327
335
  if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
328
336
  return input;
329
337
  }
330
- function resolveDeepThinkConfig({ deepThink, modelFamily }) {
331
- const normalizedDeepThink = 'unset' === deepThink ? void 0 : deepThink;
332
- if (void 0 === normalizedDeepThink) return {
333
- config: {},
334
- debugMessage: void 0
335
- };
336
- if ('qwen3-vl' === modelFamily) return {
337
- config: {
338
- enable_thinking: normalizedDeepThink
339
- },
340
- debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`
341
- };
342
- if ('doubao-vision' === modelFamily) return {
343
- config: {
344
- thinking: {
345
- type: normalizedDeepThink ? 'enabled' : 'disabled'
346
- }
347
- },
348
- debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`
349
- };
350
- if ('glm-v' === modelFamily) return {
351
- config: {
352
- thinking: {
353
- type: normalizedDeepThink ? 'enabled' : 'disabled'
354
- }
355
- },
356
- debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for glm-v`
338
+ function resolveReasoningConfig({ reasoningEnabled, reasoningEffort, reasoningBudget, modelFamily }) {
339
+ if (void 0 === reasoningEnabled && !reasoningEffort && void 0 === reasoningBudget) return {
340
+ config: {}
357
341
  };
358
- if ('gpt-5' === modelFamily) return {
359
- config: normalizedDeepThink ? {
360
- reasoning: {
342
+ const debugMessages = [];
343
+ const config = {};
344
+ if ('qwen3-vl' === modelFamily || 'qwen3.5' === modelFamily) {
345
+ if (void 0 !== reasoningEnabled) {
346
+ config.enable_thinking = reasoningEnabled;
347
+ debugMessages.push(`enable_thinking=${reasoningEnabled}`);
348
+ }
349
+ if (void 0 !== reasoningBudget) {
350
+ config.thinking_budget = reasoningBudget;
351
+ debugMessages.push(`thinking_budget=${reasoningBudget}`);
352
+ }
353
+ } else if ('doubao-vision' === modelFamily || 'doubao-seed' === modelFamily) {
354
+ if (void 0 !== reasoningEnabled) {
355
+ config.thinking = {
356
+ type: reasoningEnabled ? 'enabled' : 'disabled'
357
+ };
358
+ debugMessages.push(`thinking.type=${reasoningEnabled ? 'enabled' : 'disabled'}`);
359
+ }
360
+ if (reasoningEffort) {
361
+ config.reasoning_effort = reasoningEffort;
362
+ debugMessages.push(`reasoning_effort="${reasoningEffort}"`);
363
+ }
364
+ } else if ('glm-v' === modelFamily) {
365
+ if (void 0 !== reasoningEnabled) {
366
+ config.thinking = {
367
+ type: reasoningEnabled ? 'enabled' : 'disabled'
368
+ };
369
+ debugMessages.push(`thinking.type=${reasoningEnabled ? 'enabled' : 'disabled'}`);
370
+ }
371
+ } else if ('gpt-5' === modelFamily) {
372
+ if (reasoningEffort) {
373
+ config.reasoning = {
374
+ effort: reasoningEffort
375
+ };
376
+ debugMessages.push(`reasoning.effort="${reasoningEffort}"`);
377
+ } else if (true === reasoningEnabled) {
378
+ config.reasoning = {
361
379
  effort: 'high'
362
- }
363
- } : {
364
- reasoning: {
380
+ };
381
+ debugMessages.push('reasoning.effort="high" (from reasoningEnabled)');
382
+ } else if (false === reasoningEnabled) {
383
+ config.reasoning = {
365
384
  effort: 'low'
366
- }
367
- },
368
- debugMessage: normalizedDeepThink ? 'deepThink mapped to reasoning.effort=high for gpt-5' : 'deepThink disabled for gpt-5'
385
+ };
386
+ debugMessages.push('reasoning.effort="low" (from reasoningEnabled)');
387
+ }
388
+ } else if (!modelFamily) return {
389
+ config: {},
390
+ debugMessage: 'reasoning config ignored: no model_family configured',
391
+ warningMessage: 'Reasoning config is set but no model_family is configured. Set MIDSCENE_MODEL_FAMILY to enable reasoning config pass-through.'
369
392
  };
393
+ else if (reasoningEffort) {
394
+ config.reasoning_effort = reasoningEffort;
395
+ debugMessages.push(`reasoning_effort="${reasoningEffort}"`);
396
+ }
370
397
  return {
371
- config: {},
372
- debugMessage: `deepThink ignored: unsupported model_family "${modelFamily ?? 'default'}"`,
373
- warningMessage: `The "deepThink" option is not supported for model_family "${modelFamily ?? 'default'}".`
398
+ config,
399
+ debugMessage: debugMessages.length ? `reasoning config for ${modelFamily}: ${debugMessages.join(', ')}` : void 0
374
400
  };
375
401
  }
376
402
  function normalizeJsonObject(obj) {
@@ -406,7 +432,7 @@ function safeParseJson(input, modelFamily) {
406
432
  } catch (error) {
407
433
  lastError = error;
408
434
  }
409
- if ('doubao-vision' === modelFamily || isUITars(modelFamily)) {
435
+ if ('doubao-vision' === modelFamily || 'doubao-seed' === modelFamily || isUITars(modelFamily)) {
410
436
  const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
411
437
  try {
412
438
  parsed = JSON.parse(jsonrepair(jsonString));
@@ -417,6 +443,6 @@ function safeParseJson(input, modelFamily) {
417
443
  }
418
444
  throw Error(`failed to parse LLM response into JSON. Error - ${String(lastError ?? 'unknown error')}. Response - \n ${input}`);
419
445
  }
420
- export { AIResponseParseError, callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock, preprocessDoubaoBboxJson, resolveDeepThinkConfig, safeParseJson };
446
+ export { AIResponseParseError, callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock, preprocessDoubaoBboxJson, resolveReasoningConfig, safeParseJson };
421
447
 
422
448
  //# sourceMappingURL=index.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import type { AIUsageInfo, DeepThinkOption } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\n\n// Error class that preserves usage and rawResponse when AI call parsing fails\nexport class AIResponseParseError extends Error {\n usage?: AIUsageInfo;\n rawResponse: string;\n\n constructor(message: string, rawResponse: string, usage?: AIUsageInfo) {\n super(message);\n this.name = 'AIResponseParseError';\n this.rawResponse = rawResponse;\n this.usage = usage;\n }\n}\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TModelFamily,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM, isUITars } from '../auto-glm/util';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsModelVersion?: UITarsModelVersion;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const warnClient = getDebug('ai:call', { console: true });\n const debugProxy = getDebug('ai:call:proxy');\n const warnProxy = getDebug('ai:call:proxy', { console: 
true });\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n warnProxy(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n warnProxy(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 
5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n warnProxy('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? 
{ timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = '@langfuse/openai';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n 
globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const warnCall = getDebug('ai:call', { console: true });\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n let requestId: string | null | undefined;\n\n const buildUsageInfo = (\n usageData?: OpenAI.CompletionUsage,\n requestId?: string | null,\n ) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n request_id: requestId ?? undefined,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(modelFamily === 'qwen2.5-vl' // qwen vl v2 specific config\n ? 
{\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(modelFamily)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n const {\n config: deepThinkConfig,\n debugMessage,\n warningMessage,\n } = resolveDeepThinkConfig({\n deepThink: options?.deepThink,\n modelFamily,\n });\n if (debugMessage) {\n debugCall(debugMessage);\n }\n if (warningMessage) {\n warnCall(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n requestId = stream._request_id;\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: 
estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage, requestId),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${modelFamily || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n } as any);\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? 
''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n requestId = result._request_id;\n\n if (\n !content &&\n accumulatedReasoning &&\n modelFamily === 'doubao-vision'\n ) {\n warnCall('empty content from AI model, using reasoning content');\n content = accumulatedReasoning;\n }\n\n if (!content) {\n throw new Error('empty content from AI model');\n }\n\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n if (attempt < maxAttempts) {\n warnCall(\n `AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage, requestId),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n warnCall('call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 
'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const modelFamily = modelConfig.modelFamily;\n const jsonContent = safeParseJson(response.content, modelFamily);\n if (typeof jsonContent !== 'object') {\n throw new AIResponseParseError(\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n response.content,\n response.usage,\n );\n }\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n 
}\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveDeepThinkConfig({\n deepThink,\n modelFamily,\n}: {\n deepThink?: DeepThinkOption;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n const normalizedDeepThink = deepThink === 'unset' ? undefined : deepThink;\n\n if (normalizedDeepThink === undefined) {\n return { config: {}, debugMessage: undefined };\n }\n\n if (modelFamily === 'qwen3-vl') {\n return {\n config: { enable_thinking: normalizedDeepThink },\n debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`,\n };\n }\n\n if (modelFamily === 'doubao-vision') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`,\n };\n }\n\n if (modelFamily === 'glm-v') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for glm-v`,\n };\n }\n\n if (modelFamily === 'gpt-5') {\n return {\n config: normalizedDeepThink\n ? {\n reasoning: { effort: 'high' },\n }\n : {\n reasoning: { effort: 'low' },\n },\n debugMessage: normalizedDeepThink\n ? 'deepThink mapped to reasoning.effort=high for gpt-5'\n : 'deepThink disabled for gpt-5',\n };\n }\n\n return {\n config: {},\n debugMessage: `deepThink ignored: unsupported model_family \"${modelFamily ?? 
'default'}\"`,\n warningMessage: `The \"deepThink\" option is not supported for model_family \"${modelFamily ?? 'default'}\".`,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(\n input: string,\n modelFamily: TModelFamily | undefined,\n) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (modelFamily 
=== 'doubao-vision' || isUITars(modelFamily)) {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["AIResponseParseError","Error","message","rawResponse","usage","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsModelVersion","modelFamily","createOpenAIClient","timeout","proxyAgent","warnClient","getDebug","debugProxy","warnProxy","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","moduleName","ProxyAgent","socksDispatcher","proxyUrl","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","warnCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","timeCost","requestId","buildUsageInfo","usageData","cachedInputTokens","undefined","commonConfig","isAutoGLM","deepThinkConfig","debugMessage","warningMessage","resolveDeepThinkConfig","stream","chunk","reasoning_content","chunkData","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","result","JSON","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","i
nput","deepThink","normalizedDeepThink","normalizeJsonObject","obj","Array","item","normalized","key","value","Object","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","isUITars","jsonString","String"],"mappings":";;;;;;;;;;;;;;;;AAIO,MAAMA,6BAA6BC;IAIxC,YAAYC,OAAe,EAAEC,WAAmB,EAAEC,KAAmB,CAAE;QACrE,KAAK,CAACF,UAJR,yCACA;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,WAAW,GAAGC;QACnB,IAAI,CAAC,KAAK,GAAGC;IACf;AACF;AAqBA,eAAeC,iBAAiB,EAC9BC,WAAW,EAGZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS,WAAW;QAAE,SAAS;IAAK;IACvD,MAAMC,aAAaD,SAAS;IAC5B,MAAME,YAAYF,SAAS,iBAAiB;QAAE,SAAS;IAAK;IAI5D,MAAMG,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAIhB,WAAW;QACba,WAAW,oBAAoBE,iBAAiBf;QAChD,IAAImB,aACFL,UACE;aAEG;YAEL,MAAMM,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCV,aAAa,IAAIW,WAAW;gBAC1B,KAAKrB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBc,WAAW,qBAAqBE,iBAAiBhB;QACjD,IAAIoB,aACFL,UACE;aAGF,IAAI;YAEF,MAAMM,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIL,IAAInB;YAGzB,IAAI,CAACwB,SAAS,QAAQ,EACpB,MAAM,IAAI9B,MAAM;YAIlB,MAAM+B,OAAOC,OAAO,QAAQ,CAACF,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIE,OAAO,KAAK,CAACD,OACjC,MAAM,IAAI/B,MAAM;YAIlB,MAAMiC,WAAWH,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMI,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaY,gBAAgB;gBAC3B,MAAMK;gBACN,MAAMJ,SAAS,QAAQ;gBACvBC;gBACA,GAAID,SAAS,QAAQ,GACjB;oBACE,QAAQK,mBAAmBL,SAAS,QAAQ;oBAC5C,UAAUK,mBAAmBL,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAV,WAAW,uCAAuC;gBAChD,MAAMc;gBACN,MAAMJ,SAAS,QAAQ;gBACvB,MAAMC;YACR;QACF,EAAE,OAAOK,OAAO;YACdf,UAAU,oCAAoCe;YAC9C,MAAM,IAAIpC,MACR,CAAC,yBAAyB,EAAEM,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,gBAAgB;QACpB,SAAS5B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE
,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIhB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAMyB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACI,0BAC1C;QACA,IAAInB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAM4B,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnChC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC7C,WAAyB,EACzB8C,OAIC;IAOD,MAAM,EACJC,UAAU,EACV5C,SAAS,EACTI,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACZ,GAAG,MAAMV,iBAAiB;QACzBC;IACF;IAEA,MAAMgD,YACJZ,oBAAoB,yBAAyB,CAACa,8BAC9Cb,oBAAoB,yBAAyB,CAACc;IAChD,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,WAAWtC,SAAS,WAAW;QAAE,SAAS;IAAK;IACrD,MAAMuC,oBAAoBvC,SAAS;IACnC,MAAMwC,qBAAqBxC,SAAS;IAEpC,MAAMyC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAczD,YAAY,WAAW,IAAI;IAE/C,MAAM0D,cAAcZ,SAAS,UAAUA,SAAS;IAChD,IAAIa;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAI/D;IACJ,IAAIgE;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CACrBC,WACAF;QAEA,IAAI,CAACE,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWJ,YAAY;YACvB,YAAY3D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;YAC1B,YAAY+D,aAAaI;QAC3B;IACF;IAEA,MAAMC,eAAe;QACnBX;QACA,QAAQ,CAAC,CAACC;QACV,YAAYV;QACZ,GAAIvC,AAAgB,iBAAhBA,cACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI4D,UAAU5D,cAAc;QACzB2D,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAEA,MAAM,EACJ,QAAQE,eAAe,EACvBC,YAAY,EACZC,cAAc,EACf,GAAGC,uBAAuB;QACzB,WAAW3B,SAAS;QACpBrC;IACF;IACA,IAAI8D,cACFpB,UAAUoB;IAEZ,IAAIC,gBACFpB,SAASoB;IAGX,IAAI;QACFrB,UACE,CAAC,QAAQ,EAAEO,cAAc,eAAe,GAAG,WAAW,EAAEvD,WAAW;QAGrE,IAAIuD,aAAa;YACf,MAAMgB,S
AAU,MAAM3B,WAAW,MAAM,CACrC;gBACE,OAAO5C;gBACP0C;gBACA,GAAGuB,YAAY;gBACf,GAAGE,eAAe;YACpB,GACA;gBACE,QAAQ;YACV;YAKFP,YAAYW,OAAO,WAAW;YAE9B,WAAW,MAAMC,SAASD,OAAQ;gBAChC,MAAMf,UAAUgB,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACb7E,QAAQ6E,MAAM,KAAK;gBAGrB,IAAIhB,WAAWiB,mBAAmB;oBAChChB,eAAeD;oBACfE,wBAAwBe;oBACxB,MAAMC,YAAiC;wBACrClB;wBACAiB;wBACAhB;wBACA,YAAY;wBACZ,OAAOO;oBACT;oBACArB,QAAQ,OAAO,CAAE+B;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCb,WAAWN,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACzD,OAAO;wBAEV,MAAMgF,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACnB,YAAY,MAAM,GAAG;wBAElC9D,QAAQ;4BACN,eAAegF;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTpB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAelE,OAAOiE;oBAC/B;oBACAjB,QAAQ,OAAO,CAAEkC;oBACjB;gBACF;YACF;YACArB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAElD,UAAU,QAAQ,EAAEM,eAAe,UAAU,WAAW,EAAEqD,SAAS,eAAe,EAAEL,eAAe,IAAI;QAE/H,OAAO;YAEL,MAAMwB,aAAajF,YAAY,UAAU,IAAI;YAC7C,MAAMkF,gBAAgBlF,YAAY,aAAa,IAAI;YACnD,MAAMmF,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAC5C,IAAI;gBACF,MAAMC,SAAS,MAAMvC,WAAW,MAAM,CAAC;oBACrC,OAAO5C;oBACP0C;oBACA,GAAGuB,YAAY;oBACf,GAAGE,eAAe;gBACpB;gBAEAR,WAAWN,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAElD,UAAU,QAAQ,EAAEM,eAAe,UAAU,mBAAmB,EAAED,mBAAmB,iBAAiB,EAAE8E,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAExB,SAAS,aAAa,EAAEwB,OAAO,WAAW,IAAI,GAAG,eAAe,EAAE7B,eAAe,IAAI;gBAGxWH,mBACE,CAAC,oBAAoB,EAAEiC,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;gBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI3F,MACR,CAAC,mCAAmC,EAAE4F,KAAK,SAAS,CAACD,SAAS;gBAIlE3B,UAAU2B,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3CzB,uBACGyB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;gBAC3DxF,QAAQwF,OAAO,KAAK;gBACpBvB,YAAYuB,OAAO,WAAW;gBAE9B,IACE,CAAC3B,WACDE,wBACApD,AAAgB,oBAAhBA,aACA;oBACA2C,SAAS;oBACTO,UAAUE;gBACZ;gBAEA,IAAI,CAACF,SACH,MAAM,IAA
IhE,MAAM;gBAGlB;YACF,EAAE,OAAOoC,OAAO;gBACdqD,YAAYrD;gBACZ,IAAIsD,UAAUF,aAAa;oBACzB/B,SACE,CAAC,wBAAwB,EAAEiC,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;oBAErH,MAAM,IAAII,QAAQ,CAACC,UAAYC,WAAWD,SAASP;gBACrD;YACF;YAGF,IAAI,CAACvB,SACH,MAAMyB;QAEV;QAEAjC,UAAU,CAAC,4BAA4B,EAAEU,sBAAsB;QAC/DV,UAAU,CAAC,kBAAkB,EAAEQ,SAAS;QAGxC,IAAID,eAAe,CAAC5D,OAAO;YAEzB,MAAMgF,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEpB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtC7D,QAAQ;gBACN,eAAegF;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASnB,WAAW;YACpB,mBAAmBE,wBAAwBM;YAC3C,OAAOH,eAAelE,OAAOiE;YAC7B,YAAY,CAAC,CAACL;QAChB;IACF,EAAE,OAAOiC,GAAQ;QACfvC,SAAS,iBAAiBuC;QAC1B,MAAMC,WAAW,IAAIjG,MACnB,CAAC,eAAe,EAAE+D,cAAc,eAAe,GAAG,kBAAkB,EAAEvD,UAAU,GAAG,EAAEwF,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpBhD,QAAsC,EACtC7C,WAAyB,EACzB8C,OAEC;IAOD,MAAMgD,WAAW,MAAMlD,OAAOC,UAAU7C,aAAa;QACnD,WAAW8C,SAAS;IACtB;IACAiD,OAAOD,UAAU;IACjB,MAAMrF,cAAcT,YAAY,WAAW;IAC3C,MAAMgG,cAAcC,cAAcH,SAAS,OAAO,EAAErF;IACpD,IAAI,AAAuB,YAAvB,OAAOuF,aACT,MAAM,IAAItG,qBACR,CAAC,0CAA0C,EAAEM,YAAY,SAAS,CAAC,GAAG,EAAE8F,SAAS,OAAO,EAAE,EAC1FA,SAAS,OAAO,EAChBA,SAAS,KAAK;IAGlB,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZnG,WAAyB;IAEzB,MAAM,EAAE2D,OAAO,EAAE7D,KAAK,EAAE,GAAG,MAAM8C,OAAOuD,MAAMnG;IAC9C,OAAO;QAAE2D;QAAS7D;IAAM;AAC1B;AAEO,SAASsG,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAAShC,uBAAuB,EACrCiC,SAAS,EACTjG,WAAW,EAIZ;IAKC,MAAMkG,sBAAsBD,AAAc,YAAdA,YAAwBvC,SAAYuC;IAEhE,IAAIC,AAAwBxC,WAAxBwC,qBACF,OAAO
;QAAE,QAAQ,CAAC;QAAG,cAAcxC;IAAU;IAG/C,IAAI1D,AAAgB,eAAhBA,aACF,OAAO;QACL,QAAQ;YAAE,iBAAiBkG;QAAoB;QAC/C,cAAc,CAAC,oCAAoC,EAAEA,oBAAoB,aAAa,CAAC;IACzF;IAGF,IAAIlG,AAAgB,oBAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMkG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,kBAAkB,CAAC;IACrH;IAGF,IAAIlG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMkG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,UAAU,CAAC;IAC7G;IAGF,IAAIlG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQkG,sBACJ;YACE,WAAW;gBAAE,QAAQ;YAAO;QAC9B,IACA;YACE,WAAW;gBAAE,QAAQ;YAAM;QAC7B;QACJ,cAAcA,sBACV,wDACA;IACN;IAGF,OAAO;QACL,QAAQ,CAAC;QACT,cAAc,CAAC,6CAA6C,EAAElG,eAAe,UAAU,CAAC,CAAC;QACzF,gBAAgB,CAAC,0DAA0D,EAAEA,eAAe,UAAU,EAAE,CAAC;IAC3G;AACF;AAQA,SAASmG,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACE,OAASH,oBAAoBG;IAI/C,IAAI,AAAe,YAAf,OAAOF,KAAkB;QAC3B,MAAMG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACN,KAAM;YAE9C,MAAMO,aAAaH,IAAI,IAAI;YAG3B,IAAII,kBAAkBT,oBAAoBM;YAG1C,IAAI,AAA2B,YAA3B,OAAOG,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCL,UAAU,CAACI,WAAW,GAAGC;QAC3B;QAEA,OAAOL;IACT;IAGA,IAAI,AAAe,YAAf,OAAOH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASZ,cACdQ,KAAa,EACbhG,WAAqC;IAErC,MAAM6G,kBAAkBlB,yBAAyBK;IAEjD,IAAIa,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAI3F;IAGT,IAAIR;IACJ,IAAIiE;IACJ,IAAI;QACFjE,SAASoE,KAAK,KAAK,CAAC+B;QACpB,OAAOV,oBAAoBzF;IAC7B,EAAE,OAAOY,OAAO;QACdqD,YAAYrD;IACd;IACA,IAAI;QACFZ,SAASoE,KAAK,KAAK,CAACgC,WAAWD;QAC/B,OAAOV,oBAAoBzF;IAC7B,EAAE,OAAOY,OAAO;QACdqD,YAAYrD;IACd;IAEA,IAAItB,AAAgB,oBAAhBA,eAAmC+G,SAAS/G,cAAc;QAC5D,MAAMgH,aAAajB,yBAAyBc;QAC5C,IAAI;YACFnG,SAASoE,KAAK,KAAK,CAACgC,WAAWE;YAC/B,OAAOb,oBAAoBzF;QAC7B,EAAE,OAAOY,OAAO;YACdqD,YAAYrD;QACd;IACF;IACA,MAAMpC,MACJ,CAAC,gDAAgD,EAAE+H,OACjDtC,aAAa,iBACb,gBAAgB,EAAEqB,OAAO;AAE/B"}
1
+ {"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import type { AIUsageInfo, DeepThinkOption } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\n\n// Error class that preserves usage and rawResponse when AI call parsing fails\nexport class AIResponseParseError extends Error {\n usage?: AIUsageInfo;\n rawResponse: string;\n\n constructor(message: string, rawResponse: string, usage?: AIUsageInfo) {\n super(message);\n this.name = 'AIResponseParseError';\n this.rawResponse = rawResponse;\n this.usage = usage;\n }\n}\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TModelFamily,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM, isUITars } from '../auto-glm/util';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsModelVersion?: UITarsModelVersion;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const warnClient = getDebug('ai:call', { console: true });\n const debugProxy = getDebug('ai:call:proxy');\n const warnProxy = getDebug('ai:call:proxy', { console: 
true });\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n warnProxy(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n warnProxy(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 
5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n warnProxy('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? 
{ timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n warnClient('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = '@langfuse/openai';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n 
globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const warnCall = getDebug('ai:call', { console: true });\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n let requestId: string | null | undefined;\n\n const buildUsageInfo = (\n usageData?: OpenAI.CompletionUsage,\n requestId?: string | null,\n ) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n request_id: requestId ?? undefined,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(modelFamily === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(modelFamily)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n // Merge deepThink (per-request boolean) with reasoning config (model-level)\n // deepThink takes priority as a per-request override for reasoningEnabled\n const mergedEnableReasoning = (() => {\n const normalizedDeepThink =\n options?.deepThink === 'unset' ? 
undefined : options?.deepThink;\n if (normalizedDeepThink === true) return true;\n if (normalizedDeepThink === false) return false;\n return modelConfig.reasoningEnabled;\n })();\n\n const {\n config: reasoningEffortConfig,\n debugMessage: reasoningEffortDebugMessage,\n warningMessage,\n } = resolveReasoningConfig({\n reasoningEnabled: mergedEnableReasoning,\n reasoningEffort: modelConfig.reasoningEffort,\n reasoningBudget: modelConfig.reasoningBudget,\n modelFamily,\n });\n if (reasoningEffortDebugMessage) {\n debugCall(reasoningEffortDebugMessage);\n }\n if (warningMessage) {\n warnCall(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...reasoningEffortConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n requestId = stream._request_id;\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n 
);\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage, requestId),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${modelFamily || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...reasoningEffortConfig,\n } as any);\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? 
''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n requestId = result._request_id;\n\n if (\n !content &&\n accumulatedReasoning &&\n (modelFamily === 'doubao-vision' || modelFamily === 'doubao-seed')\n ) {\n warnCall('empty content from AI model, using reasoning content');\n content = accumulatedReasoning;\n }\n\n if (!content) {\n throw new Error('empty content from AI model');\n }\n\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n if (attempt < maxAttempts) {\n warnCall(\n `AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage, requestId),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n warnCall('call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 
'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const modelFamily = modelConfig.modelFamily;\n const jsonContent = safeParseJson(response.content, modelFamily);\n if (typeof jsonContent !== 'object') {\n throw new AIResponseParseError(\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n response.content,\n response.usage,\n );\n }\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n 
}\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveReasoningConfig({\n reasoningEnabled,\n reasoningEffort,\n reasoningBudget,\n modelFamily,\n}: {\n reasoningEnabled?: boolean;\n reasoningEffort?: string;\n reasoningBudget?: number;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n // No reasoning params set at all\n if (\n reasoningEnabled === undefined &&\n !reasoningEffort &&\n reasoningBudget === undefined\n ) {\n return { config: {} };\n }\n\n const debugMessages: string[] = [];\n const config: Record<string, unknown> = {};\n\n if (modelFamily === 'qwen3-vl' || modelFamily === 'qwen3.5') {\n // reasoningEnabled → enable_thinking\n if (reasoningEnabled !== undefined) {\n config.enable_thinking = reasoningEnabled;\n debugMessages.push(`enable_thinking=${reasoningEnabled}`);\n }\n // reasoningBudget → thinking_budget\n if (reasoningBudget !== undefined) {\n config.thinking_budget = reasoningBudget;\n debugMessages.push(`thinking_budget=${reasoningBudget}`);\n }\n // reasoningEffort is ignored for qwen\n } else if (modelFamily === 'doubao-vision' || modelFamily === 'doubao-seed') {\n // reasoningEnabled → thinking.type\n if (reasoningEnabled !== undefined) {\n config.thinking = {\n type: reasoningEnabled ? 'enabled' : 'disabled',\n };\n debugMessages.push(\n `thinking.type=${reasoningEnabled ? 
'enabled' : 'disabled'}`,\n );\n }\n // reasoningEffort → reasoning_effort\n if (reasoningEffort) {\n config.reasoning_effort = reasoningEffort;\n debugMessages.push(`reasoning_effort=\"${reasoningEffort}\"`);\n }\n // reasoningBudget is ignored for doubao\n } else if (modelFamily === 'glm-v') {\n // reasoningEnabled → thinking.type\n if (reasoningEnabled !== undefined) {\n config.thinking = {\n type: reasoningEnabled ? 'enabled' : 'disabled',\n };\n debugMessages.push(\n `thinking.type=${reasoningEnabled ? 'enabled' : 'disabled'}`,\n );\n }\n // reasoningEffort and reasoningBudget are ignored for glm-v\n } else if (modelFamily === 'gpt-5') {\n // reasoningEffort → reasoning.effort\n if (reasoningEffort) {\n config.reasoning = { effort: reasoningEffort };\n debugMessages.push(`reasoning.effort=\"${reasoningEffort}\"`);\n } else if (reasoningEnabled === true) {\n config.reasoning = { effort: 'high' };\n debugMessages.push('reasoning.effort=\"high\" (from reasoningEnabled)');\n } else if (reasoningEnabled === false) {\n config.reasoning = { effort: 'low' };\n debugMessages.push('reasoning.effort=\"low\" (from reasoningEnabled)');\n }\n // reasoningBudget is ignored for gpt-5\n } else if (!modelFamily) {\n return {\n config: {},\n debugMessage: 'reasoning config ignored: no model_family configured',\n warningMessage:\n 'Reasoning config is set but no model_family is configured. Set MIDSCENE_MODEL_FAMILY to enable reasoning config pass-through.',\n };\n } else {\n // For unknown model families, pass reasoning_effort directly as a best-effort default\n if (reasoningEffort) {\n config.reasoning_effort = reasoningEffort;\n debugMessages.push(`reasoning_effort=\"${reasoningEffort}\"`);\n }\n }\n\n return {\n config,\n debugMessage: debugMessages.length\n ? `reasoning config for ${modelFamily}: ${debugMessages.join(', ')}`\n : undefined,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. 
All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(\n input: string,\n modelFamily: TModelFamily | undefined,\n) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (\n modelFamily === 'doubao-vision' ||\n modelFamily === 'doubao-seed' ||\n isUITars(modelFamily)\n ) {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n 
return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["AIResponseParseError","Error","message","rawResponse","usage","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsModelVersion","modelFamily","createOpenAIClient","timeout","proxyAgent","warnClient","getDebug","debugProxy","warnProxy","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","moduleName","ProxyAgent","socksDispatcher","proxyUrl","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","warnCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","timeCost","requestId","buildUsageInfo","usageData","cachedInputTokens","undefined","commonConfig","isAutoGLM","mergedEnableReasoning","normalizedDeepThink","reasoningEffortConfig","reasoningEffortDebugMessage","warningMessage","resolveReasoningConfig","stream","chunk","reasoning_content","chunkData","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","result","JSON","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","reasoningEnabled","reasoningEffort","reasoningBudget","debugMessages","config","normalizeJsonOb
ject","obj","Array","item","normalized","key","value","Object","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","isUITars","jsonString","String"],"mappings":";;;;;;;;;;;;;;;;AAIO,MAAMA,6BAA6BC;IAIxC,YAAYC,OAAe,EAAEC,WAAmB,EAAEC,KAAmB,CAAE;QACrE,KAAK,CAACF,UAJR,yCACA;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,WAAW,GAAGC;QACnB,IAAI,CAAC,KAAK,GAAGC;IACf;AACF;AAqBA,eAAeC,iBAAiB,EAC9BC,WAAW,EAGZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS,WAAW;QAAE,SAAS;IAAK;IACvD,MAAMC,aAAaD,SAAS;IAC5B,MAAME,YAAYF,SAAS,iBAAiB;QAAE,SAAS;IAAK;IAI5D,MAAMG,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAIhB,WAAW;QACba,WAAW,oBAAoBE,iBAAiBf;QAChD,IAAImB,aACFL,UACE;aAEG;YAEL,MAAMM,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCV,aAAa,IAAIW,WAAW;gBAC1B,KAAKrB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBc,WAAW,qBAAqBE,iBAAiBhB;QACjD,IAAIoB,aACFL,UACE;aAGF,IAAI;YAEF,MAAMM,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIL,IAAInB;YAGzB,IAAI,CAACwB,SAAS,QAAQ,EACpB,MAAM,IAAI9B,MAAM;YAIlB,MAAM+B,OAAOC,OAAO,QAAQ,CAACF,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIE,OAAO,KAAK,CAACD,OACjC,MAAM,IAAI/B,MAAM;YAIlB,MAAMiC,WAAWH,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMI,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaY,gBAAgB;gBAC3B,MAAMK;gBACN,MAAMJ,SAAS,QAAQ;gBACvBC;gBACA,GAAID,SAAS,QAAQ,GACjB;oBACE,QAAQK,mBAAmBL,SAAS,QAAQ;oBAC5C,UAAUK,mBAAmBL,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAV,WAAW,uCAAuC;gBAChD,MAAMc;gBACN,MAAMJ,SAAS,QAAQ;gBACvB,MAAMC;YACR;QACF,EAAE,OAAOK,OAAO;YACdf,UAAU,oCAAoCe;YAC9C,MAAM,IAAIpC,MACR,CAAC,yBAAyB,EAAEM,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,gBAAgB;QACpB,SAAS5B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,
iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIhB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAMyB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACI,0BAC1C;QACA,IAAInB,aACF,MAAM,IAAI1B,MAAM;QAElBkB,WAAW;QAEX,MAAM4B,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnChC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC7C,WAAyB,EACzB8C,OAIC;IAOD,MAAM,EACJC,UAAU,EACV5C,SAAS,EACTI,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACZ,GAAG,MAAMV,iBAAiB;QACzBC;IACF;IAEA,MAAMgD,YACJZ,oBAAoB,yBAAyB,CAACa,8BAC9Cb,oBAAoB,yBAAyB,CAACc;IAChD,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,WAAWtC,SAAS,WAAW;QAAE,SAAS;IAAK;IACrD,MAAMuC,oBAAoBvC,SAAS;IACnC,MAAMwC,qBAAqBxC,SAAS;IAEpC,MAAMyC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAczD,YAAY,WAAW,IAAI;IAE/C,MAAM0D,cAAcZ,SAAS,UAAUA,SAAS;IAChD,IAAIa;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAI/D;IACJ,IAAIgE;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CACrBC,WACAF;QAEA,IAAI,CAACE,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWJ,YAAY;YACvB,YAAY3D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;YAC1B,YAAY+D,aAAaI;QAC3B;IACF;IAEA,MAAMC,eAAe;QACnBX;QACA,QAAQ,CAAC,CAACC;QACV,YAAYV;QACZ,GAAIvC,AAAgB,iBAAhBA,cACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI4D,UAAU5D,cAAc;QACzB2D,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAIA,MAAME,wBAAyB,AAAC;QAC9B,MAAMC,sBACJzB,SAAS,cAAc,UAAUqB,SAAYrB,SAAS;QACxD,IAAIyB,AAAwB,SAAxBA,qBAA8B,OAAO;QACzC,IAAIA,AAAwB,UAAxBA,qBAA+B,OAAO;QAC1C,OAAOvE,YAAY,gBAAgB;IACrC;IAEA,MAAM,EACJ,QAAQwE,qBAAqB,EAC7B,cAAcC,2BAA2B,EACzCC,cAAc,EACf,GAAGC,uBAAuB;QACzB,kBAAkBL;QAClB,iBAAiBtE,YAAY,eAAe;QAC5C,iBAAiB
A,YAAY,eAAe;QAC5CS;IACF;IACA,IAAIgE,6BACFtB,UAAUsB;IAEZ,IAAIC,gBACFtB,SAASsB;IAGX,IAAI;QACFvB,UACE,CAAC,QAAQ,EAAEO,cAAc,eAAe,GAAG,WAAW,EAAEvD,WAAW;QAGrE,IAAIuD,aAAa;YACf,MAAMkB,SAAU,MAAM7B,WAAW,MAAM,CACrC;gBACE,OAAO5C;gBACP0C;gBACA,GAAGuB,YAAY;gBACf,GAAGI,qBAAqB;YAC1B,GACA;gBACE,QAAQ;YACV;YAKFT,YAAYa,OAAO,WAAW;YAE9B,WAAW,MAAMC,SAASD,OAAQ;gBAChC,MAAMjB,UAAUkB,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACb/E,QAAQ+E,MAAM,KAAK;gBAGrB,IAAIlB,WAAWmB,mBAAmB;oBAChClB,eAAeD;oBACfE,wBAAwBiB;oBACxB,MAAMC,YAAiC;wBACrCpB;wBACAmB;wBACAlB;wBACA,YAAY;wBACZ,OAAOO;oBACT;oBACArB,QAAQ,OAAO,CAAEiC;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCf,WAAWN,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACzD,OAAO;wBAEV,MAAMkF,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACrB,YAAY,MAAM,GAAG;wBAElC9D,QAAQ;4BACN,eAAekF;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTtB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAelE,OAAOiE;oBAC/B;oBACAjB,QAAQ,OAAO,CAAEoC;oBACjB;gBACF;YACF;YACAvB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAElD,UAAU,QAAQ,EAAEM,eAAe,UAAU,WAAW,EAAEqD,SAAS,eAAe,EAAEL,eAAe,IAAI;QAE/H,OAAO;YAEL,MAAM0B,aAAanF,YAAY,UAAU,IAAI;YAC7C,MAAMoF,gBAAgBpF,YAAY,aAAa,IAAI;YACnD,MAAMqF,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAC5C,IAAI;gBACF,MAAMC,SAAS,MAAMzC,WAAW,MAAM,CAAC;oBACrC,OAAO5C;oBACP0C;oBACA,GAAGuB,YAAY;oBACf,GAAGI,qBAAqB;gBAC1B;gBAEAV,WAAWN,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAElD,UAAU,QAAQ,EAAEM,eAAe,UAAU,mBAAmB,EAAED,mBAAmB,iBAAiB,EAAEgF,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAE1B,SAAS,aAAa,EAAE0B,OAAO,WAAW,IAAI,GAAG,eAAe,EAAE/B,eAAe,IAAI;gBAGxWH,mBACE,CAAC,oBAAoB,EAAEmC,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;gBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI7F,MACR,CAAC,mCAAmC,EAAE8F,KAAK,SAAS,CAACD,SAAS;gBAIlE7B,UAAU6B,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3C3B,uBACG2B,OAAO,OAAO,CAAC,EAAE,CAAC
,OAAO,EAAU,qBAAqB;gBAC3D1F,QAAQ0F,OAAO,KAAK;gBACpBzB,YAAYyB,OAAO,WAAW;gBAE9B,IACE,CAAC7B,WACDE,wBACCpD,CAAAA,AAAgB,oBAAhBA,eAAmCA,AAAgB,kBAAhBA,WAA4B,GAChE;oBACA2C,SAAS;oBACTO,UAAUE;gBACZ;gBAEA,IAAI,CAACF,SACH,MAAM,IAAIhE,MAAM;gBAGlB;YACF,EAAE,OAAOoC,OAAO;gBACduD,YAAYvD;gBACZ,IAAIwD,UAAUF,aAAa;oBACzBjC,SACE,CAAC,wBAAwB,EAAEmC,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;oBAErH,MAAM,IAAII,QAAQ,CAACC,UAAYC,WAAWD,SAASP;gBACrD;YACF;YAGF,IAAI,CAACzB,SACH,MAAM2B;QAEV;QAEAnC,UAAU,CAAC,4BAA4B,EAAEU,sBAAsB;QAC/DV,UAAU,CAAC,kBAAkB,EAAEQ,SAAS;QAGxC,IAAID,eAAe,CAAC5D,OAAO;YAEzB,MAAMkF,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEtB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtC7D,QAAQ;gBACN,eAAekF;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASrB,WAAW;YACpB,mBAAmBE,wBAAwBM;YAC3C,OAAOH,eAAelE,OAAOiE;YAC7B,YAAY,CAAC,CAACL;QAChB;IACF,EAAE,OAAOmC,GAAQ;QACfzC,SAAS,iBAAiByC;QAC1B,MAAMC,WAAW,IAAInG,MACnB,CAAC,eAAe,EAAE+D,cAAc,eAAe,GAAG,kBAAkB,EAAEvD,UAAU,GAAG,EAAE0F,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpBlD,QAAsC,EACtC7C,WAAyB,EACzB8C,OAEC;IAOD,MAAMkD,WAAW,MAAMpD,OAAOC,UAAU7C,aAAa;QACnD,WAAW8C,SAAS;IACtB;IACAmD,OAAOD,UAAU;IACjB,MAAMvF,cAAcT,YAAY,WAAW;IAC3C,MAAMkG,cAAcC,cAAcH,SAAS,OAAO,EAAEvF;IACpD,IAAI,AAAuB,YAAvB,OAAOyF,aACT,MAAM,IAAIxG,qBACR,CAAC,0CAA0C,EAAEM,YAAY,SAAS,CAAC,GAAG,EAAEgG,SAAS,OAAO,EAAE,EAC1FA,SAAS,OAAO,EAChBA,SAAS,KAAK;IAGlB,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZrG,WAAyB;IAEzB,MAAM,EAAE2D,OAAO,EAAE7D,KAAK,EAAE,GAAG,MAAM8C,OAAOyD,MAAMrG;IAC9C,OAAO;QAAE2D;QAAS7D;IAAM;AAC1B;AAEO,SAASwG,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ
,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAAShC,uBAAuB,EACrCiC,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EACfrG,WAAW,EAMZ;IAMC,IACEmG,AAAqBzC,WAArByC,oBACA,CAACC,mBACDC,AAAoB3C,WAApB2C,iBAEA,OAAO;QAAE,QAAQ,CAAC;IAAE;IAGtB,MAAMC,gBAA0B,EAAE;IAClC,MAAMC,SAAkC,CAAC;IAEzC,IAAIvG,AAAgB,eAAhBA,eAA8BA,AAAgB,cAAhBA,aAA2B;QAE3D,IAAImG,AAAqBzC,WAArByC,kBAAgC;YAClCI,OAAO,eAAe,GAAGJ;YACzBG,cAAc,IAAI,CAAC,CAAC,gBAAgB,EAAEH,kBAAkB;QAC1D;QAEA,IAAIE,AAAoB3C,WAApB2C,iBAA+B;YACjCE,OAAO,eAAe,GAAGF;YACzBC,cAAc,IAAI,CAAC,CAAC,gBAAgB,EAAED,iBAAiB;QACzD;IAEF,OAAO,IAAIrG,AAAgB,oBAAhBA,eAAmCA,AAAgB,kBAAhBA,aAA+B;QAE3E,IAAImG,AAAqBzC,WAArByC,kBAAgC;YAClCI,OAAO,QAAQ,GAAG;gBAChB,MAAMJ,mBAAmB,YAAY;YACvC;YACAG,cAAc,IAAI,CAChB,CAAC,cAAc,EAAEH,mBAAmB,YAAY,YAAY;QAEhE;QAEA,IAAIC,iBAAiB;YACnBG,OAAO,gBAAgB,GAAGH;YAC1BE,cAAc,IAAI,CAAC,CAAC,kBAAkB,EAAEF,gBAAgB,CAAC,CAAC;QAC5D;IAEF,OAAO,IAAIpG,AAAgB,YAAhBA,aAET;QAAA,IAAImG,AAAqBzC,WAArByC,kBAAgC;YAClCI,OAAO,QAAQ,GAAG;gBAChB,MAAMJ,mBAAmB,YAAY;YACvC;YACAG,cAAc,IAAI,CAChB,CAAC,cAAc,EAAEH,mBAAmB,YAAY,YAAY;QAEhE;IAAA,OAEK,IAAInG,AAAgB,YAAhBA,aAET;QAAA,IAAIoG,iBAAiB;YACnBG,OAAO,SAAS,GAAG;gBAAE,QAAQH;YAAgB;YAC7CE,cAAc,IAAI,CAAC,CAAC,kBAAkB,EAAEF,gBAAgB,CAAC,CAAC;QAC5D,OAAO,IAAID,AAAqB,SAArBA,kBAA2B;YACpCI,OAAO,SAAS,GAAG;gBAAE,QAAQ;YAAO;YACpCD,cAAc,IAAI,CAAC;QACrB,OAAO,IAAIH,AAAqB,UAArBA,kBAA4B;YACrCI,OAAO,SAAS,GAAG;gBAAE,QAAQ;YAAM;YACnCD,cAAc,IAAI,CAAC;QACrB;IAAA,OAEK,IAAI,CAACtG,aACV,OAAO;QACL,QAAQ,CAAC;QACT,cAAc;QACd,gBACE;IACJ;SAGA,IAAIoG,iBAAiB;QACnBG,OAAO,gBAAgB,GAAGH;QAC1BE,cAAc,IAAI,CAAC,CAAC,kBAAkB,EAAEF,gBAAgB,CAAC,CAAC;IAC5D;IAGF,OAAO;QACLG;QACA,cAAcD,cAAc,MAAM,GAC9B,CAAC,qBAAqB,EAAEtG,YAAY,EAAE,EAAEsG,cAAc,IAAI,CAAC,OAAO,GAClE5C;IACN;AACF;AAQA,SAAS8C,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACE,OAASH,oBAAoBG;IAI/C,IAAI,AAAe,YAAf,OAAOF,KAAkB;QAC3B,MAAMG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACN,KAAM;YAE9C,MAAMO,aAAaH,IAAI,IAAI;YAG3B,IA
AII,kBAAkBT,oBAAoBM;YAG1C,IAAI,AAA2B,YAA3B,OAAOG,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCL,UAAU,CAACI,WAAW,GAAGC;QAC3B;QAEA,OAAOL;IACT;IAGA,IAAI,AAAe,YAAf,OAAOH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASf,cACdQ,KAAa,EACblG,WAAqC;IAErC,MAAMkH,kBAAkBrB,yBAAyBK;IAEjD,IAAIgB,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAIhG;IAGT,IAAIR;IACJ,IAAImE;IACJ,IAAI;QACFnE,SAASsE,KAAK,KAAK,CAACkC;QACpB,OAAOV,oBAAoB9F;IAC7B,EAAE,OAAOY,OAAO;QACduD,YAAYvD;IACd;IACA,IAAI;QACFZ,SAASsE,KAAK,KAAK,CAACmC,WAAWD;QAC/B,OAAOV,oBAAoB9F;IAC7B,EAAE,OAAOY,OAAO;QACduD,YAAYvD;IACd;IAEA,IACEtB,AAAgB,oBAAhBA,eACAA,AAAgB,kBAAhBA,eACAoH,SAASpH,cACT;QACA,MAAMqH,aAAapB,yBAAyBiB;QAC5C,IAAI;YACFxG,SAASsE,KAAK,KAAK,CAACmC,WAAWE;YAC/B,OAAOb,oBAAoB9F;QAC7B,EAAE,OAAOY,OAAO;YACduD,YAAYvD;QACd;IACF;IACA,MAAMpC,MACJ,CAAC,gDAAgD,EAAEoI,OACjDzC,aAAa,iBACb,gBAAgB,EAAEqB,OAAO;AAE/B"}
@@ -44,7 +44,7 @@ async function uiTarsPlanning(userInstruction, options) {
44
44
  let parsed;
45
45
  try {
46
46
  convertedText = convertBboxToCoordinates(res.content);
47
- const { size } = context;
47
+ const { shotSize } = context;
48
48
  const parseResult = actionParser({
49
49
  prediction: convertedText,
50
50
  factor: [
@@ -52,8 +52,8 @@ async function uiTarsPlanning(userInstruction, options) {
52
52
  1000
53
53
  ],
54
54
  screenContext: {
55
- width: size.width,
56
- height: size.height
55
+ width: shotSize.width,
56
+ height: shotSize.height
57
57
  },
58
58
  modelVer: uiTarsModelVersion
59
59
  });
@@ -62,7 +62,7 @@ async function uiTarsPlanning(userInstruction, options) {
62
62
  const errorMessage = parseError instanceof Error ? parseError.message : String(parseError);
63
63
  throw new AIResponseParseError(`Parse error: ${errorMessage}`, JSON.stringify(res.content, void 0, 2), res.usage);
64
64
  }
65
- const { size } = context;
65
+ const { shotSize } = context;
66
66
  debug('ui-tars modelVer', uiTarsModelVersion, ', parsed', JSON.stringify(parsed));
67
67
  const transformActions = [];
68
68
  const unhandledActions = [];
@@ -71,13 +71,13 @@ async function uiTarsPlanning(userInstruction, options) {
71
71
  const actionType = (action.action_type || '').toLowerCase();
72
72
  if ('click' === actionType) {
73
73
  assert(action.action_inputs.start_box, 'start_box is required');
74
- const point = getPoint(action.action_inputs.start_box, size);
74
+ const point = getPoint(action.action_inputs.start_box, shotSize);
75
75
  const locate = {
76
76
  prompt: action.thought || '',
77
77
  bbox: pointToBbox({
78
78
  x: point[0],
79
79
  y: point[1]
80
- }, size.width, size.height)
80
+ }, shotSize.width, shotSize.height)
81
81
  };
82
82
  transformActions.push({
83
83
  type: 'Tap',
@@ -87,13 +87,13 @@ async function uiTarsPlanning(userInstruction, options) {
87
87
  });
88
88
  } else if ('left_double' === actionType) {
89
89
  assert(action.action_inputs.start_box, 'start_box is required');
90
- const point = getPoint(action.action_inputs.start_box, size);
90
+ const point = getPoint(action.action_inputs.start_box, shotSize);
91
91
  const locate = {
92
92
  prompt: action.thought || '',
93
93
  bbox: pointToBbox({
94
94
  x: point[0],
95
95
  y: point[1]
96
- }, size.width, size.height)
96
+ }, shotSize.width, shotSize.height)
97
97
  };
98
98
  transformActions.push({
99
99
  type: 'DoubleClick',
@@ -104,13 +104,13 @@ async function uiTarsPlanning(userInstruction, options) {
104
104
  });
105
105
  } else if ('right_single' === actionType) {
106
106
  assert(action.action_inputs.start_box, 'start_box is required');
107
- const point = getPoint(action.action_inputs.start_box, size);
107
+ const point = getPoint(action.action_inputs.start_box, shotSize);
108
108
  const locate = {
109
109
  prompt: action.thought || '',
110
110
  bbox: pointToBbox({
111
111
  x: point[0],
112
112
  y: point[1]
113
- }, size.width, size.height)
113
+ }, shotSize.width, shotSize.height)
114
114
  };
115
115
  transformActions.push({
116
116
  type: 'RightClick',
@@ -122,8 +122,8 @@ async function uiTarsPlanning(userInstruction, options) {
122
122
  } else if ('drag' === actionType) {
123
123
  assert(action.action_inputs.start_box, 'start_box is required');
124
124
  assert(action.action_inputs.end_box, 'end_box is required');
125
- const startPoint = getPoint(action.action_inputs.start_box, size);
126
- const endPoint = getPoint(action.action_inputs.end_box, size);
125
+ const startPoint = getPoint(action.action_inputs.start_box, shotSize);
126
+ const endPoint = getPoint(action.action_inputs.end_box, shotSize);
127
127
  transformActions.push({
128
128
  type: 'DragAndDrop',
129
129
  param: {
@@ -132,14 +132,14 @@ async function uiTarsPlanning(userInstruction, options) {
132
132
  bbox: pointToBbox({
133
133
  x: startPoint[0],
134
134
  y: startPoint[1]
135
- }, size.width, size.height)
135
+ }, shotSize.width, shotSize.height)
136
136
  },
137
137
  to: {
138
138
  prompt: action.thought || '',
139
139
  bbox: pointToBbox({
140
140
  x: endPoint[0],
141
141
  y: endPoint[1]
142
- }, size.width, size.height)
142
+ }, shotSize.width, shotSize.height)
143
143
  }
144
144
  },
145
145
  thought: action.thought || ''
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/ui-tars-planning.mjs","sources":["../../../src/ai-model/ui-tars-planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from './service-caller/index';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst warnLog = getDebug('ui-tars-planning', { console: true });\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n const { uiTarsModelVersion } = modelConfig;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = 
`<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelConfig,\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? 
parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { size } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate: locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is 
required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n warnLog('No key found in action: hotkey. 
Will not perform action.');\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n 
};\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 
'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["debug","getDebug","warnLog","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","uiTarsModelVersion","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","size","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox"],"mappings":";;;;;;AA6BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,oBAAoB;IAAE,SAAS;AAAK;AAC7D,MAAME,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,C
AACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IACrE,MAAM,EAAEK,kBAAkB,EAAE,GAAGF;IAE/B,IAAIG,cAAcP;IAClB,IAAIK,eACFE,cAAc,CAAC,yBAAyB,EAAEF,cAAc,8CAA8C,EAAEL,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,4BAA4BF;IAEjD,MAAMG,mBAAmBP,QAAQ,UAAU,CAAC,MAAM;IAElDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKQ;gBACP;YACF;SACD;IACH;IAEA,MAAMC,MAAM,MAAMC,yBAChB;QACE;YACE,MAAM;YACN,SAASJ;QACX;WACGN,oBAAoB,QAAQ;KAChC,EACDE;IAGF,IAAIS;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,IAAI,EAAE,GAAGb;QACjB,MAAMc,cAAcC,aAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,KAAK,KAAK;gBACjB,QAAQA,KAAK,MAAM;YACrB;YACA,UAAUV;QACZ;QACAQ,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,IAAI,EAAE,GAAGb;IAEjBb,MACE,oBACAgB,oBACA,YACAkB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YA
EAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,OAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAC5D,MAAMmB,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEb;YACxDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAMnC,YACJ;4BAAE,GAAGwC,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrClB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQa,OAAO,OAAO,IAAI;wBAC1B,MAAMnC,YACJ;4BAAE,GAAGyC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCnB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAEtB;YACL,MAAMO,OAAOC,qBAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAXErC,QAAQ;aAYL,IAAIsC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAvC,MAAM,0BAA0BwC,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,
AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIxB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBwB,aAAa,IAAI,CAAC;YAGlB,IACE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB2B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMnB,eAAe;YACnB;eACGkB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIf,qBACRH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEArB,MAAM,oBAAoBkC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMe,MAAMC,WAAW/B,IAAI,OAAO;IAElCT,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASuC;IACX;IAEA,OAAO;QACL,SAASf;QACTe;QACA,OAAO9B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAI1D,KAAK,KAAK,CAAEqD,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAI3D,KAAK,KAAK,CAAEuD,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASb,SAAS2B,QAAgB,EAAE3C,IAAuC;IACzE,MAAM,CAACwC,GAAGC,EAAE,GAAGjC,KAAK,KAAK,CAACmC;IAC1B,OAAO;QAACH,IAAIxC,KAAK,KAAK;QAAEyC,IAAIzC,KAAK,MAAM;KAAC;AAC1C"}
1
+ {"version":3,"file":"ai-model/ui-tars-planning.mjs","sources":["../../../src/ai-model/ui-tars-planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from './service-caller/index';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst warnLog = getDebug('ui-tars-planning', { console: true });\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n const { uiTarsModelVersion } = modelConfig;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = 
`<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelConfig,\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { shotSize } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: shotSize.width,\n height: shotSize.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? 
parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { shotSize } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, shotSize);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n shotSize.width,\n shotSize.height,\n ),\n };\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate: locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, shotSize);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n shotSize.width,\n shotSize.height,\n ),\n };\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, shotSize);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n shotSize.width,\n shotSize.height,\n ),\n };\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n 
assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, shotSize);\n const endPoint = getPoint(action.action_inputs.end_box, shotSize);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n shotSize.width,\n shotSize.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n shotSize.width,\n shotSize.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n warnLog('No key found in action: hotkey. 
Will not perform action.');\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n 
};\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 
'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["debug","getDebug","warnLog","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","uiTarsModelVersion","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","shotSize","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","size"],"mappings":";;;;;;AA6BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,oBAAoB;IAAE,SAAS;AAAK;AAC7D,MAAME,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,
KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IACrE,MAAM,EAAEK,kBAAkB,EAAE,GAAGF;IAE/B,IAAIG,cAAcP;IAClB,IAAIK,eACFE,cAAc,CAAC,yBAAyB,EAAEF,cAAc,8CAA8C,EAAEL,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,4BAA4BF;IAEjD,MAAMG,mBAAmBP,QAAQ,UAAU,CAAC,MAAM;IAElDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKQ;gBACP;YACF;SACD;IACH;IAEA,MAAMC,MAAM,MAAMC,yBAChB;QACE;YACE,MAAM;YACN,SAASJ;QACX;WACGN,oBAAoB,QAAQ;KAChC,EACDE;IAGF,IAAIS;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,QAAQ,EAAE,GAAGb;QACrB,MAAMc,cAAcC,aAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,SAAS,KAAK;gBACrB,QAAQA,SAAS,MAAM;YACzB;YACA,UAAUV;QACZ;QACAQ,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,QAAQ,EAAE,GAAGb;IAErBb,MACE,oBACAgB,oBACA,YACAkB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,SAAS,KAAK,EACdA,SAAS,MAAM;YAEnB;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,SAAS,KAAK,EACdA,SAAS,
MAAM;YAEnB;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,SAAS,KAAK,EACdA,SAAS,MAAM;YAEnB;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,OAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAC5D,MAAMmB,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEb;YACxDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAMnC,YACJ;4BAAE,GAAGwC,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrClB,SAAS,KAAK,EACdA,SAAS,MAAM;oBAEnB;oBACA,IAAI;wBACF,QAAQa,OAAO,OAAO,IAAI;wBAC1B,MAAMnC,YACJ;4BAAE,GAAGyC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCnB,SAAS,KAAK,EACdA,SAAS,MAAM;oBAEnB;gBACF;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAEtB;YACL,MAAMO,OAAOC,qBAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAXErC,QAAQ;aAYL,IAAIsC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAvC,MAAM,0BAA0BwC,YAAY,YAAYD,OAAO,OAAO;QACxE;
IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIxB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBwB,aAAa,IAAI,CAAC;YAGlB,IACE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB2B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMnB,eAAe;YACnB;eACGkB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIf,qBACRH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEArB,MAAM,oBAAoBkC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMe,MAAMC,WAAW/B,IAAI,OAAO;IAElCT,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASuC;IACX;IAEA,OAAO;QACL,SAASf;QACTe;QACA,OAAO9B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAI1D,KAAK,KAAK,CAAEqD,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAI3D,KAAK,KAAK,CAAEuD,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASb,SAAS2B,QAAgB,EAAEC,IAAuC;IACzE,MAAM,CAACJ,GAAGC,EAAE,GAAGjC,KAAK,KAAK,CAACmC;IAC1B,OAAO;QAACH,IAAII,KAAK,KAAK;QAAEH,IAAIG,KAAK,MAAM;KAAC;AAC1C"}
@@ -100,7 +100,7 @@ function adaptBbox(bbox, width, height, modelFamily) {
100
100
  0,
101
101
  0
102
102
  ];
103
- result = 'doubao-vision' === modelFamily || isUITars(modelFamily) ? adaptDoubaoBbox(normalizedBbox, width, height) : 'gemini' === modelFamily ? adaptGeminiBbox(normalizedBbox, width, height) : 'qwen2.5-vl' === modelFamily ? adaptQwen2_5Bbox(normalizedBbox) : normalized01000(normalizedBbox, width, height);
103
+ result = 'doubao-vision' === modelFamily || 'doubao-seed' === modelFamily || isUITars(modelFamily) ? adaptDoubaoBbox(normalizedBbox, width, height) : 'gemini' === modelFamily ? adaptGeminiBbox(normalizedBbox, width, height) : 'qwen2.5-vl' === modelFamily ? adaptQwen2_5Bbox(normalizedBbox) : normalized01000(normalizedBbox, width, height);
104
104
  return result;
105
105
  }
106
106
  function normalized01000(bbox, width, height) {
@@ -224,8 +224,7 @@ const PointSchema = z.object({
224
224
  });
225
225
  const SizeSchema = z.object({
226
226
  width: z.number(),
227
- height: z.number(),
228
- dpr: z.number().optional()
227
+ height: z.number()
229
228
  });
230
229
  const RectSchema = PointSchema.and(SizeSchema).and(z.object({
231
230
  zoom: z.number().optional()
@@ -253,18 +252,6 @@ const MidsceneLocationInput = z.object({
253
252
  z.boolean()
254
253
  ]).optional()
255
254
  }).passthrough();
256
- z.object({
257
- [locateFieldFlagName]: z.literal(true),
258
- prompt: TUserPromptSchema,
259
- deepThink: z.boolean().optional(),
260
- cacheable: z.boolean().optional(),
261
- xpath: z.boolean().optional(),
262
- center: z.tuple([
263
- z.number(),
264
- z.number()
265
- ]),
266
- rect: RectSchema
267
- }).passthrough();
268
255
  const getMidsceneLocationSchema = ()=>MidsceneLocationInput;
269
256
  const ifMidsceneLocatorField = (field)=>{
270
257
  let actualField = field;