@ui-tars-test/agent-sdk 0.3.12 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"ToolCallEngine.d.ts","sourceRoot":"","sources":["../src/ToolCallEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,IAAI,EACJ,mCAAmC,EACnC,0BAA0B,EAC1B,mCAAmC,EACnC,mBAAmB,EACnB,wBAAwB,EACxB,gBAAgB,EAChB,0BAA0B,EAE1B,mBAAmB,EACnB,qBAAqB,EACrB,iBAAiB,EAClB,MAAM,qCAAqC,CAAC;AAK7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAKhE;;;;;;;;;GASG;AACH,qBAAa,sBAAuB,SAAQ,cAAc;IACxD,OAAO,CAAC,kBAAkB,CAAC,CAAqB;gBAEpC,kBAAkB,CAAC,EAAE,kBAAkB;IAKnD;;OAEG;IACH,aAAa,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM;IAI1D;;;;OAIG;IACH,cAAc,CAAC,OAAO,EAAE,mCAAmC,GAAG,0BAA0B;IAaxF;;;;OAIG;IACH,yBAAyB,IAAI,qBAAqB;IASlD;;;;OAIG;IACH,qBAAqB,CACnB,KAAK,EAAE,mBAAmB,EAC1B,KAAK,EAAE,qBAAqB,GAC3B,iBAAiB;IAsBpB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAI1B;;OAEG;IACH,wBAAwB,CAAC,KAAK,EAAE,qBAAqB,GAAG,mBAAmB;IAiG3E;;;;;OAKG;IACH,+BAA+B,CAC7B,yBAAyB,EAAE,gBAAgB,CAAC,qBAAqB,GAChE,mCAAmC;IAOtC;;;;;OAKG;IACH,qCAAqC,CACnC,eAAe,EAAE,wBAAwB,EAAE,GAC1C,0BAA0B,EAAE;CAchC"}
1
+ {"version":3,"file":"ToolCallEngine.d.ts","sourceRoot":"","sources":["../src/ToolCallEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,IAAI,EACJ,mCAAmC,EACnC,0BAA0B,EAC1B,mCAAmC,EACnC,mBAAmB,EACnB,wBAAwB,EACxB,gBAAgB,EAChB,0BAA0B,EAE1B,mBAAmB,EACnB,qBAAqB,EACrB,iBAAiB,EAClB,MAAM,qCAAqC,CAAC;AAK7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAKhE;;;;;;;;;GASG;AACH,qBAAa,sBAAuB,SAAQ,cAAc;IACxD,OAAO,CAAC,kBAAkB,CAAC,CAAqB;gBAEpC,kBAAkB,CAAC,EAAE,kBAAkB;IAKnD;;OAEG;IACH,aAAa,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM;IAI1D;;;;OAIG;IACH,cAAc,CAAC,OAAO,EAAE,mCAAmC,GAAG,0BAA0B;IAgBxF;;;;OAIG;IACH,yBAAyB,IAAI,qBAAqB;IASlD;;;;OAIG;IACH,qBAAqB,CACnB,KAAK,EAAE,mBAAmB,EAC1B,KAAK,EAAE,qBAAqB,GAC3B,iBAAiB;IA0BpB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAI1B;;OAEG;IACH,wBAAwB,CAAC,KAAK,EAAE,qBAAqB,GAAG,mBAAmB;IA2J3E;;;;;OAKG;IACH,+BAA+B,CAC7B,yBAAyB,EAAE,gBAAgB,CAAC,qBAAqB,GAChE,mCAAmC;IAOtC;;;;;OAKG;IACH,qCAAqC,CACnC,eAAe,EAAE,wBAAwB,EAAE,GAC1C,0BAA0B,EAAE;CAchC"}
@@ -58,7 +58,9 @@ class GUIAgentToolCallEngine extends tarko_agent_interface_namespaceObject.ToolC
58
58
  model: context.model,
59
59
  messages: context.messages,
60
60
  temperature: context.temperature || 0.7,
61
- stream: true
61
+ stream: true,
62
+ tool_choice: 'none',
63
+ tools: void 0
62
64
  };
63
65
  }
64
66
  initStreamProcessingState() {
@@ -70,12 +72,14 @@ class GUIAgentToolCallEngine extends tarko_agent_interface_namespaceObject.ToolC
70
72
  };
71
73
  }
72
74
  processStreamingChunk(chunk, state) {
73
- var _chunk_choices_, _chunk_choices_1;
75
+ var _chunk_choices_, _chunk_choices_1, _chunk_choices_2;
74
76
  const delta = null == (_chunk_choices_ = chunk.choices[0]) ? void 0 : _chunk_choices_.delta;
75
- if (null == delta ? void 0 : delta.content) state.contentBuffer += delta.content;
76
- if (null == (_chunk_choices_1 = chunk.choices[0]) ? void 0 : _chunk_choices_1.finish_reason) state.finishReason = chunk.choices[0].finish_reason;
77
+ const message = null == (_chunk_choices_1 = chunk.choices[0]) ? void 0 : _chunk_choices_1.message;
78
+ const content = (null == delta ? void 0 : delta.content) || (null == message ? void 0 : message.content) || '';
79
+ if (content) state.contentBuffer += content;
80
+ if (null == (_chunk_choices_2 = chunk.choices[0]) ? void 0 : _chunk_choices_2.finish_reason) state.finishReason = chunk.choices[0].finish_reason;
77
81
  return {
78
- content: (null == delta ? void 0 : delta.content) || '',
82
+ content: content,
79
83
  reasoningContent: '',
80
84
  hasToolCallUpdate: false,
81
85
  toolCalls: []
@@ -86,6 +90,9 @@ class GUIAgentToolCallEngine extends tarko_agent_interface_namespaceObject.ToolC
86
90
  }
87
91
  finalizeStreamProcessing(state) {
88
92
  const fullContent = state.contentBuffer;
93
+ console.log('[DEBUG] Full content length:', fullContent.length);
94
+ console.log('[DEBUG] Full content prefix:', fullContent.slice(0, 100));
95
+ console.log('[DEBUG] Full content suffix:', fullContent.slice(-100));
89
96
  defaultLogger.log("\u3010New Sys Prompt'\u3011 Model Response:", fullContent);
90
97
  defaultLogger.log('[finalizeStreamProcessing] fullContent', fullContent);
91
98
  console.log('[CLI DEBUG] [ToolCallEngine] Full model response received:', fullContent);
@@ -95,6 +102,41 @@ class GUIAgentToolCallEngine extends tarko_agent_interface_namespaceObject.ToolC
95
102
  defaultLogger.log('[finalizeStreamProcessing] Using custom action parser');
96
103
  console.log('[CLI DEBUG] [ToolCallEngine] Using custom action parser');
97
104
  }
105
+ if (/<seed:tool_call_never_used_/.test(fullContent)) {
106
+ console.log('[CLI DEBUG] [ToolCallEngine] Detected custom XML format. Attempting custom regex parser.');
107
+ try {
108
+ const actions = [];
109
+ const functionRegex = /<function_[^=>]*=([a-zA-Z0-9_]+)>([\s\S]*?)<\/function_[^>]*>/g;
110
+ let match;
111
+ while(null !== (match = functionRegex.exec(fullContent))){
112
+ const actionName = match[1];
113
+ const innerContent = match[2];
114
+ const args = {};
115
+ const paramRegex = /<parameter_[^=>]*=([a-zA-Z0-9_]+)>([\s\S]*?)<\/parameter_[^>]*>/g;
116
+ let paramMatch;
117
+ while(null !== (paramMatch = paramRegex.exec(innerContent)))args[paramMatch[1]] = paramMatch[2].trim();
118
+ actions.push({
119
+ type: actionName,
120
+ inputs: args,
121
+ thought: ''
122
+ });
123
+ }
124
+ const thoughtRegex = /<think_[^>]*>([\s\S]*?)<\/think_[^>]*>/g;
125
+ const thoughtMatch = thoughtRegex.exec(fullContent);
126
+ const thoughtContent = thoughtMatch ? thoughtMatch[1].trim() : '';
127
+ if (actions.length > 0) {
128
+ console.log(`[CLI DEBUG] [ToolCallEngine] Custom regex parser found ${actions.length} actions.`);
129
+ parsedGUIResponse = {
130
+ errorMessage: '',
131
+ rawContent: fullContent,
132
+ actions: actions,
133
+ reasoningContent: thoughtContent
134
+ };
135
+ }
136
+ } catch (e) {
137
+ console.error('[CLI DEBUG] [ToolCallEngine] Custom regex parser error:', e);
138
+ }
139
+ }
98
140
  if (!parsedGUIResponse) {
99
141
  console.log('[CLI DEBUG] [ToolCallEngine] Using default action parser (XML parser)');
100
142
  parsedGUIResponse = defaultParser.parsePrediction(fullContent);
@@ -1 +1 @@
1
- {"version":3,"file":"ToolCallEngine.js","sources":["webpack://@ui-tars-test/agent-sdk/webpack/runtime/define_property_getters","webpack://@ui-tars-test/agent-sdk/webpack/runtime/has_own_property","webpack://@ui-tars-test/agent-sdk/webpack/runtime/make_namespace_object","webpack://@ui-tars-test/agent-sdk/./src/ToolCallEngine.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import {\n ToolCallEngine,\n Tool,\n ToolCallEnginePrepareRequestContext,\n ChatCompletionCreateParams,\n ChatCompletionAssistantMessageParam,\n ChatCompletionChunk,\n MultimodalToolCallResult,\n AgentEventStream,\n ChatCompletionMessageParam,\n ChatCompletionMessageToolCall,\n ParsedModelResponse,\n StreamProcessingState,\n StreamChunkResult,\n} from '@ui-tars-test/tarko-agent-interface';\nimport { DefaultActionParser } from '@ui-tars-test/action-parser';\nimport { GUI_ADAPTED_TOOL_NAME } from './constants';\nimport { ConsoleLogger, LogLevel } from '@agent-infra/logger';\nimport { serializeAction } from '@ui-tars-test/shared/utils';\nimport { CustomActionParser } from '@ui-tars-test/shared/types';\n\nconst defaultParser = new DefaultActionParser();\nconst defaultLogger = new ConsoleLogger('[GUIAgent:ToolCallEngine]', LogLevel.DEBUG);\n\n/**\n * GUIAgentToolCallEngine - Minimal prompt engineering tool call engine\n *\n * This is the simplest possible implementation of a tool call engine that:\n * 1. Uses prompt engineering to instruct the LLM to output tool calls in a specific format\n * 2. Parses tool calls from LLM response text using simple regex matching\n * 3. Does not support streaming (focuses on core functionality only)\n *\n * Format used: <tool_call>{\"name\": \"tool_name\", \"arguments\": {...}}</tool_call>\n */\nexport class GUIAgentToolCallEngine extends ToolCallEngine {\n private customActionParser?: CustomActionParser;\n\n constructor(customActionParser?: CustomActionParser) {\n super();\n this.customActionParser = customActionParser;\n }\n\n /**\n * Prepare system prompt with tool information and instructions\n */\n preparePrompt(instructions: string, tools: Tool[]): string {\n return instructions;\n }\n\n /**\n * Prepare request parameters for the LLM\n *\n * FIXME: move to base tool call engine.\n */\n prepareRequest(context: ToolCallEnginePrepareRequestContext): ChatCompletionCreateParams {\n defaultLogger.log(\n \"【New Sys Prompt'】 System Prompt:\",\n JSON.stringify(context.messages.find((m) => m.role === 'system')?.content || ''),\n );\n return {\n model: context.model,\n messages: context.messages,\n temperature: context.temperature || 0.7,\n stream: true,\n };\n }\n\n /**\n * Initialize processing state (minimal implementation)\n *\n * FIXME: move to base tool call engine.\n */\n initStreamProcessingState(): StreamProcessingState {\n return {\n contentBuffer: '',\n toolCalls: [],\n reasoningBuffer: '',\n finishReason: null,\n };\n }\n\n /**\n * Process streaming chunks - simply accumulate content\n *\n * FIXME: make it optional\n */\n processStreamingChunk(\n chunk: ChatCompletionChunk,\n state: StreamProcessingState,\n ): StreamChunkResult {\n const delta = chunk.choices[0]?.delta;\n\n // Accumulate content\n if (delta?.content) {\n state.contentBuffer += delta.content;\n }\n\n // Record finish reason\n if (chunk.choices[0]?.finish_reason) {\n state.finishReason = chunk.choices[0].finish_reason;\n }\n\n // Return incremental content without tool call detection during streaming\n return {\n content: delta?.content || '',\n reasoningContent: '',\n hasToolCallUpdate: false,\n toolCalls: [],\n };\n }\n\n /**\n * Generate a tool call ID\n */\n private generateToolCallId(): string {\n return `call_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;\n }\n\n /**\n * Extract tool calls from complete response text\n */\n finalizeStreamProcessing(state: StreamProcessingState): ParsedModelResponse {\n const fullContent = state.contentBuffer;\n defaultLogger.log(\"【New Sys Prompt'】 Model Response:\", fullContent);\n defaultLogger.log('[finalizeStreamProcessing] fullContent', fullContent);\n\n // Add explicit log to confirm XML parsing intent\n console.log('[CLI DEBUG] [ToolCallEngine] Full model response received:', fullContent);\n\n // Try custom action parser first if available\n let parsedGUIResponse = null;\n if (this.customActionParser) {\n parsedGUIResponse = this.customActionParser(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using custom action parser');\n console.log('[CLI DEBUG] [ToolCallEngine] Using custom action parser');\n }\n\n // Fall back to default parser if custom parser is not available or returns null\n if (!parsedGUIResponse) {\n console.log('[CLI DEBUG] [ToolCallEngine] Using default action parser (XML parser)');\n parsedGUIResponse = defaultParser.parsePrediction(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using default action parser');\n }\n\n if (parsedGUIResponse) {\n console.log(\n '[CLI DEBUG] [ToolCallEngine] Parsed response:',\n JSON.stringify(parsedGUIResponse, null, 2),\n );\n } else {\n console.log('[CLI DEBUG] [ToolCallEngine] Parsing failed or returned null');\n }\n\n if (!parsedGUIResponse || parsedGUIResponse.errorMessage) {\n return {\n content: '',\n rawContent: fullContent,\n toolCalls: [\n {\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: '',\n step: '',\n thought: '',\n operator_action: null,\n errorMessage:\n parsedGUIResponse?.errorMessage ?? 'Failed to parse GUI Action from output',\n }),\n },\n },\n ],\n finishReason: 'tool_calls',\n };\n }\n\n const toolCalls: ChatCompletionMessageToolCall[] = [];\n\n let finished = false;\n let finishMessage: string | null = null;\n for (const action of parsedGUIResponse.actions) {\n if (action.type === 'finished') {\n finished = true;\n finishMessage = action.inputs?.content ?? null;\n continue;\n }\n toolCalls.push({\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: serializeAction(action),\n step: '',\n thought: parsedGUIResponse.reasoningContent ?? '',\n operator_action: action,\n }),\n },\n });\n }\n\n const content = finishMessage ?? '';\n const reasoningContent = parsedGUIResponse.reasoningContent ?? '';\n const contentForWebUI = content.replace(/\\\\n|\\n/g, '<br>');\n const reasoningContentForWebUI = reasoningContent.replace(/\\\\n|\\n/g, '<br>');\n\n // No tool calls found - return regular response\n return {\n content: contentForWebUI,\n rawContent: fullContent,\n reasoningContent: reasoningContentForWebUI,\n toolCalls,\n finishReason: toolCalls.length > 0 && !finished ? 'tool_calls' : 'stop',\n };\n }\n\n /**\n * Build assistant message for conversation history\n * For PE engines, we preserve the raw content including tool call markup\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalAssistantMessage(\n currentLoopAssistantEvent: AgentEventStream.AssistantMessageEvent,\n ): ChatCompletionAssistantMessageParam {\n return {\n role: 'assistant',\n content: currentLoopAssistantEvent.rawContent || currentLoopAssistantEvent.content,\n };\n }\n\n /**\n * Build tool result messages as user messages\n * PE engines format tool results as user input for next iteration\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalToolCallResultMessages(\n toolCallResults: MultimodalToolCallResult[],\n ): ChatCompletionMessageParam[] {\n return toolCallResults.map((result) => {\n // Extract text content from multimodal result\n const textContent = result.content\n .filter((part) => part.type === 'text')\n .map((part) => (part as { text: string }).text)\n .join('');\n\n return {\n role: 'user',\n content: `Tool \"${result.toolName}\" result:\\n${textContent}`,\n };\n });\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","defaultParser","DefaultActionParser","defaultLogger","ConsoleLogger","LogLevel","GUIAgentToolCallEngine","ToolCallEngine","instructions","tools","context","_context_messages_find","JSON","m","chunk","state","_chunk_choices_","_chunk_choices_1","delta","Date","Math","fullContent","console","parsedGUIResponse","GUI_ADAPTED_TOOL_NAME","toolCalls","finished","finishMessage","action","_action_inputs","serializeAction","content","reasoningContent","contentForWebUI","reasoningContentForWebUI","currentLoopAssistantEvent","toolCallResults","result","textContent","part","customActionParser"],"mappings":";;;;;;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;ACeA,MAAMI,gBAAgB,IAAIC,8BAAAA,mBAAmBA;AAC7C,MAAMC,gBAAgB,IAAIC,uBAAAA,aAAaA,CAAC,6BAA6BC,uBAAAA,QAAAA,CAAAA,KAAc;AAY5E,MAAMC,+BAA+BC,sCAAAA,cAAcA;IAWxD,cAAcC,YAAoB,EAAEC,KAAa,EAAU;QACzD,OAAOD;IACT;IAOA,eAAeE,OAA4C,EAA8B;YAGtEC;QAFjBR,cAAc,GAAG,CACf,8CACAS,KAAK,SAAS,CAACD,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,QAAQ,CAAC,IAAI,CAAC,CAACE,IAAMA,AAAW,aAAXA,EAAE,IAAI,CAAa,IAAhDF,KAAAA,IAAAA,uBAAmD,OAAO,AAAD,KAAK;QAE/E,OAAO;YACL,OAAOD,QAAQ,KAAK;YACpB,UAAUA,QAAQ,QAAQ;YAC1B,aAAaA,QAAQ,WAAW,IAAI;YACpC,QAAQ;QACV;IACF;IAOA,4BAAmD;QACjD,OAAO;YACL,eAAe;YACf,WAAW,EAAE;YACb,iBAAiB;YACjB,cAAc;QAChB;IACF;IAOA,sBACEI,KAA0B,EAC1BC,KAA4B,EACT;YACLC,iBAQVC;QARJ,MAAMC,QAAQ,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,gBAAkB,KAAK;QAGrC,IAAIE,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,OAAO,EAChBH,MAAM,aAAa,IAAIG,MAAM,OAAO;QAItC,IAAI,QAAAD,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAAkB,aAAa,EACjCF,MAAM,YAAY,GAAGD,MAAM,OAAO,CAAC,EAAE,CAAC,aAAa;QAIrD,OAAO;YACL,SAASI,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,OAAO,AAAD,KAAK;YAC3B,kBAAkB;YAClB,mBAAmB;YACnB,WAAW,EAAE;QACf;IACF;IAKQ,qBAA6B;QACnC,OAAO,CAAC,KAAK,EAAEC,KAAK,GAAG,GAAG,CAAC,EAAEC,KAAK,MAAM,GAAG,QAAQ,CAAC,IAAI,SAAS,CAAC,GAAG,KAAK;IAC5E;IAKA,yBAAyBL,KAA4B,EAAuB;QAC1E,MAAMM,cAAcN,MAAM,aAAa;QACvCZ,cAAc,GAAG,CAAC,+CAAqCkB;QACvDlB,cAAc,GAAG,CAAC,0CAA0CkB;QAG5DC,QAAQ,GAAG,CAAC,8DAA8DD;QAG1E,IAAIE,oBAAoB;QACxB,IAAI,IAAI,CAAC,kBAAkB,EAAE;YAC3BA,oBAAoB,IAAI,CAAC,kBAAkB,CAACF;YAC5ClB,cAAc,GAAG,CAAC;YAClBmB,QAAQ,GAAG,CAAC;QACd;QAGA,IAAI,CAACC,mBAAmB;YACtBD,QAAQ,GAAG,CAAC;YACZC,oBAAoBtB,cAAc,eAAe,CAACoB;YAClDlB,cAAc,GAAG,CAAC;QACpB;QAEA,IAAIoB,mBACFD,QAAQ,GAAG,CACT,iDACAV,KAAK,SAAS,CAACW,mBAAmB,MAAM;aAG1CD,QAAQ,GAAG,CAAC;QAGd,IAAI,CAACC,qBAAqBA,kBAAkB,YAAY,EACtD,OAAO;YACL,SAAS;YACT,YAAYF;YACZ,WAAW;gBACT;oBACE,IAAI,IAAI,CAAC,kBAAkB;oBAC3B,MAAM;oBACN,UAAU;wBACR,MAAMG,sCAAAA,qBAAqBA;wBAC3B,WAAWZ,KAAK,SAAS,CAAC;4BACxB,QAAQ;4BACR,MAAM;4BACN,SAAS;4BACT,iBAAiB;4BACjB,cACEW,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,YAAY,AAAD,KAAK;wBACvC;oBACF;gBACF;aACD;YACD,cAAc;QAChB;QAGF,MAAME,YAA6C,EAAE;QAErD,IAAIC,WAAW;QACf,IAAIC,gBAA+B;QACnC,KAAK,MAAMC,UAAUL,kBAAkB,OAAO,CAAE;YAC9C,IAAIK,AAAgB,eAAhBA,OAAO,IAAI,EAAiB;oBAEdC;gBADhBH,WAAW;gBACXC,gBAAgBE,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,OAAO,AAAD,KAAK;gBAC1C;YACF;YACAJ,UAAU,IAAI,CAAC;gBACb,IAAI,IAAI,CAAC,kBAAkB;gBAC3B,MAAM;gBACN,UAAU;oBACR,MAAMD,sCAAAA,qBAAqBA;oBAC3B,WAAWZ,KAAK,SAAS,CAAC;wBACxB,QAAQkB,AAAAA,IAAAA,sBAAAA,eAAAA,AAAAA,EAAgBF;wBACxB,MAAM;wBACN,SAASL,kBAAkB,gBAAgB,IAAI;wBAC/C,iBAAiBK;oBACnB;gBACF;YACF;QACF;QAEA,MAAMG,UAAUJ,iBAAiB;QACjC,MAAMK,mBAAmBT,kBAAkB,gBAAgB,IAAI;QAC/D,MAAMU,kBAAkBF,QAAQ,OAAO,CAAC,WAAW;QACnD,MAAMG,2BAA2BF,iBAAiB,OAAO,CAAC,WAAW;QAGrE,OAAO;YACL,SAASC;YACT,YAAYZ;YACZ,kBAAkBa;YAClBT;YACA,cAAcA,UAAU,MAAM,GAAG,KAAK,CAACC,WAAW,eAAe;QACnE;IACF;IAQA,gCACES,yBAAiE,EAC5B;QACrC,OAAO;YACL,MAAM;YACN,SAASA,0BAA0B,UAAU,IAAIA,0BAA0B,OAAO;QACpF;IACF;IAQA,sCACEC,eAA2C,EACb;QAC9B,OAAOA,gBAAgB,GAAG,CAAC,CAACC;YAE1B,MAAMC,cAAcD,OAAO,OAAO,CAC/B,MAAM,CAAC,CAACE,OAASA,AAAc,WAAdA,KAAK,IAAI,EAC1B,GAAG,CAAC,CAACA,OAAUA,KAA0B,IAAI,EAC7C,IAAI,CAAC;YAER,OAAO;gBACL,MAAM;gBACN,SAAS,CAAC,MAAM,EAAEF,OAAO,QAAQ,CAAC,WAAW,EAAEC,aAAa;YAC9D;QACF;IACF;IAzNA,YAAYE,kBAAuC,CAAE;QACnD,KAAK,IAHP,uBAAQ,sBAAR;QAIE,IAAI,CAAC,kBAAkB,GAAGA;IAC5B;AAuNF"}
1
+ {"version":3,"file":"ToolCallEngine.js","sources":["webpack://@ui-tars-test/agent-sdk/webpack/runtime/define_property_getters","webpack://@ui-tars-test/agent-sdk/webpack/runtime/has_own_property","webpack://@ui-tars-test/agent-sdk/webpack/runtime/make_namespace_object","webpack://@ui-tars-test/agent-sdk/./src/ToolCallEngine.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import {\n ToolCallEngine,\n Tool,\n ToolCallEnginePrepareRequestContext,\n ChatCompletionCreateParams,\n ChatCompletionAssistantMessageParam,\n ChatCompletionChunk,\n MultimodalToolCallResult,\n AgentEventStream,\n ChatCompletionMessageParam,\n ChatCompletionMessageToolCall,\n ParsedModelResponse,\n StreamProcessingState,\n StreamChunkResult,\n} from '@ui-tars-test/tarko-agent-interface';\nimport { DefaultActionParser } from '@ui-tars-test/action-parser';\nimport { GUI_ADAPTED_TOOL_NAME } from './constants';\nimport { ConsoleLogger, LogLevel } from '@agent-infra/logger';\nimport { serializeAction } from '@ui-tars-test/shared/utils';\nimport { CustomActionParser } from '@ui-tars-test/shared/types';\n\nconst defaultParser = new DefaultActionParser();\nconst defaultLogger = new ConsoleLogger('[GUIAgent:ToolCallEngine]', LogLevel.DEBUG);\n\n/**\n * GUIAgentToolCallEngine - Minimal prompt engineering tool call engine\n *\n * This is the simplest possible implementation of a tool call engine that:\n * 1. Uses prompt engineering to instruct the LLM to output tool calls in a specific format\n * 2. Parses tool calls from LLM response text using simple regex matching\n * 3. Does not support streaming (focuses on core functionality only)\n *\n * Format used: <tool_call>{\"name\": \"tool_name\", \"arguments\": {...}}</tool_call>\n */\nexport class GUIAgentToolCallEngine extends ToolCallEngine {\n private customActionParser?: CustomActionParser;\n\n constructor(customActionParser?: CustomActionParser) {\n super();\n this.customActionParser = customActionParser;\n }\n\n /**\n * Prepare system prompt with tool information and instructions\n */\n preparePrompt(instructions: string, tools: Tool[]): string {\n return instructions;\n }\n\n /**\n * Prepare request parameters for the LLM\n *\n * FIXME: move to base tool call engine.\n */\n prepareRequest(context: ToolCallEnginePrepareRequestContext): ChatCompletionCreateParams {\n defaultLogger.log(\n \"【New Sys Prompt'】 System Prompt:\",\n JSON.stringify(context.messages.find((m) => m.role === 'system')?.content || ''),\n );\n return {\n model: context.model,\n messages: context.messages,\n temperature: context.temperature || 0.7,\n stream: true,\n // 强制禁用 Native Function Calling,确保模型将 Action 输出到 content 中(以 XML 格式)\n tool_choice: 'none',\n tools: undefined, // 显式移除 tools 参数,防止某些 provider 在 tool_choice: none 时依然启用 function calling\n };\n }\n\n /**\n * Initialize processing state (minimal implementation)\n *\n * FIXME: move to base tool call engine.\n */\n initStreamProcessingState(): StreamProcessingState {\n return {\n contentBuffer: '',\n toolCalls: [],\n reasoningBuffer: '',\n finishReason: null,\n };\n }\n\n /**\n * Process streaming chunks - simply accumulate content\n *\n * FIXME: make it optional\n */\n processStreamingChunk(\n chunk: ChatCompletionChunk,\n state: StreamProcessingState,\n ): StreamChunkResult {\n // For non-streaming requests, the entire response comes in one chunk\n const delta = chunk.choices[0]?.delta;\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const message = (chunk.choices[0] as any)?.message;\n\n // Accumulate content from delta (streaming) or message (non-streaming)\n const content = delta?.content || message?.content || '';\n if (content) {\n state.contentBuffer += content;\n }\n\n // Record finish reason\n if (chunk.choices[0]?.finish_reason) {\n state.finishReason = chunk.choices[0].finish_reason;\n }\n\n // Return incremental content without tool call detection during streaming\n return {\n content: content,\n reasoningContent: '',\n hasToolCallUpdate: false,\n toolCalls: [],\n };\n }\n\n /**\n * Generate a tool call ID\n */\n private generateToolCallId(): string {\n return `call_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;\n }\n\n /**\n * Extract tool calls from complete response text\n */\n finalizeStreamProcessing(state: StreamProcessingState): ParsedModelResponse {\n const fullContent = state.contentBuffer;\n console.log('[DEBUG] Full content length:', fullContent.length);\n console.log('[DEBUG] Full content prefix:', fullContent.slice(0, 100));\n console.log('[DEBUG] Full content suffix:', fullContent.slice(-100));\n defaultLogger.log(\"【New Sys Prompt'】 Model Response:\", fullContent);\n defaultLogger.log('[finalizeStreamProcessing] fullContent', fullContent);\n\n // Add explicit log to confirm XML parsing intent\n console.log('[CLI DEBUG] [ToolCallEngine] Full model response received:', fullContent);\n\n // Try custom action parser first if available\n let parsedGUIResponse = null;\n if (this.customActionParser) {\n parsedGUIResponse = this.customActionParser(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using custom action parser');\n console.log('[CLI DEBUG] [ToolCallEngine] Using custom action parser');\n }\n\n // Priority: Custom Regex Parser > Default Parser\n // Check if the content contains the specific XML format with dynamic suffixes\n if (/<seed:tool_call_never_used_/.test(fullContent)) {\n console.log(\n '[CLI DEBUG] [ToolCallEngine] Detected custom XML format. Attempting custom regex parser.',\n );\n try {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const actions: any[] = [];\n // Regex to match <function_...=name>...</function...>\n // Handles dynamic suffixes like _never_used_...\n const functionRegex = /<function_[^=>]*=([a-zA-Z0-9_]+)>([\\s\\S]*?)<\\/function_[^>]*>/g;\n let match;\n\n while ((match = functionRegex.exec(fullContent)) !== null) {\n const actionName = match[1];\n const innerContent = match[2];\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const args: any = {};\n\n // Regex to match <parameter_...=key>value</parameter...>\n const paramRegex = /<parameter_[^=>]*=([a-zA-Z0-9_]+)>([\\s\\S]*?)<\\/parameter_[^>]*>/g;\n let paramMatch;\n while ((paramMatch = paramRegex.exec(innerContent)) !== null) {\n args[paramMatch[1]] = paramMatch[2].trim();\n }\n\n actions.push({\n type: actionName,\n inputs: args,\n thought: '',\n });\n }\n\n // Also try to extract thought from <think_...>...</think_...>\n const thoughtRegex = /<think_[^>]*>([\\s\\S]*?)<\\/think_[^>]*>/g;\n const thoughtMatch = thoughtRegex.exec(fullContent);\n const thoughtContent = thoughtMatch ? thoughtMatch[1].trim() : '';\n\n if (actions.length > 0) {\n console.log(\n `[CLI DEBUG] [ToolCallEngine] Custom regex parser found ${actions.length} actions.`,\n );\n parsedGUIResponse = {\n errorMessage: '',\n rawContent: fullContent,\n actions: actions,\n reasoningContent: thoughtContent,\n };\n }\n } catch (e) {\n console.error('[CLI DEBUG] [ToolCallEngine] Custom regex parser error:', e);\n }\n }\n\n // Fall back to default parser if regex parser didn't produce results\n if (!parsedGUIResponse) {\n console.log('[CLI DEBUG] [ToolCallEngine] Using default action parser (XML parser)');\n parsedGUIResponse = defaultParser.parsePrediction(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using default action parser');\n }\n\n if (parsedGUIResponse) {\n console.log(\n '[CLI DEBUG] [ToolCallEngine] Parsed response:',\n JSON.stringify(parsedGUIResponse, null, 2),\n );\n } else {\n console.log('[CLI DEBUG] [ToolCallEngine] Parsing failed or returned null');\n }\n\n if (!parsedGUIResponse || parsedGUIResponse.errorMessage) {\n return {\n content: '',\n rawContent: fullContent,\n toolCalls: [\n {\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: '',\n step: '',\n thought: '',\n operator_action: null,\n errorMessage:\n parsedGUIResponse?.errorMessage ?? 'Failed to parse GUI Action from output',\n }),\n },\n },\n ],\n finishReason: 'tool_calls',\n };\n }\n\n const toolCalls: ChatCompletionMessageToolCall[] = [];\n\n let finished = false;\n let finishMessage: string | null = null;\n for (const action of parsedGUIResponse.actions) {\n if (action.type === 'finished') {\n finished = true;\n finishMessage = action.inputs?.content ?? null;\n continue;\n }\n toolCalls.push({\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: serializeAction(action),\n step: '',\n thought: parsedGUIResponse.reasoningContent ?? '',\n operator_action: action,\n }),\n },\n });\n }\n\n const content = finishMessage ?? '';\n const reasoningContent = parsedGUIResponse.reasoningContent ?? '';\n const contentForWebUI = content.replace(/\\\\n|\\n/g, '<br>');\n const reasoningContentForWebUI = reasoningContent.replace(/\\\\n|\\n/g, '<br>');\n\n // No tool calls found - return regular response\n return {\n content: contentForWebUI,\n rawContent: fullContent,\n reasoningContent: reasoningContentForWebUI,\n toolCalls,\n finishReason: toolCalls.length > 0 && !finished ? 'tool_calls' : 'stop',\n };\n }\n\n /**\n * Build assistant message for conversation history\n * For PE engines, we preserve the raw content including tool call markup\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalAssistantMessage(\n currentLoopAssistantEvent: AgentEventStream.AssistantMessageEvent,\n ): ChatCompletionAssistantMessageParam {\n return {\n role: 'assistant',\n content: currentLoopAssistantEvent.rawContent || currentLoopAssistantEvent.content,\n };\n }\n\n /**\n * Build tool result messages as user messages\n * PE engines format tool results as user input for next iteration\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalToolCallResultMessages(\n toolCallResults: MultimodalToolCallResult[],\n ): ChatCompletionMessageParam[] {\n return toolCallResults.map((result) => {\n // Extract text content from multimodal result\n const textContent = result.content\n .filter((part) => part.type === 'text')\n .map((part) => (part as { text: string }).text)\n .join('');\n\n return {\n role: 'user',\n content: `Tool \"${result.toolName}\" result:\\n${textContent}`,\n };\n });\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","defaultParser","DefaultActionParser","defaultLogger","ConsoleLogger","LogLevel","GUIAgentToolCallEngine","ToolCallEngine","instructions","tools","context","_context_messages_find","JSON","m","undefined","chunk","state","_chunk_choices_","_chunk_choices_1","_chunk_choices_2","delta","message","content","Date","Math","fullContent","console","parsedGUIResponse","actions","functionRegex","match","actionName","innerContent","args","paramRegex","paramMatch","thoughtRegex","thoughtMatch","thoughtContent","e","GUI_ADAPTED_TOOL_NAME","toolCalls","finished","finishMessage","action","_action_inputs","serializeAction","reasoningContent","contentForWebUI","reasoningContentForWebUI","currentLoopAssistantEvent","toolCallResults","result","textContent","part","customActionParser"],"mappings":";;;;;;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;ACeA,MAAMI,gBAAgB,IAAIC,8BAAAA,mBAAmBA;AAC7C,MAAMC,gBAAgB,IAAIC,uBAAAA,aAAaA,CAAC,6BAA6BC,uBAAAA,QAAAA,CAAAA,KAAc;AAY5E,MAAMC,+BAA+BC,sCAAAA,cAAcA;IAWxD,cAAcC,YAAoB,EAAEC,KAAa,EAAU;QACzD,OAAOD;IACT;IAOA,eAAeE,OAA4C,EAA8B;YAGtEC;QAFjBR,cAAc,GAAG,CACf,8CACAS,KAAK,SAAS,CAACD,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,QAAQ,CAAC,IAAI,CAAC,CAACE,IAAMA,AAAW,aAAXA,EAAE,IAAI,CAAa,IAAhDF,KAAAA,IAAAA,uBAAmD,OAAO,AAAD,KAAK;QAE/E,OAAO;YACL,OAAOD,QAAQ,KAAK;YACpB,UAAUA,QAAQ,QAAQ;YAC1B,aAAaA,QAAQ,WAAW,IAAI;YACpC,QAAQ;YAER,aAAa;YACb,OAAOI;QACT;IACF;IAOA,4BAAmD;QACjD,OAAO;YACL,eAAe;YACf,WAAW,EAAE;YACb,iBAAiB;YACjB,cAAc;QAChB;IACF;IAOA,sBACEC,KAA0B,EAC1BC,KAA4B,EACT;YAELC,iBAEGC,kBASbC;QAXJ,MAAMC,QAAQ,QAAAH,CAAAA,kBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,gBAAkB,KAAK;QAErC,MAAMI,UAAU,QAACH,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAA0B,OAAO;QAGlD,MAAMI,UAAUF,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,OAAO,AAAD,KAAKC,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD,KAAK;QACtD,IAAIC,SACFN,MAAM,aAAa,IAAIM;QAIzB,IAAI,QAAAH,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAAkB,aAAa,EACjCH,MAAM,YAAY,GAAGD,MAAM,OAAO,CAAC,EAAE,CAAC,aAAa;QAIrD,OAAO;YACL,SAASO;YACT,kBAAkB;YAClB,mBAAmB;YACnB,WAAW,EAAE;QACf;IACF;IAKQ,qBAA6B;QACnC,OAAO,CAAC,KAAK,EAAEC,KAAK,GAAG,GAAG,CAAC,EAAEC,KAAK,MAAM,GAAG,QAAQ,CAAC,IAAI,SAAS,CAAC,GAAG,KAAK;IAC5E;IAKA,yBAAyBR,KAA4B,EAAuB;QAC1E,MAAMS,cAAcT,MAAM,aAAa;QACvCU,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,MAAM;QAC9DC,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,KAAK,CAAC,GAAG;QACjEC,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,KAAK,CAAC;QAC9DtB,cAAc,GAAG,CAAC,+CAAqCsB;QACvDtB,cAAc,GAAG,CAAC,0CAA0CsB;QAG5DC,QAAQ,GAAG,CAAC,8DAA8DD;QAG1E,IAAIE,oBAAoB;QACxB,IAAI,IAAI,CAAC,kBAAkB,EAAE;YAC3BA,oBAAoB,IAAI,CAAC,kBAAkB,CAACF;YAC5CtB,cAAc,GAAG,CAAC;YAClBuB,QAAQ,GAAG,CAAC;QACd;QAIA,IAAI,8BAA8B,IAAI,CAACD,cAAc;YACnDC,QAAQ,GAAG,CACT;YAEF,IAAI;gBAEF,MAAME,UAAiB,EAAE;gBAGzB,MAAMC,gBAAgB;gBACtB,IAAIC;gBAEJ,MAAQA,AAA6C,SAA7CA,CAAAA,QAAQD,cAAc,IAAI,CAACJ,YAAW,EAAa;oBACzD,MAAMM,aAAaD,KAAK,CAAC,EAAE;oBAC3B,MAAME,eAAeF,KAAK,CAAC,EAAE;oBAE7B,MAAMG,OAAY,CAAC;oBAGnB,MAAMC,aAAa;oBACnB,IAAIC;oBACJ,MAAQA,AAAgD,SAAhDA,CAAAA,aAAaD,WAAW,IAAI,CAACF,aAAY,EAC/CC,IAAI,CAACE,UAAU,CAAC,EAAE,CAAC,GAAGA,UAAU,CAAC,EAAE,CAAC,IAAI;oBAG1CP,QAAQ,IAAI,CAAC;wBACX,MAAMG;wBACN,QAAQE;wBACR,SAAS;oBACX;gBACF;gBAGA,MAAMG,eAAe;gBACrB,MAAMC,eAAeD,aAAa,IAAI,CAACX;gBACvC,MAAMa,iBAAiBD,eAAeA,YAAY,CAAC,EAAE,CAAC,IAAI,KAAK;gBAE/D,IAAIT,QAAQ,MAAM,GAAG,GAAG;oBACtBF,QAAQ,GAAG,CACT,CAAC,uDAAuD,EAAEE,QAAQ,MAAM,CAAC,SAAS,CAAC;oBAErFD,oBAAoB;wBAClB,cAAc;wBACd,YAAYF;wBACZ,SAASG;wBACT,kBAAkBU;oBACpB;gBACF;YACF,EAAE,OAAOC,GAAG;gBACVb,QAAQ,KAAK,CAAC,2DAA2Da;YAC3E;QACF;QAGA,IAAI,CAACZ,mBAAmB;YACtBD,QAAQ,GAAG,CAAC;YACZC,oBAAoB1B,cAAc,eAAe,CAACwB;YAClDtB,cAAc,GAAG,CAAC;QACpB;QAEA,IAAIwB,mBACFD,QAAQ,GAAG,CACT,iDACAd,KAAK,SAAS,CAACe,mBAAmB,MAAM;aAG1CD,QAAQ,GAAG,CAAC;QAGd,IAAI,CAACC,qBAAqBA,kBAAkB,YAAY,EACtD,OAAO;YACL,SAAS;YACT,YAAYF;YACZ,WAAW;gBACT;oBACE,IAAI,IAAI,CAAC,kBAAkB;oBAC3B,MAAM;oBACN,UAAU;wBACR,MAAMe,sCAAAA,qBAAqBA;wBAC3B,WAAW5B,KAAK,SAAS,CAAC;4BACxB,QAAQ;4BACR,MAAM;4BACN,SAAS;4BACT,iBAAiB;4BACjB,cACEe,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,YAAY,AAAD,KAAK;wBACvC;oBACF;gBACF;aACD;YACD,cAAc;QAChB;QAGF,MAAMc,YAA6C,EAAE;QAErD,IAAIC,WAAW;QACf,IAAIC,gBAA+B;QACnC,KAAK,MAAMC,UAAUjB,kBAAkB,OAAO,CAAE;YAC9C,IAAIiB,AAAgB,eAAhBA,OAAO,IAAI,EAAiB;oBAEdC;gBADhBH,WAAW;gBACXC,gBAAgBE,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,OAAO,AAAD,KAAK;gBAC1C;YACF;YACAJ,UAAU,IAAI,CAAC;gBACb,IAAI,IAAI,CAAC,kBAAkB;gBAC3B,MAAM;gBACN,UAAU;oBACR,MAAMD,sCAAAA,qBAAqBA;oBAC3B,WAAW5B,KAAK,SAAS,CAAC;wBACxB,QAAQkC,AAAAA,IAAAA,sBAAAA,eAAAA,AAAAA,EAAgBF;wBACxB,MAAM;wBACN,SAASjB,kBAAkB,gBAAgB,IAAI;wBAC/C,iBAAiBiB;oBACnB;gBACF;YACF;QACF;QAEA,MAAMtB,UAAUqB,iBAAiB;QACjC,MAAMI,mBAAmBpB,kBAAkB,gBAAgB,IAAI;QAC/D,MAAMqB,kBAAkB1B,QAAQ,OAAO,CAAC,WAAW;QACnD,MAAM2B,2BAA2BF,iBAAiB,OAAO,CAAC,WAAW;QAGrE,OAAO;YACL,SAASC;YACT,YAAYvB;YACZ,kBAAkBwB;YAClBR;YACA,cAAcA,UAAU,MAAM,GAAG,KAAK,CAACC,WAAW,eAAe;QACnE;IACF;IAQA,gCACEQ,yBAAiE,EAC5B;QACrC,OAAO;YACL,MAAM;YACN,SAASA,0BAA0B,UAAU,IAAIA,0BAA0B,OAAO;QACpF;IACF;IAQA,sCACEC,eAA2C,EACb;QAC9B,OAAOA,gBAAgB,GAAG,CAAC,CAACC;YAE1B,MAAMC,cAAcD,OAAO,OAAO,CAC/B,MAAM,CAAC,CAACE,OAASA,AAAc,WAAdA,KAAK,IAAI,EAC1B,GAAG,CAAC,CAACA,OAAUA,KAA0B,IAAI,EAC7C,IAAI,CAAC;YAER,OAAO;gBACL,MAAM;gBACN,SAAS,CAAC,MAAM,EAAEF,OAAO,QAAQ,CAAC,WAAW,EAAEC,aAAa;YAC9D;QACF;IACF;IA1RA,YAAYE,kBAAuC,CAAE;QACnD,KAAK,IAHP,uBAAQ,sBAAR;QAIE,IAAI,CAAC,kBAAkB,GAAGA;IAC5B;AAwRF"}
@@ -30,7 +30,9 @@ class GUIAgentToolCallEngine extends ToolCallEngine {
30
30
  model: context.model,
31
31
  messages: context.messages,
32
32
  temperature: context.temperature || 0.7,
33
- stream: true
33
+ stream: true,
34
+ tool_choice: 'none',
35
+ tools: void 0
34
36
  };
35
37
  }
36
38
  initStreamProcessingState() {
@@ -42,12 +44,14 @@ class GUIAgentToolCallEngine extends ToolCallEngine {
42
44
  };
43
45
  }
44
46
  processStreamingChunk(chunk, state) {
45
- var _chunk_choices_, _chunk_choices_1;
47
+ var _chunk_choices_, _chunk_choices_1, _chunk_choices_2;
46
48
  const delta = null == (_chunk_choices_ = chunk.choices[0]) ? void 0 : _chunk_choices_.delta;
47
- if (null == delta ? void 0 : delta.content) state.contentBuffer += delta.content;
48
- if (null == (_chunk_choices_1 = chunk.choices[0]) ? void 0 : _chunk_choices_1.finish_reason) state.finishReason = chunk.choices[0].finish_reason;
49
+ const message = null == (_chunk_choices_1 = chunk.choices[0]) ? void 0 : _chunk_choices_1.message;
50
+ const content = (null == delta ? void 0 : delta.content) || (null == message ? void 0 : message.content) || '';
51
+ if (content) state.contentBuffer += content;
52
+ if (null == (_chunk_choices_2 = chunk.choices[0]) ? void 0 : _chunk_choices_2.finish_reason) state.finishReason = chunk.choices[0].finish_reason;
49
53
  return {
50
- content: (null == delta ? void 0 : delta.content) || '',
54
+ content: content,
51
55
  reasoningContent: '',
52
56
  hasToolCallUpdate: false,
53
57
  toolCalls: []
@@ -58,6 +62,9 @@ class GUIAgentToolCallEngine extends ToolCallEngine {
58
62
  }
59
63
  finalizeStreamProcessing(state) {
60
64
  const fullContent = state.contentBuffer;
65
+ console.log('[DEBUG] Full content length:', fullContent.length);
66
+ console.log('[DEBUG] Full content prefix:', fullContent.slice(0, 100));
67
+ console.log('[DEBUG] Full content suffix:', fullContent.slice(-100));
61
68
  defaultLogger.log("\u3010New Sys Prompt'\u3011 Model Response:", fullContent);
62
69
  defaultLogger.log('[finalizeStreamProcessing] fullContent', fullContent);
63
70
  console.log('[CLI DEBUG] [ToolCallEngine] Full model response received:', fullContent);
@@ -67,6 +74,41 @@ class GUIAgentToolCallEngine extends ToolCallEngine {
67
74
  defaultLogger.log('[finalizeStreamProcessing] Using custom action parser');
68
75
  console.log('[CLI DEBUG] [ToolCallEngine] Using custom action parser');
69
76
  }
77
+ if (/<seed:tool_call_never_used_/.test(fullContent)) {
78
+ console.log('[CLI DEBUG] [ToolCallEngine] Detected custom XML format. Attempting custom regex parser.');
79
+ try {
80
+ const actions = [];
81
+ const functionRegex = /<function_[^=>]*=([a-zA-Z0-9_]+)>([\s\S]*?)<\/function_[^>]*>/g;
82
+ let match;
83
+ while(null !== (match = functionRegex.exec(fullContent))){
84
+ const actionName = match[1];
85
+ const innerContent = match[2];
86
+ const args = {};
87
+ const paramRegex = /<parameter_[^=>]*=([a-zA-Z0-9_]+)>([\s\S]*?)<\/parameter_[^>]*>/g;
88
+ let paramMatch;
89
+ while(null !== (paramMatch = paramRegex.exec(innerContent)))args[paramMatch[1]] = paramMatch[2].trim();
90
+ actions.push({
91
+ type: actionName,
92
+ inputs: args,
93
+ thought: ''
94
+ });
95
+ }
96
+ const thoughtRegex = /<think_[^>]*>([\s\S]*?)<\/think_[^>]*>/g;
97
+ const thoughtMatch = thoughtRegex.exec(fullContent);
98
+ const thoughtContent = thoughtMatch ? thoughtMatch[1].trim() : '';
99
+ if (actions.length > 0) {
100
+ console.log(`[CLI DEBUG] [ToolCallEngine] Custom regex parser found ${actions.length} actions.`);
101
+ parsedGUIResponse = {
102
+ errorMessage: '',
103
+ rawContent: fullContent,
104
+ actions: actions,
105
+ reasoningContent: thoughtContent
106
+ };
107
+ }
108
+ } catch (e) {
109
+ console.error('[CLI DEBUG] [ToolCallEngine] Custom regex parser error:', e);
110
+ }
111
+ }
70
112
  if (!parsedGUIResponse) {
71
113
  console.log('[CLI DEBUG] [ToolCallEngine] Using default action parser (XML parser)');
72
114
  parsedGUIResponse = defaultParser.parsePrediction(fullContent);
@@ -1 +1 @@
1
- {"version":3,"file":"ToolCallEngine.mjs","sources":["webpack://@ui-tars-test/agent-sdk/./src/ToolCallEngine.ts"],"sourcesContent":["import {\n ToolCallEngine,\n Tool,\n ToolCallEnginePrepareRequestContext,\n ChatCompletionCreateParams,\n ChatCompletionAssistantMessageParam,\n ChatCompletionChunk,\n MultimodalToolCallResult,\n AgentEventStream,\n ChatCompletionMessageParam,\n ChatCompletionMessageToolCall,\n ParsedModelResponse,\n StreamProcessingState,\n StreamChunkResult,\n} from '@ui-tars-test/tarko-agent-interface';\nimport { DefaultActionParser } from '@ui-tars-test/action-parser';\nimport { GUI_ADAPTED_TOOL_NAME } from './constants';\nimport { ConsoleLogger, LogLevel } from '@agent-infra/logger';\nimport { serializeAction } from '@ui-tars-test/shared/utils';\nimport { CustomActionParser } from '@ui-tars-test/shared/types';\n\nconst defaultParser = new DefaultActionParser();\nconst defaultLogger = new ConsoleLogger('[GUIAgent:ToolCallEngine]', LogLevel.DEBUG);\n\n/**\n * GUIAgentToolCallEngine - Minimal prompt engineering tool call engine\n *\n * This is the simplest possible implementation of a tool call engine that:\n * 1. Uses prompt engineering to instruct the LLM to output tool calls in a specific format\n * 2. Parses tool calls from LLM response text using simple regex matching\n * 3. Does not support streaming (focuses on core functionality only)\n *\n * Format used: <tool_call>{\"name\": \"tool_name\", \"arguments\": {...}}</tool_call>\n */\nexport class GUIAgentToolCallEngine extends ToolCallEngine {\n private customActionParser?: CustomActionParser;\n\n constructor(customActionParser?: CustomActionParser) {\n super();\n this.customActionParser = customActionParser;\n }\n\n /**\n * Prepare system prompt with tool information and instructions\n */\n preparePrompt(instructions: string, tools: Tool[]): string {\n return instructions;\n }\n\n /**\n * Prepare request parameters for the LLM\n *\n * FIXME: move to base tool call engine.\n */\n prepareRequest(context: ToolCallEnginePrepareRequestContext): ChatCompletionCreateParams {\n defaultLogger.log(\n \"【New Sys Prompt'】 System Prompt:\",\n JSON.stringify(context.messages.find((m) => m.role === 'system')?.content || ''),\n );\n return {\n model: context.model,\n messages: context.messages,\n temperature: context.temperature || 0.7,\n stream: true,\n };\n }\n\n /**\n * Initialize processing state (minimal implementation)\n *\n * FIXME: move to base tool call engine.\n */\n initStreamProcessingState(): StreamProcessingState {\n return {\n contentBuffer: '',\n toolCalls: [],\n reasoningBuffer: '',\n finishReason: null,\n };\n }\n\n /**\n * Process streaming chunks - simply accumulate content\n *\n * FIXME: make it optional\n */\n processStreamingChunk(\n chunk: ChatCompletionChunk,\n state: StreamProcessingState,\n ): StreamChunkResult {\n const delta = chunk.choices[0]?.delta;\n\n // Accumulate content\n if (delta?.content) {\n state.contentBuffer += delta.content;\n }\n\n // Record finish reason\n if (chunk.choices[0]?.finish_reason) {\n state.finishReason = chunk.choices[0].finish_reason;\n }\n\n // Return incremental content without tool call detection during streaming\n return {\n content: delta?.content || '',\n reasoningContent: '',\n hasToolCallUpdate: false,\n toolCalls: [],\n };\n }\n\n /**\n * Generate a tool call ID\n */\n private generateToolCallId(): string {\n return `call_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;\n }\n\n /**\n * Extract tool calls from complete response text\n */\n finalizeStreamProcessing(state: StreamProcessingState): ParsedModelResponse {\n const fullContent = state.contentBuffer;\n defaultLogger.log(\"【New Sys Prompt'】 Model Response:\", fullContent);\n defaultLogger.log('[finalizeStreamProcessing] fullContent', fullContent);\n\n // Add explicit log to confirm XML parsing intent\n console.log('[CLI DEBUG] [ToolCallEngine] Full model response received:', fullContent);\n\n // Try custom action parser first if available\n let parsedGUIResponse = null;\n if (this.customActionParser) {\n parsedGUIResponse = this.customActionParser(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using custom action parser');\n console.log('[CLI DEBUG] [ToolCallEngine] Using custom action parser');\n }\n\n // Fall back to default parser if custom parser is not available or returns null\n if (!parsedGUIResponse) {\n console.log('[CLI DEBUG] [ToolCallEngine] Using default action parser (XML parser)');\n parsedGUIResponse = defaultParser.parsePrediction(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using default action parser');\n }\n\n if (parsedGUIResponse) {\n console.log(\n '[CLI DEBUG] [ToolCallEngine] Parsed response:',\n JSON.stringify(parsedGUIResponse, null, 2),\n );\n } else {\n console.log('[CLI DEBUG] [ToolCallEngine] Parsing failed or returned null');\n }\n\n if (!parsedGUIResponse || parsedGUIResponse.errorMessage) {\n return {\n content: '',\n rawContent: fullContent,\n toolCalls: [\n {\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: '',\n step: '',\n thought: '',\n operator_action: null,\n errorMessage:\n parsedGUIResponse?.errorMessage ?? 'Failed to parse GUI Action from output',\n }),\n },\n },\n ],\n finishReason: 'tool_calls',\n };\n }\n\n const toolCalls: ChatCompletionMessageToolCall[] = [];\n\n let finished = false;\n let finishMessage: string | null = null;\n for (const action of parsedGUIResponse.actions) {\n if (action.type === 'finished') {\n finished = true;\n finishMessage = action.inputs?.content ?? null;\n continue;\n }\n toolCalls.push({\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: serializeAction(action),\n step: '',\n thought: parsedGUIResponse.reasoningContent ?? '',\n operator_action: action,\n }),\n },\n });\n }\n\n const content = finishMessage ?? '';\n const reasoningContent = parsedGUIResponse.reasoningContent ?? '';\n const contentForWebUI = content.replace(/\\\\n|\\n/g, '<br>');\n const reasoningContentForWebUI = reasoningContent.replace(/\\\\n|\\n/g, '<br>');\n\n // No tool calls found - return regular response\n return {\n content: contentForWebUI,\n rawContent: fullContent,\n reasoningContent: reasoningContentForWebUI,\n toolCalls,\n finishReason: toolCalls.length > 0 && !finished ? 'tool_calls' : 'stop',\n };\n }\n\n /**\n * Build assistant message for conversation history\n * For PE engines, we preserve the raw content including tool call markup\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalAssistantMessage(\n currentLoopAssistantEvent: AgentEventStream.AssistantMessageEvent,\n ): ChatCompletionAssistantMessageParam {\n return {\n role: 'assistant',\n content: currentLoopAssistantEvent.rawContent || currentLoopAssistantEvent.content,\n };\n }\n\n /**\n * Build tool result messages as user messages\n * PE engines format tool results as user input for next iteration\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalToolCallResultMessages(\n toolCallResults: MultimodalToolCallResult[],\n ): ChatCompletionMessageParam[] {\n return toolCallResults.map((result) => {\n // Extract text content from multimodal result\n const textContent = result.content\n .filter((part) => part.type === 'text')\n .map((part) => (part as { text: string }).text)\n .join('');\n\n return {\n role: 'user',\n content: `Tool \"${result.toolName}\" result:\\n${textContent}`,\n };\n });\n }\n}\n"],"names":["defaultParser","DefaultActionParser","defaultLogger","ConsoleLogger","LogLevel","GUIAgentToolCallEngine","ToolCallEngine","instructions","tools","context","_context_messages_find","JSON","m","chunk","state","_chunk_choices_","_chunk_choices_1","delta","Date","Math","fullContent","console","parsedGUIResponse","GUI_ADAPTED_TOOL_NAME","toolCalls","finished","finishMessage","action","_action_inputs","serializeAction","content","reasoningContent","contentForWebUI","reasoningContentForWebUI","currentLoopAssistantEvent","toolCallResults","result","textContent","part","customActionParser"],"mappings":";;;;;;;;;;;;;;;;;;;AAqBA,MAAMA,gBAAgB,IAAIC;AAC1B,MAAMC,gBAAgB,IAAIC,cAAc,6BAA6BC,SAAS,KAAK;AAY5E,MAAMC,+BAA+BC;IAW1C,cAAcC,YAAoB,EAAEC,KAAa,EAAU;QACzD,OAAOD;IACT;IAOA,eAAeE,OAA4C,EAA8B;YAGtEC;QAFjBR,cAAc,GAAG,CACf,8CACAS,KAAK,SAAS,CAACD,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,QAAQ,CAAC,IAAI,CAAC,CAACE,IAAMA,AAAW,aAAXA,EAAE,IAAI,CAAa,IAAhDF,KAAAA,IAAAA,uBAAmD,OAAO,AAAD,KAAK;QAE/E,OAAO;YACL,OAAOD,QAAQ,KAAK;YACpB,UAAUA,QAAQ,QAAQ;YAC1B,aAAaA,QAAQ,WAAW,IAAI;YACpC,QAAQ;QACV;IACF;IAOA,4BAAmD;QACjD,OAAO;YACL,eAAe;YACf,WAAW,EAAE;YACb,iBAAiB;YACjB,cAAc;QAChB;IACF;IAOA,sBACEI,KAA0B,EAC1BC,KAA4B,EACT;YACLC,iBAQVC;QARJ,MAAMC,QAAQ,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,gBAAkB,KAAK;QAGrC,IAAIE,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,OAAO,EAChBH,MAAM,aAAa,IAAIG,MAAM,OAAO;QAItC,IAAI,QAAAD,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAAkB,aAAa,EACjCF,MAAM,YAAY,GAAGD,MAAM,OAAO,CAAC,EAAE,CAAC,aAAa;QAIrD,OAAO;YACL,SAASI,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,OAAO,AAAD,KAAK;YAC3B,kBAAkB;YAClB,mBAAmB;YACnB,WAAW,EAAE;QACf;IACF;IAKQ,qBAA6B;QACnC,OAAO,CAAC,KAAK,EAAEC,KAAK,GAAG,GAAG,CAAC,EAAEC,KAAK,MAAM,GAAG,QAAQ,CAAC,IAAI,SAAS,CAAC,GAAG,KAAK;IAC5E;IAKA,yBAAyBL,KAA4B,EAAuB;QAC1E,MAAMM,cAAcN,MAAM,aAAa;QACvCZ,cAAc,GAAG,CAAC,+CAAqCkB;QACvDlB,cAAc,GAAG,CAAC,0CAA0CkB;QAG5DC,QAAQ,GAAG,CAAC,8DAA8DD;QAG1E,IAAIE,oBAAoB;QACxB,IAAI,IAAI,CAAC,kBAAkB,EAAE;YAC3BA,oBAAoB,IAAI,CAAC,kBAAkB,CAACF;YAC5ClB,cAAc,GAAG,CAAC;YAClBmB,QAAQ,GAAG,CAAC;QACd;QAGA,IAAI,CAACC,mBAAmB;YACtBD,QAAQ,GAAG,CAAC;YACZC,oBAAoBtB,cAAc,eAAe,CAACoB;YAClDlB,cAAc,GAAG,CAAC;QACpB;QAEA,IAAIoB,mBACFD,QAAQ,GAAG,CACT,iDACAV,KAAK,SAAS,CAACW,mBAAmB,MAAM;aAG1CD,QAAQ,GAAG,CAAC;QAGd,IAAI,CAACC,qBAAqBA,kBAAkB,YAAY,EACtD,OAAO;YACL,SAAS;YACT,YAAYF;YACZ,WAAW;gBACT;oBACE,IAAI,IAAI,CAAC,kBAAkB;oBAC3B,MAAM;oBACN,UAAU;wBACR,MAAMG;wBACN,WAAWZ,KAAK,SAAS,CAAC;4BACxB,QAAQ;4BACR,MAAM;4BACN,SAAS;4BACT,iBAAiB;4BACjB,cACEW,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,YAAY,AAAD,KAAK;wBACvC;oBACF;gBACF;aACD;YACD,cAAc;QAChB;QAGF,MAAME,YAA6C,EAAE;QAErD,IAAIC,WAAW;QACf,IAAIC,gBAA+B;QACnC,KAAK,MAAMC,UAAUL,kBAAkB,OAAO,CAAE;YAC9C,IAAIK,AAAgB,eAAhBA,OAAO,IAAI,EAAiB;oBAEdC;gBADhBH,WAAW;gBACXC,gBAAgBE,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,OAAO,AAAD,KAAK;gBAC1C;YACF;YACAJ,UAAU,IAAI,CAAC;gBACb,IAAI,IAAI,CAAC,kBAAkB;gBAC3B,MAAM;gBACN,UAAU;oBACR,MAAMD;oBACN,WAAWZ,KAAK,SAAS,CAAC;wBACxB,QAAQkB,gBAAgBF;wBACxB,MAAM;wBACN,SAASL,kBAAkB,gBAAgB,IAAI;wBAC/C,iBAAiBK;oBACnB;gBACF;YACF;QACF;QAEA,MAAMG,UAAUJ,iBAAiB;QACjC,MAAMK,mBAAmBT,kBAAkB,gBAAgB,IAAI;QAC/D,MAAMU,kBAAkBF,QAAQ,OAAO,CAAC,WAAW;QACnD,MAAMG,2BAA2BF,iBAAiB,OAAO,CAAC,WAAW;QAGrE,OAAO;YACL,SAASC;YACT,YAAYZ;YACZ,kBAAkBa;YAClBT;YACA,cAAcA,UAAU,MAAM,GAAG,KAAK,CAACC,WAAW,eAAe;QACnE;IACF;IAQA,gCACES,yBAAiE,EAC5B;QACrC,OAAO;YACL,MAAM;YACN,SAASA,0BAA0B,UAAU,IAAIA,0BAA0B,OAAO;QACpF;IACF;IAQA,sCACEC,eAA2C,EACb;QAC9B,OAAOA,gBAAgB,GAAG,CAAC,CAACC;YAE1B,MAAMC,cAAcD,OAAO,OAAO,CAC/B,MAAM,CAAC,CAACE,OAASA,AAAc,WAAdA,KAAK,IAAI,EAC1B,GAAG,CAAC,CAACA,OAAUA,KAA0B,IAAI,EAC7C,IAAI,CAAC;YAER,OAAO;gBACL,MAAM;gBACN,SAAS,CAAC,MAAM,EAAEF,OAAO,QAAQ,CAAC,WAAW,EAAEC,aAAa;YAC9D;QACF;IACF;IAzNA,YAAYE,kBAAuC,CAAE;QACnD,KAAK,IAHP,uBAAQ,sBAAR;QAIE,IAAI,CAAC,kBAAkB,GAAGA;IAC5B;AAuNF"}
1
+ {"version":3,"file":"ToolCallEngine.mjs","sources":["webpack://@ui-tars-test/agent-sdk/./src/ToolCallEngine.ts"],"sourcesContent":["import {\n ToolCallEngine,\n Tool,\n ToolCallEnginePrepareRequestContext,\n ChatCompletionCreateParams,\n ChatCompletionAssistantMessageParam,\n ChatCompletionChunk,\n MultimodalToolCallResult,\n AgentEventStream,\n ChatCompletionMessageParam,\n ChatCompletionMessageToolCall,\n ParsedModelResponse,\n StreamProcessingState,\n StreamChunkResult,\n} from '@ui-tars-test/tarko-agent-interface';\nimport { DefaultActionParser } from '@ui-tars-test/action-parser';\nimport { GUI_ADAPTED_TOOL_NAME } from './constants';\nimport { ConsoleLogger, LogLevel } from '@agent-infra/logger';\nimport { serializeAction } from '@ui-tars-test/shared/utils';\nimport { CustomActionParser } from '@ui-tars-test/shared/types';\n\nconst defaultParser = new DefaultActionParser();\nconst defaultLogger = new ConsoleLogger('[GUIAgent:ToolCallEngine]', LogLevel.DEBUG);\n\n/**\n * GUIAgentToolCallEngine - Minimal prompt engineering tool call engine\n *\n * This is the simplest possible implementation of a tool call engine that:\n * 1. Uses prompt engineering to instruct the LLM to output tool calls in a specific format\n * 2. Parses tool calls from LLM response text using simple regex matching\n * 3. Does not support streaming (focuses on core functionality only)\n *\n * Format used: <tool_call>{\"name\": \"tool_name\", \"arguments\": {...}}</tool_call>\n */\nexport class GUIAgentToolCallEngine extends ToolCallEngine {\n private customActionParser?: CustomActionParser;\n\n constructor(customActionParser?: CustomActionParser) {\n super();\n this.customActionParser = customActionParser;\n }\n\n /**\n * Prepare system prompt with tool information and instructions\n */\n preparePrompt(instructions: string, tools: Tool[]): string {\n return instructions;\n }\n\n /**\n * Prepare request parameters for the LLM\n *\n * FIXME: move to base tool call engine.\n */\n prepareRequest(context: ToolCallEnginePrepareRequestContext): ChatCompletionCreateParams {\n defaultLogger.log(\n \"【New Sys Prompt'】 System Prompt:\",\n JSON.stringify(context.messages.find((m) => m.role === 'system')?.content || ''),\n );\n return {\n model: context.model,\n messages: context.messages,\n temperature: context.temperature || 0.7,\n stream: true,\n // 强制禁用 Native Function Calling,确保模型将 Action 输出到 content 中(以 XML 格式)\n tool_choice: 'none',\n tools: undefined, // 显式移除 tools 参数,防止某些 provider 在 tool_choice: none 时依然启用 function calling\n };\n }\n\n /**\n * Initialize processing state (minimal implementation)\n *\n * FIXME: move to base tool call engine.\n */\n initStreamProcessingState(): StreamProcessingState {\n return {\n contentBuffer: '',\n toolCalls: [],\n reasoningBuffer: '',\n finishReason: null,\n };\n }\n\n /**\n * Process streaming chunks - simply accumulate content\n *\n * FIXME: make it optional\n */\n processStreamingChunk(\n chunk: ChatCompletionChunk,\n state: StreamProcessingState,\n ): StreamChunkResult {\n // For non-streaming requests, the entire response comes in one chunk\n const delta = chunk.choices[0]?.delta;\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const message = (chunk.choices[0] as any)?.message;\n\n // Accumulate content from delta (streaming) or message (non-streaming)\n const content = delta?.content || message?.content || '';\n if (content) {\n state.contentBuffer += content;\n }\n\n // Record finish reason\n if (chunk.choices[0]?.finish_reason) {\n state.finishReason = chunk.choices[0].finish_reason;\n }\n\n // Return incremental content without tool call detection during streaming\n return {\n content: content,\n reasoningContent: '',\n hasToolCallUpdate: false,\n toolCalls: [],\n };\n }\n\n /**\n * Generate a tool call ID\n */\n private generateToolCallId(): string {\n return `call_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;\n }\n\n /**\n * Extract tool calls from complete response text\n */\n finalizeStreamProcessing(state: StreamProcessingState): ParsedModelResponse {\n const fullContent = state.contentBuffer;\n console.log('[DEBUG] Full content length:', fullContent.length);\n console.log('[DEBUG] Full content prefix:', fullContent.slice(0, 100));\n console.log('[DEBUG] Full content suffix:', fullContent.slice(-100));\n defaultLogger.log(\"【New Sys Prompt'】 Model Response:\", fullContent);\n defaultLogger.log('[finalizeStreamProcessing] fullContent', fullContent);\n\n // Add explicit log to confirm XML parsing intent\n console.log('[CLI DEBUG] [ToolCallEngine] Full model response received:', fullContent);\n\n // Try custom action parser first if available\n let parsedGUIResponse = null;\n if (this.customActionParser) {\n parsedGUIResponse = this.customActionParser(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using custom action parser');\n console.log('[CLI DEBUG] [ToolCallEngine] Using custom action parser');\n }\n\n // Priority: Custom Regex Parser > Default Parser\n // Check if the content contains the specific XML format with dynamic suffixes\n if (/<seed:tool_call_never_used_/.test(fullContent)) {\n console.log(\n '[CLI DEBUG] [ToolCallEngine] Detected custom XML format. Attempting custom regex parser.',\n );\n try {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const actions: any[] = [];\n // Regex to match <function_...=name>...</function...>\n // Handles dynamic suffixes like _never_used_...\n const functionRegex = /<function_[^=>]*=([a-zA-Z0-9_]+)>([\\s\\S]*?)<\\/function_[^>]*>/g;\n let match;\n\n while ((match = functionRegex.exec(fullContent)) !== null) {\n const actionName = match[1];\n const innerContent = match[2];\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const args: any = {};\n\n // Regex to match <parameter_...=key>value</parameter...>\n const paramRegex = /<parameter_[^=>]*=([a-zA-Z0-9_]+)>([\\s\\S]*?)<\\/parameter_[^>]*>/g;\n let paramMatch;\n while ((paramMatch = paramRegex.exec(innerContent)) !== null) {\n args[paramMatch[1]] = paramMatch[2].trim();\n }\n\n actions.push({\n type: actionName,\n inputs: args,\n thought: '',\n });\n }\n\n // Also try to extract thought from <think_...>...</think_...>\n const thoughtRegex = /<think_[^>]*>([\\s\\S]*?)<\\/think_[^>]*>/g;\n const thoughtMatch = thoughtRegex.exec(fullContent);\n const thoughtContent = thoughtMatch ? thoughtMatch[1].trim() : '';\n\n if (actions.length > 0) {\n console.log(\n `[CLI DEBUG] [ToolCallEngine] Custom regex parser found ${actions.length} actions.`,\n );\n parsedGUIResponse = {\n errorMessage: '',\n rawContent: fullContent,\n actions: actions,\n reasoningContent: thoughtContent,\n };\n }\n } catch (e) {\n console.error('[CLI DEBUG] [ToolCallEngine] Custom regex parser error:', e);\n }\n }\n\n // Fall back to default parser if regex parser didn't produce results\n if (!parsedGUIResponse) {\n console.log('[CLI DEBUG] [ToolCallEngine] Using default action parser (XML parser)');\n parsedGUIResponse = defaultParser.parsePrediction(fullContent);\n defaultLogger.log('[finalizeStreamProcessing] Using default action parser');\n }\n\n if (parsedGUIResponse) {\n console.log(\n '[CLI DEBUG] [ToolCallEngine] Parsed response:',\n JSON.stringify(parsedGUIResponse, null, 2),\n );\n } else {\n console.log('[CLI DEBUG] [ToolCallEngine] Parsing failed or returned null');\n }\n\n if (!parsedGUIResponse || parsedGUIResponse.errorMessage) {\n return {\n content: '',\n rawContent: fullContent,\n toolCalls: [\n {\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: '',\n step: '',\n thought: '',\n operator_action: null,\n errorMessage:\n parsedGUIResponse?.errorMessage ?? 'Failed to parse GUI Action from output',\n }),\n },\n },\n ],\n finishReason: 'tool_calls',\n };\n }\n\n const toolCalls: ChatCompletionMessageToolCall[] = [];\n\n let finished = false;\n let finishMessage: string | null = null;\n for (const action of parsedGUIResponse.actions) {\n if (action.type === 'finished') {\n finished = true;\n finishMessage = action.inputs?.content ?? null;\n continue;\n }\n toolCalls.push({\n id: this.generateToolCallId(),\n type: 'function',\n function: {\n name: GUI_ADAPTED_TOOL_NAME,\n arguments: JSON.stringify({\n action: serializeAction(action),\n step: '',\n thought: parsedGUIResponse.reasoningContent ?? '',\n operator_action: action,\n }),\n },\n });\n }\n\n const content = finishMessage ?? '';\n const reasoningContent = parsedGUIResponse.reasoningContent ?? '';\n const contentForWebUI = content.replace(/\\\\n|\\n/g, '<br>');\n const reasoningContentForWebUI = reasoningContent.replace(/\\\\n|\\n/g, '<br>');\n\n // No tool calls found - return regular response\n return {\n content: contentForWebUI,\n rawContent: fullContent,\n reasoningContent: reasoningContentForWebUI,\n toolCalls,\n finishReason: toolCalls.length > 0 && !finished ? 'tool_calls' : 'stop',\n };\n }\n\n /**\n * Build assistant message for conversation history\n * For PE engines, we preserve the raw content including tool call markup\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalAssistantMessage(\n currentLoopAssistantEvent: AgentEventStream.AssistantMessageEvent,\n ): ChatCompletionAssistantMessageParam {\n return {\n role: 'assistant',\n content: currentLoopAssistantEvent.rawContent || currentLoopAssistantEvent.content,\n };\n }\n\n /**\n * Build tool result messages as user messages\n * PE engines format tool results as user input for next iteration\n *\n * FIXME: move to base tool call engine.\n */\n buildHistoricalToolCallResultMessages(\n toolCallResults: MultimodalToolCallResult[],\n ): ChatCompletionMessageParam[] {\n return toolCallResults.map((result) => {\n // Extract text content from multimodal result\n const textContent = result.content\n .filter((part) => part.type === 'text')\n .map((part) => (part as { text: string }).text)\n .join('');\n\n return {\n role: 'user',\n content: `Tool \"${result.toolName}\" result:\\n${textContent}`,\n };\n });\n }\n}\n"],"names":["defaultParser","DefaultActionParser","defaultLogger","ConsoleLogger","LogLevel","GUIAgentToolCallEngine","ToolCallEngine","instructions","tools","context","_context_messages_find","JSON","m","undefined","chunk","state","_chunk_choices_","_chunk_choices_1","_chunk_choices_2","delta","message","content","Date","Math","fullContent","console","parsedGUIResponse","actions","functionRegex","match","actionName","innerContent","args","paramRegex","paramMatch","thoughtRegex","thoughtMatch","thoughtContent","e","GUI_ADAPTED_TOOL_NAME","toolCalls","finished","finishMessage","action","_action_inputs","serializeAction","reasoningContent","contentForWebUI","reasoningContentForWebUI","currentLoopAssistantEvent","toolCallResults","result","textContent","part","customActionParser"],"mappings":";;;;;;;;;;;;;;;;;;;AAqBA,MAAMA,gBAAgB,IAAIC;AAC1B,MAAMC,gBAAgB,IAAIC,cAAc,6BAA6BC,SAAS,KAAK;AAY5E,MAAMC,+BAA+BC;IAW1C,cAAcC,YAAoB,EAAEC,KAAa,EAAU;QACzD,OAAOD;IACT;IAOA,eAAeE,OAA4C,EAA8B;YAGtEC;QAFjBR,cAAc,GAAG,CACf,8CACAS,KAAK,SAAS,CAACD,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,QAAQ,CAAC,IAAI,CAAC,CAACE,IAAMA,AAAW,aAAXA,EAAE,IAAI,CAAa,IAAhDF,KAAAA,IAAAA,uBAAmD,OAAO,AAAD,KAAK;QAE/E,OAAO;YACL,OAAOD,QAAQ,KAAK;YACpB,UAAUA,QAAQ,QAAQ;YAC1B,aAAaA,QAAQ,WAAW,IAAI;YACpC,QAAQ;YAER,aAAa;YACb,OAAOI;QACT;IACF;IAOA,4BAAmD;QACjD,OAAO;YACL,eAAe;YACf,WAAW,EAAE;YACb,iBAAiB;YACjB,cAAc;QAChB;IACF;IAOA,sBACEC,KAA0B,EAC1BC,KAA4B,EACT;YAELC,iBAEGC,kBASbC;QAXJ,MAAMC,QAAQ,QAAAH,CAAAA,kBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,gBAAkB,KAAK;QAErC,MAAMI,UAAU,QAACH,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAA0B,OAAO;QAGlD,MAAMI,UAAUF,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,OAAO,AAAD,KAAKC,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD,KAAK;QACtD,IAAIC,SACFN,MAAM,aAAa,IAAIM;QAIzB,IAAI,QAAAH,CAAAA,mBAAAA,MAAM,OAAO,CAAC,EAAE,AAAD,IAAfA,KAAAA,IAAAA,iBAAkB,aAAa,EACjCH,MAAM,YAAY,GAAGD,MAAM,OAAO,CAAC,EAAE,CAAC,aAAa;QAIrD,OAAO;YACL,SAASO;YACT,kBAAkB;YAClB,mBAAmB;YACnB,WAAW,EAAE;QACf;IACF;IAKQ,qBAA6B;QACnC,OAAO,CAAC,KAAK,EAAEC,KAAK,GAAG,GAAG,CAAC,EAAEC,KAAK,MAAM,GAAG,QAAQ,CAAC,IAAI,SAAS,CAAC,GAAG,KAAK;IAC5E;IAKA,yBAAyBR,KAA4B,EAAuB;QAC1E,MAAMS,cAAcT,MAAM,aAAa;QACvCU,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,MAAM;QAC9DC,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,KAAK,CAAC,GAAG;QACjEC,QAAQ,GAAG,CAAC,gCAAgCD,YAAY,KAAK,CAAC;QAC9DtB,cAAc,GAAG,CAAC,+CAAqCsB;QACvDtB,cAAc,GAAG,CAAC,0CAA0CsB;QAG5DC,QAAQ,GAAG,CAAC,8DAA8DD;QAG1E,IAAIE,oBAAoB;QACxB,IAAI,IAAI,CAAC,kBAAkB,EAAE;YAC3BA,oBAAoB,IAAI,CAAC,kBAAkB,CAACF;YAC5CtB,cAAc,GAAG,CAAC;YAClBuB,QAAQ,GAAG,CAAC;QACd;QAIA,IAAI,8BAA8B,IAAI,CAACD,cAAc;YACnDC,QAAQ,GAAG,CACT;YAEF,IAAI;gBAEF,MAAME,UAAiB,EAAE;gBAGzB,MAAMC,gBAAgB;gBACtB,IAAIC;gBAEJ,MAAQA,AAA6C,SAA7CA,CAAAA,QAAQD,cAAc,IAAI,CAACJ,YAAW,EAAa;oBACzD,MAAMM,aAAaD,KAAK,CAAC,EAAE;oBAC3B,MAAME,eAAeF,KAAK,CAAC,EAAE;oBAE7B,MAAMG,OAAY,CAAC;oBAGnB,MAAMC,aAAa;oBACnB,IAAIC;oBACJ,MAAQA,AAAgD,SAAhDA,CAAAA,aAAaD,WAAW,IAAI,CAACF,aAAY,EAC/CC,IAAI,CAACE,UAAU,CAAC,EAAE,CAAC,GAAGA,UAAU,CAAC,EAAE,CAAC,IAAI;oBAG1CP,QAAQ,IAAI,CAAC;wBACX,MAAMG;wBACN,QAAQE;wBACR,SAAS;oBACX;gBACF;gBAGA,MAAMG,eAAe;gBACrB,MAAMC,eAAeD,aAAa,IAAI,CAACX;gBACvC,MAAMa,iBAAiBD,eAAeA,YAAY,CAAC,EAAE,CAAC,IAAI,KAAK;gBAE/D,IAAIT,QAAQ,MAAM,GAAG,GAAG;oBACtBF,QAAQ,GAAG,CACT,CAAC,uDAAuD,EAAEE,QAAQ,MAAM,CAAC,SAAS,CAAC;oBAErFD,oBAAoB;wBAClB,cAAc;wBACd,YAAYF;wBACZ,SAASG;wBACT,kBAAkBU;oBACpB;gBACF;YACF,EAAE,OAAOC,GAAG;gBACVb,QAAQ,KAAK,CAAC,2DAA2Da;YAC3E;QACF;QAGA,IAAI,CAACZ,mBAAmB;YACtBD,QAAQ,GAAG,CAAC;YACZC,oBAAoB1B,cAAc,eAAe,CAACwB;YAClDtB,cAAc,GAAG,CAAC;QACpB;QAEA,IAAIwB,mBACFD,QAAQ,GAAG,CACT,iDACAd,KAAK,SAAS,CAACe,mBAAmB,MAAM;aAG1CD,QAAQ,GAAG,CAAC;QAGd,IAAI,CAACC,qBAAqBA,kBAAkB,YAAY,EACtD,OAAO;YACL,SAAS;YACT,YAAYF;YACZ,WAAW;gBACT;oBACE,IAAI,IAAI,CAAC,kBAAkB;oBAC3B,MAAM;oBACN,UAAU;wBACR,MAAMe;wBACN,WAAW5B,KAAK,SAAS,CAAC;4BACxB,QAAQ;4BACR,MAAM;4BACN,SAAS;4BACT,iBAAiB;4BACjB,cACEe,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,YAAY,AAAD,KAAK;wBACvC;oBACF;gBACF;aACD;YACD,cAAc;QAChB;QAGF,MAAMc,YAA6C,EAAE;QAErD,IAAIC,WAAW;QACf,IAAIC,gBAA+B;QACnC,KAAK,MAAMC,UAAUjB,kBAAkB,OAAO,CAAE;YAC9C,IAAIiB,AAAgB,eAAhBA,OAAO,IAAI,EAAiB;oBAEdC;gBADhBH,WAAW;gBACXC,gBAAgBE,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,OAAO,AAAD,KAAK;gBAC1C;YACF;YACAJ,UAAU,IAAI,CAAC;gBACb,IAAI,IAAI,CAAC,kBAAkB;gBAC3B,MAAM;gBACN,UAAU;oBACR,MAAMD;oBACN,WAAW5B,KAAK,SAAS,CAAC;wBACxB,QAAQkC,gBAAgBF;wBACxB,MAAM;wBACN,SAASjB,kBAAkB,gBAAgB,IAAI;wBAC/C,iBAAiBiB;oBACnB;gBACF;YACF;QACF;QAEA,MAAMtB,UAAUqB,iBAAiB;QACjC,MAAMI,mBAAmBpB,kBAAkB,gBAAgB,IAAI;QAC/D,MAAMqB,kBAAkB1B,QAAQ,OAAO,CAAC,WAAW;QACnD,MAAM2B,2BAA2BF,iBAAiB,OAAO,CAAC,WAAW;QAGrE,OAAO;YACL,SAASC;YACT,YAAYvB;YACZ,kBAAkBwB;YAClBR;YACA,cAAcA,UAAU,MAAM,GAAG,KAAK,CAACC,WAAW,eAAe;QACnE;IACF;IAQA,gCACEQ,yBAAiE,EAC5B;QACrC,OAAO;YACL,MAAM;YACN,SAASA,0BAA0B,UAAU,IAAIA,0BAA0B,OAAO;QACpF;IACF;IAQA,sCACEC,eAA2C,EACb;QAC9B,OAAOA,gBAAgB,GAAG,CAAC,CAACC;YAE1B,MAAMC,cAAcD,OAAO,OAAO,CAC/B,MAAM,CAAC,CAACE,OAASA,AAAc,WAAdA,KAAK,IAAI,EAC1B,GAAG,CAAC,CAACA,OAAUA,KAA0B,IAAI,EAC7C,IAAI,CAAC;YAER,OAAO;gBACL,MAAM;gBACN,SAAS,CAAC,MAAM,EAAEF,OAAO,QAAQ,CAAC,WAAW,EAAEC,aAAa;YAC9D;QACF;IACF;IA1RA,YAAYE,kBAAuC,CAAE;QACnD,KAAK,IAHP,uBAAQ,sBAAR;QAIE,IAAI,CAAC,kBAAkB,GAAGA;IAC5B;AAwRF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ui-tars-test/agent-sdk",
3
- "version": "0.3.12",
3
+ "version": "0.3.13",
4
4
  "description": "GUI Agent",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",