@ui-tars-test/agent-sdk 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +13 -0
  2. package/dist/GUIAgent.d.ts +24 -0
  3. package/dist/GUIAgent.d.ts.map +1 -0
  4. package/dist/GUIAgent.js +207 -0
  5. package/dist/GUIAgent.js.map +1 -0
  6. package/dist/GUIAgent.mjs +173 -0
  7. package/dist/GUIAgent.mjs.map +1 -0
  8. package/dist/ToolCallEngine.d.ts +61 -0
  9. package/dist/ToolCallEngine.d.ts.map +1 -0
  10. package/dist/ToolCallEngine.js +190 -0
  11. package/dist/ToolCallEngine.js.map +1 -0
  12. package/dist/ToolCallEngine.mjs +156 -0
  13. package/dist/ToolCallEngine.mjs.map +1 -0
  14. package/dist/constants.d.ts +6 -0
  15. package/dist/constants.d.ts.map +1 -0
  16. package/dist/constants.js +42 -0
  17. package/dist/constants.js.map +1 -0
  18. package/dist/constants.mjs +8 -0
  19. package/dist/constants.mjs.map +1 -0
  20. package/dist/defaultImpls.d.ts +15 -0
  21. package/dist/defaultImpls.d.ts.map +1 -0
  22. package/dist/defaultImpls.js +67 -0
  23. package/dist/defaultImpls.js.map +1 -0
  24. package/dist/defaultImpls.mjs +30 -0
  25. package/dist/defaultImpls.mjs.map +1 -0
  26. package/dist/index.d.ts +3 -0
  27. package/dist/index.d.ts.map +1 -0
  28. package/dist/index.js +72 -0
  29. package/dist/index.js.map +1 -0
  30. package/dist/index.mjs +7 -0
  31. package/dist/prompts.d.ts +10 -0
  32. package/dist/prompts.d.ts.map +1 -0
  33. package/dist/prompts.js +311 -0
  34. package/dist/prompts.js.map +1 -0
  35. package/dist/prompts.mjs +256 -0
  36. package/dist/prompts.mjs.map +1 -0
  37. package/dist/utils.d.ts +10 -0
  38. package/dist/utils.d.ts.map +1 -0
  39. package/dist/utils.js +65 -0
  40. package/dist/utils.js.map +1 -0
  41. package/dist/utils.mjs +28 -0
  42. package/dist/utils.mjs.map +1 -0
  43. package/package.json +63 -0
package/README.md ADDED
@@ -0,0 +1,13 @@
1
+ # @multimodal/seed-gui-agent
2
+
3
+ Seed GUI Agent.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @multimodal/seed-gui-agent
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ // TODO
@@ -0,0 +1,24 @@
1
+ import { LLMRequestHookPayload } from '@tarko/agent';
2
+ import { Operator, BaseGUIAgent } from '@ui-tars-test/shared/base';
3
+ import { GUIAgentConfig } from '@ui-tars-test/shared/types';
4
+ export declare class GUIAgent<T extends Operator> extends BaseGUIAgent {
5
+ static label: string;
6
+ private operator;
7
+ private normalizeCoordinates;
8
+ private detailCalculator;
9
+ private loopIntervalInMs;
10
+ constructor(config: GUIAgentConfig<T>);
11
+ initialize(): Promise<void>;
12
+ onLLMRequest(id: string, payload: LLMRequestHookPayload): Promise<void>;
13
+ onEachAgentLoopStart(sessionId: string): Promise<void>;
14
+ onAgentLoopEnd(id: string): Promise<void>;
15
+ onBeforeToolCall(id: string, toolCall: {
16
+ toolCallId: string;
17
+ name: string;
18
+ }, args: unknown): Promise<unknown>;
19
+ onAfterToolCall(id: string, toolCall: {
20
+ toolCallId: string;
21
+ name: string;
22
+ }, result: unknown): Promise<unknown>;
23
+ }
24
+ //# sourceMappingURL=GUIAgent.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GUIAgent.d.ts","sourceRoot":"","sources":["../src/GUIAgent.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,qBAAqB,EAKtB,MAAM,cAAc,CAAC;AAItB,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,EACL,cAAc,EAGf,MAAM,4BAA4B,CAAC;AAapC,qBAAa,QAAQ,CAAC,CAAC,SAAS,QAAQ,CAAE,SAAQ,YAAY;IAC5D,MAAM,CAAC,KAAK,SAAe;IAE3B,OAAO,CAAC,QAAQ,CAAuB;IACvC,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,gBAAgB,CAAwB;IAChD,OAAO,CAAC,gBAAgB,CAAS;gBAErB,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC;IA+C/B,UAAU;IA0CV,YAAY,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAkDvE,oBAAoB,CAAC,SAAS,EAAE,MAAM;IAItC,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIzC,gBAAgB,CACpB,EAAE,EAAE,MAAM,EACV,QAAQ,EAAE;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAC9C,IAAI,EAAE,OAAO;IAKT,eAAe,CACnB,EAAE,EAAE,MAAM,EACV,QAAQ,EAAE;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAC9C,MAAM,EAAE,OAAO,GACd,OAAO,CAAC,OAAO,CAAC;CA4DpB"}
@@ -0,0 +1,207 @@
1
+ /**
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
+ "use strict";
6
+ var __webpack_require__ = {};
7
+ (()=>{
8
+ __webpack_require__.d = (exports1, definition)=>{
9
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
10
+ enumerable: true,
11
+ get: definition[key]
12
+ });
13
+ };
14
+ })();
15
+ (()=>{
16
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
17
+ })();
18
+ (()=>{
19
+ __webpack_require__.r = (exports1)=>{
20
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
21
+ value: 'Module'
22
+ });
23
+ Object.defineProperty(exports1, '__esModule', {
24
+ value: true
25
+ });
26
+ };
27
+ })();
28
+ var __webpack_exports__ = {};
29
+ __webpack_require__.r(__webpack_exports__);
30
+ __webpack_require__.d(__webpack_exports__, {
31
+ GUIAgent: ()=>GUIAgent
32
+ });
33
+ const agent_namespaceObject = require("@tarko/agent");
34
+ const external_ToolCallEngine_js_namespaceObject = require("./ToolCallEngine.js");
35
+ const external_prompts_js_namespaceObject = require("./prompts.js");
36
+ const media_utils_namespaceObject = require("@agent-infra/media-utils");
37
+ const base_namespaceObject = require("@ui-tars-test/shared/base");
38
+ const utils_namespaceObject = require("@ui-tars-test/shared/utils");
39
+ const external_constants_js_namespaceObject = require("./constants.js");
40
+ const external_utils_js_namespaceObject = require("./utils.js");
41
+ const external_defaultImpls_js_namespaceObject = require("./defaultImpls.js");
42
+ function _define_property(obj, key, value) {
43
+ if (key in obj) Object.defineProperty(obj, key, {
44
+ value: value,
45
+ enumerable: true,
46
+ configurable: true,
47
+ writable: true
48
+ });
49
+ else obj[key] = value;
50
+ return obj;
51
+ }
52
+ const defaultLogger = new agent_namespaceObject.ConsoleLogger('[GUIAgent]', agent_namespaceObject.LogLevel.DEBUG);
53
+ class GUIAgent extends base_namespaceObject.BaseGUIAgent {
54
+ async initialize() {
55
+ this.registerTool(new agent_namespaceObject.Tool({
56
+ id: external_constants_js_namespaceObject.GUI_ADAPTED_TOOL_NAME,
57
+ description: 'operator tool',
58
+ parameters: {},
59
+ function: async (input)=>{
60
+ this.logger.log(`${external_constants_js_namespaceObject.GUI_ADAPTED_TOOL_NAME} input:`, input);
61
+ if (!this.operator) return (0, external_utils_js_namespaceObject.createGUIErrorResponse)(input.action, 'Operator not initialized');
62
+ if (input.errorMessage) return (0, external_utils_js_namespaceObject.createGUIErrorResponse)(input.action, input.errorMessage);
63
+ if (input.operator_action) input.operator_action = (0, utils_namespaceObject.normalizeActionCoords)(input.operator_action, this.normalizeCoordinates);
64
+ this.logger.info('action to execute:', JSON.stringify(input.operator_action));
65
+ const result = await this.operator.doExecute({
66
+ actions: [
67
+ input.operator_action
68
+ ]
69
+ });
70
+ if (result.errorMessage) return (0, external_utils_js_namespaceObject.createGUIErrorResponse)(input.action, result.errorMessage);
71
+ return {
72
+ success: true,
73
+ action: input.action,
74
+ normalizedAction: (0, external_utils_js_namespaceObject.convertToAgentUIAction)(input.operator_action),
75
+ observation: void 0
76
+ };
77
+ }
78
+ }));
79
+ super.initialize();
80
+ }
81
+ async onLLMRequest(id, payload) {
82
+ try {
83
+ const safeStringify = (obj, max = 800)=>{
84
+ try {
85
+ const s = JSON.stringify(obj);
86
+ return s.length > max ? s.slice(0, max) + "\u2026(truncated)" : s;
87
+ } catch {
88
+ return '[unserializable]';
89
+ }
90
+ };
91
+ const req = null == payload ? void 0 : payload.request;
92
+ const messages = req && Array.isArray(req.messages) ? req.messages : [];
93
+ const model = req && 'string' == typeof req.model ? req.model : 'unknown';
94
+ const hasImages = (()=>{
95
+ try {
96
+ let cnt = 0;
97
+ for (const m of messages){
98
+ const content = null == m ? void 0 : m.content;
99
+ if (Array.isArray(content)) {
100
+ for (const part of content)if ((null == part ? void 0 : part.type) === 'image_url') cnt++;
101
+ }
102
+ }
103
+ return cnt;
104
+ } catch {
105
+ return;
106
+ }
107
+ })();
108
+ const summary = {
109
+ id,
110
+ model,
111
+ messagesCount: messages.length,
112
+ hasImages
113
+ };
114
+ this.logger.info('[GUIAgent] onLLMRequest summary:', safeStringify(summary));
115
+ const firstMsg = messages[0];
116
+ if (firstMsg) this.logger.debug('[GUIAgent] onLLMRequest first message:', safeStringify(firstMsg, 1200));
117
+ } catch (e) {
118
+ this.logger.error('[GUIAgent] onLLMRequest logging failed:', e);
119
+ }
120
+ }
121
+ async onEachAgentLoopStart(sessionId) {
122
+ this.logger.info('onEachAgentLoopStart', sessionId);
123
+ }
124
+ async onAgentLoopEnd(id) {}
125
+ async onBeforeToolCall(id, toolCall, args) {
126
+ return args;
127
+ }
128
+ async onAfterToolCall(id, toolCall, result) {
129
+ this.logger.info('onAfterToolCall toolCall', JSON.stringify(toolCall));
130
+ if (toolCall.name !== external_constants_js_namespaceObject.GUI_ADAPTED_TOOL_NAME) return void this.logger.info('onAfterToolCall: skipping screenshot');
131
+ await (0, utils_namespaceObject.sleep)(this.loopIntervalInMs);
132
+ const output = await this.operator.doScreenshot();
133
+ if (!output) return void this.logger.error('Failed to get screenshot');
134
+ const base64Tool = new media_utils_namespaceObject.Base64ImageParser(output.base64);
135
+ const base64Uri = base64Tool.getDataUri();
136
+ if (!base64Uri) return void this.logger.error('Failed to get base64 image uri');
137
+ const { width: imageWidth, height: imageHeight } = base64Tool.getDimensions() || {
138
+ width: -1,
139
+ height: -1
140
+ };
141
+ const content = [
142
+ {
143
+ type: 'image_url',
144
+ image_url: {
145
+ url: base64Uri,
146
+ detail: this.detailCalculator(imageWidth, imageHeight)
147
+ }
148
+ }
149
+ ];
150
+ if (null == output ? void 0 : output.url) content.push({
151
+ type: 'text',
152
+ text: `The current page's url: ${output.url}`
153
+ });
154
+ const eventStream = this.getEventStream();
155
+ const events = eventStream.getEvents();
156
+ this.logger.info('onAfterToolCall events length:', events.length);
157
+ const event = eventStream.createEvent('environment_input', {
158
+ description: 'Browser Screenshot',
159
+ content,
160
+ metadata: {
161
+ type: 'screenshot',
162
+ url: null == output ? void 0 : output.url
163
+ }
164
+ });
165
+ eventStream.sendEvent(event);
166
+ return result;
167
+ }
168
+ constructor(config){
169
+ const { operator, model, systemPrompt, customeActionParser, normalizeCoordinates, detailCalculator, maxLoopCount, loopIntervalInMs = 500 } = config;
170
+ let finalSystemPrompt = external_prompts_js_namespaceObject.SYSTEM_PROMPT;
171
+ if ('string' == typeof systemPrompt) finalSystemPrompt = systemPrompt;
172
+ else if (Array.isArray(systemPrompt)) finalSystemPrompt = systemPrompt.map((p)=>'string' == typeof p ? p : p.content).join('\n\n');
173
+ else if (systemPrompt && (0, utils_namespaceObject.isSystemPromptTemplate)(systemPrompt)) finalSystemPrompt = (0, utils_namespaceObject.assembleSystemPrompt)(systemPrompt, operator.getSupportedActions());
174
+ defaultLogger.debug('final instructions for sp:', finalSystemPrompt);
175
+ const AdaptedToolCallEngine = class extends external_ToolCallEngine_js_namespaceObject.GUIAgentToolCallEngine {
176
+ constructor(){
177
+ super(customeActionParser);
178
+ }
179
+ };
180
+ super({
181
+ name: GUIAgent.label,
182
+ instructions: finalSystemPrompt,
183
+ tools: [],
184
+ toolCallEngine: AdaptedToolCallEngine,
185
+ model: model,
186
+ ...maxLoopCount && {
187
+ maxIterations: maxLoopCount
188
+ },
189
+ logLevel: agent_namespaceObject.LogLevel.DEBUG
190
+ }), _define_property(this, "operator", void 0), _define_property(this, "normalizeCoordinates", void 0), _define_property(this, "detailCalculator", void 0), _define_property(this, "loopIntervalInMs", void 0);
191
+ this.operator = operator;
192
+ this.normalizeCoordinates = normalizeCoordinates ?? external_defaultImpls_js_namespaceObject.defaultNormalizeCoords;
193
+ this.detailCalculator = detailCalculator ?? external_defaultImpls_js_namespaceObject.defaultDetailCalculator;
194
+ this.loopIntervalInMs = loopIntervalInMs;
195
+ this.logger = this.logger.spawn('[GUIAgent]');
196
+ }
197
+ }
198
+ _define_property(GUIAgent, "label", 'GUI Agent');
199
+ exports.GUIAgent = __webpack_exports__.GUIAgent;
200
+ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
201
+ "GUIAgent"
202
+ ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
203
+ Object.defineProperty(exports, '__esModule', {
204
+ value: true
205
+ });
206
+
207
+ //# sourceMappingURL=GUIAgent.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GUIAgent.js","sources":["webpack://@ui-tars-test/agent-sdk/webpack/runtime/define_property_getters","webpack://@ui-tars-test/agent-sdk/webpack/runtime/has_own_property","webpack://@ui-tars-test/agent-sdk/webpack/runtime/make_namespace_object","webpack://@ui-tars-test/agent-sdk/./src/GUIAgent.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","/*\n * Copyright (c) 2025 Bytedance, Inc. 
and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport {\n LLMRequestHookPayload,\n ChatCompletionContentPart,\n LogLevel,\n Tool,\n ConsoleLogger,\n} from '@tarko/agent';\nimport { GUIAgentToolCallEngine } from './ToolCallEngine';\nimport { SYSTEM_PROMPT } from './prompts';\nimport { Base64ImageParser } from '@agent-infra/media-utils';\nimport { Operator, BaseGUIAgent } from '@ui-tars-test/shared/base';\nimport {\n GUIAgentConfig,\n NormalizeCoordinates,\n ImageDetailCalculator,\n} from '@ui-tars-test/shared/types';\nimport {\n assembleSystemPrompt,\n isSystemPromptTemplate,\n normalizeActionCoords,\n sleep,\n} from '@ui-tars-test/shared/utils';\nimport { GUI_ADAPTED_TOOL_NAME } from './constants';\nimport { convertToAgentUIAction, createGUIErrorResponse } from './utils';\nimport { defaultNormalizeCoords, defaultDetailCalculator } from './defaultImpls';\n\nconst defaultLogger = new ConsoleLogger('[GUIAgent]', LogLevel.DEBUG);\n\nexport class GUIAgent<T extends Operator> extends BaseGUIAgent {\n static label = 'GUI Agent';\n\n private operator: Operator | undefined;\n private normalizeCoordinates: NormalizeCoordinates;\n private detailCalculator: ImageDetailCalculator;\n private loopIntervalInMs: number;\n\n constructor(config: GUIAgentConfig<T>) {\n const {\n operator,\n model,\n systemPrompt,\n customeActionParser,\n normalizeCoordinates,\n detailCalculator,\n maxLoopCount,\n loopIntervalInMs = 500,\n } = config;\n let finalSystemPrompt = SYSTEM_PROMPT;\n if (typeof systemPrompt === 'string') {\n finalSystemPrompt = systemPrompt;\n } else if (Array.isArray(systemPrompt)) {\n finalSystemPrompt = systemPrompt\n .map((p) => (typeof p === 'string' ? 
p : p.content))\n .join('\\n\\n');\n } else if (systemPrompt && isSystemPromptTemplate(systemPrompt)) {\n finalSystemPrompt = assembleSystemPrompt(systemPrompt, operator.getSupportedActions());\n }\n defaultLogger.debug('final instructions for sp:', finalSystemPrompt);\n\n // Create a adapted ToolCallEngine constructor that captures the customeActionParser\n const AdaptedToolCallEngine = class extends GUIAgentToolCallEngine {\n constructor() {\n super(customeActionParser);\n }\n };\n\n super({\n name: GUIAgent.label,\n instructions: finalSystemPrompt,\n tools: [],\n toolCallEngine: AdaptedToolCallEngine,\n model: model,\n ...(maxLoopCount && { maxIterations: maxLoopCount }),\n logLevel: LogLevel.DEBUG,\n });\n this.operator = operator;\n this.normalizeCoordinates = normalizeCoordinates ?? defaultNormalizeCoords;\n // Default detail calculator implementation\n this.detailCalculator = detailCalculator ?? defaultDetailCalculator;\n this.loopIntervalInMs = loopIntervalInMs;\n this.logger = this.logger.spawn('[GUIAgent]');\n }\n\n async initialize() {\n // Register the GUI tool\n this.registerTool(\n new Tool({\n id: GUI_ADAPTED_TOOL_NAME,\n description: 'operator tool',\n parameters: {}, // no need to pass parameters\n function: async (input) => {\n this.logger.log(`${GUI_ADAPTED_TOOL_NAME} input:`, input);\n if (!this.operator) {\n return createGUIErrorResponse(input.action, 'Operator not initialized');\n }\n if (input.errorMessage) {\n return createGUIErrorResponse(input.action, input.errorMessage);\n }\n // normalize coordinates\n if (input.operator_action) {\n input.operator_action = normalizeActionCoords(\n input.operator_action,\n this.normalizeCoordinates,\n );\n }\n this.logger.info('action to execute:', JSON.stringify(input.operator_action));\n const result = await this.operator!.doExecute({\n actions: [input.operator_action],\n });\n if (result.errorMessage) {\n return createGUIErrorResponse(input.action, result.errorMessage);\n }\n // return { action: 
input.action, status: 'success', result };\n return {\n success: true,\n action: input.action,\n normalizedAction: convertToAgentUIAction(input.operator_action),\n observation: undefined, // Reserved for future implementation\n };\n },\n }),\n );\n super.initialize();\n }\n\n async onLLMRequest(id: string, payload: LLMRequestHookPayload): Promise<void> {\n try {\n const safeStringify = (obj: unknown, max = 800): string => {\n try {\n const s = JSON.stringify(obj);\n return s.length > max ? s.slice(0, max) + '…(truncated)' : s;\n } catch {\n return '[unserializable]';\n }\n };\n\n const req = payload?.request;\n const messages = req && Array.isArray(req.messages) ? req.messages : [];\n const model = req && typeof req.model === 'string' ? req.model : 'unknown';\n\n const hasImages = (() => {\n try {\n let cnt = 0;\n for (const m of messages as Array<{ content?: unknown }>) {\n const content = m?.content;\n if (Array.isArray(content)) {\n for (const part of content as Array<{ type?: string }>) {\n if (part?.type === 'image_url') cnt++;\n }\n }\n }\n return cnt;\n } catch {\n return undefined as unknown as number | undefined;\n }\n })();\n\n const summary = {\n id,\n model,\n messagesCount: messages.length,\n hasImages,\n };\n\n this.logger.info('[GUIAgent] onLLMRequest summary:', safeStringify(summary));\n\n const firstMsg = messages[0];\n if (firstMsg) {\n this.logger.debug('[GUIAgent] onLLMRequest first message:', safeStringify(firstMsg, 1200));\n }\n } catch (e) {\n this.logger.error('[GUIAgent] onLLMRequest logging failed:', e);\n }\n }\n\n async onEachAgentLoopStart(sessionId: string) {\n this.logger.info('onEachAgentLoopStart', sessionId);\n }\n\n async onAgentLoopEnd(id: string): Promise<void> {\n // await this.browserOperator.cleanup();\n }\n\n async onBeforeToolCall(\n id: string,\n toolCall: { toolCallId: string; name: string },\n args: unknown,\n ) {\n return args;\n }\n\n async onAfterToolCall(\n id: string,\n toolCall: { toolCallId: string; name: string 
},\n result: unknown,\n ): Promise<unknown> {\n this.logger.info('onAfterToolCall toolCall', JSON.stringify(toolCall));\n\n if (toolCall.name !== GUI_ADAPTED_TOOL_NAME) {\n this.logger.info('onAfterToolCall: skipping screenshot');\n return;\n }\n\n await sleep(this.loopIntervalInMs);\n\n const output = await this.operator!.doScreenshot();\n if (!output) {\n this.logger.error('Failed to get screenshot');\n return;\n }\n\n const base64Tool = new Base64ImageParser(output.base64);\n const base64Uri = base64Tool.getDataUri();\n if (!base64Uri) {\n this.logger.error('Failed to get base64 image uri');\n return;\n }\n\n const { width: imageWidth, height: imageHeight } = base64Tool.getDimensions() || {\n width: -1,\n height: -1,\n };\n\n const content: ChatCompletionContentPart[] = [\n {\n type: 'image_url',\n image_url: {\n url: base64Uri,\n detail: this.detailCalculator(imageWidth, imageHeight),\n },\n },\n ];\n\n if (output?.url) {\n content.push({\n type: 'text',\n text: `The current page's url: ${output.url}`,\n });\n }\n\n const eventStream = this.getEventStream();\n const events = eventStream.getEvents();\n this.logger.info('onAfterToolCall events length:', events.length);\n\n const event = eventStream.createEvent('environment_input', {\n description: 'Browser Screenshot',\n content,\n metadata: {\n type: 'screenshot',\n url: output?.url,\n },\n });\n eventStream.sendEvent(event);\n return result;\n 
}\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","defaultLogger","ConsoleLogger","LogLevel","GUIAgent","BaseGUIAgent","Tool","GUI_ADAPTED_TOOL_NAME","input","createGUIErrorResponse","normalizeActionCoords","JSON","result","convertToAgentUIAction","undefined","id","payload","safeStringify","max","s","req","messages","Array","model","hasImages","cnt","m","content","part","summary","firstMsg","e","sessionId","toolCall","args","sleep","output","base64Tool","Base64ImageParser","base64Uri","imageWidth","imageHeight","eventStream","events","event","config","operator","systemPrompt","customeActionParser","normalizeCoordinates","detailCalculator","maxLoopCount","loopIntervalInMs","finalSystemPrompt","SYSTEM_PROMPT","p","isSystemPromptTemplate","assembleSystemPrompt","AdaptedToolCallEngine","GUIAgentToolCallEngine","defaultNormalizeCoords","defaultDetailCalculator"],"mappings":";;;;;;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACHC;;;;;;;;;;AA2BD,MAAMI,gBAAgB,IAAIC,sBAAAA,aAAaA,CAAC,cAAcC,sBAAAA,QAAAA,CAAAA,KAAc;AAE7D,MAAMC,iBAAqCC,qBAAAA,YAAYA;IAuD5D,MAAM,aAAa;QAEjB,IAAI,CAAC,YAAY,CACf,IAAIC,sBAAAA,IAAIA,CAAC;YACP,IAAIC,sCAAAA,qBAAqBA;YACzB,aAAa;YACb,YAAY,CAAC;YACb,UAAU,OAAOC;gBACf,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAGD,sCAAAA,qBAAqBA,CAAC,OAAO,CAAC,EAAEC;gBACnD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAChB,OAAOC,AAAAA,IAAAA,kCAAAA,sBAAAA,AAAAA,EAAuBD,MAAM,MAAM,EAAE;gBAE9C,IAAIA,MAAM,YAAY,EACpB,OAAOC,AAAAA,IAAAA,kCAAAA,sBAAAA,AAAAA,EAAuBD,MAAM,MAAM,EAAEA,MAAM,YAAY;gBAGhE,IAAIA,MAAM,eAAe,EACvBA,MAAM,eAAe,GAAGE
,AAAAA,IAAAA,sBAAAA,qBAAAA,AAAAA,EACtBF,MAAM,eAAe,EACrB,IAAI,CAAC,oBAAoB;gBAG7B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBAAsBG,KAAK,SAAS,CAACH,MAAM,eAAe;gBAC3E,MAAMI,SAAS,MAAM,IAAI,CAAC,QAAQ,CAAE,SAAS,CAAC;oBAC5C,SAAS;wBAACJ,MAAM,eAAe;qBAAC;gBAClC;gBACA,IAAII,OAAO,YAAY,EACrB,OAAOH,AAAAA,IAAAA,kCAAAA,sBAAAA,AAAAA,EAAuBD,MAAM,MAAM,EAAEI,OAAO,YAAY;gBAGjE,OAAO;oBACL,SAAS;oBACT,QAAQJ,MAAM,MAAM;oBACpB,kBAAkBK,AAAAA,IAAAA,kCAAAA,sBAAAA,AAAAA,EAAuBL,MAAM,eAAe;oBAC9D,aAAaM;gBACf;YACF;QACF;QAEF,KAAK,CAAC;IACR;IAEA,MAAM,aAAaC,EAAU,EAAEC,OAA8B,EAAiB;QAC5E,IAAI;YACF,MAAMC,gBAAgB,CAACnB,KAAcoB,MAAM,GAAG;gBAC5C,IAAI;oBACF,MAAMC,IAAIR,KAAK,SAAS,CAACb;oBACzB,OAAOqB,EAAE,MAAM,GAAGD,MAAMC,EAAE,KAAK,CAAC,GAAGD,OAAO,sBAAiBC;gBAC7D,EAAE,OAAM;oBACN,OAAO;gBACT;YACF;YAEA,MAAMC,MAAMJ,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO;YAC5B,MAAMK,WAAWD,OAAOE,MAAM,OAAO,CAACF,IAAI,QAAQ,IAAIA,IAAI,QAAQ,GAAG,EAAE;YACvE,MAAMG,QAAQH,OAAO,AAAqB,YAArB,OAAOA,IAAI,KAAK,GAAgBA,IAAI,KAAK,GAAG;YAEjE,MAAMI,YAAa,AAAC;gBAClB,IAAI;oBACF,IAAIC,MAAM;oBACV,KAAK,MAAMC,KAAKL,SAA0C;wBACxD,MAAMM,UAAUD,QAAAA,IAAAA,KAAAA,IAAAA,EAAG,OAAO;wBAC1B,IAAIJ,MAAM,OAAO,CAACK,UAChB;4BAAA,KAAK,MAAMC,QAAQD,QACjB,IAAIC,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,IAAI,AAAD,MAAM,aAAaH;wBAClC;oBAEJ;oBACA,OAAOA;gBACT,EAAE,OAAM;oBACN;gBACF;YACF;YAEA,MAAMI,UAAU;gBACdd;gBACAQ;gBACA,eAAeF,SAAS,MAAM;gBAC9BG;YACF;YAEA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,oCAAoCP,cAAcY;YAEnE,MAAMC,WAAWT,QAAQ,CAAC,EAAE;YAC5B,IAAIS,UACF,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,0CAA0Cb,cAAca,UAAU;QAExF,EAAE,OAAOC,GAAG;YACV,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2CAA2CA;QAC/D;IACF;IAEA,MAAM,qBAAqBC,SAAiB,EAAE;QAC5C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,wBAAwBA;IAC3C;IAEA,MAAM,eAAejB,EAAU,EAAiB,CAEhD;IAEA,MAAM,iBACJA,EAAU,EACVkB,QAA8C,EAC9CC,IAAa,EACb;QACA,OAAOA;IACT;IAEA,MAAM,gBACJnB,EAAU,EACVkB,QAA8C,EAC9CrB,MAAe,EACG;QAClB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,4BAA4BD,KAAK,SAAS,CAACsB;QAE5D,IAAIA,SAAS,IAAI,KAAK1B,sCAAAA,qBAAqBA,EAAE,YAC3C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;QAInB,MAAM4B,AAAAA,IAAAA,sBAAAA,KAAAA,AAAAA,EAAM,IAAI,CAAC,gBAAg
B;QAEjC,MAAMC,SAAS,MAAM,IAAI,CAAC,QAAQ,CAAE,YAAY;QAChD,IAAI,CAACA,QAAQ,YACX,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;QAIpB,MAAMC,aAAa,IAAIC,4BAAAA,iBAAiBA,CAACF,OAAO,MAAM;QACtD,MAAMG,YAAYF,WAAW,UAAU;QACvC,IAAI,CAACE,WAAW,YACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;QAIpB,MAAM,EAAE,OAAOC,UAAU,EAAE,QAAQC,WAAW,EAAE,GAAGJ,WAAW,aAAa,MAAM;YAC/E,OAAO;YACP,QAAQ;QACV;QAEA,MAAMV,UAAuC;YAC3C;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKY;oBACL,QAAQ,IAAI,CAAC,gBAAgB,CAACC,YAAYC;gBAC5C;YACF;SACD;QAED,IAAIL,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,GAAG,EACbT,QAAQ,IAAI,CAAC;YACX,MAAM;YACN,MAAM,CAAC,wBAAwB,EAAES,OAAO,GAAG,EAAE;QAC/C;QAGF,MAAMM,cAAc,IAAI,CAAC,cAAc;QACvC,MAAMC,SAASD,YAAY,SAAS;QACpC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,kCAAkCC,OAAO,MAAM;QAEhE,MAAMC,QAAQF,YAAY,WAAW,CAAC,qBAAqB;YACzD,aAAa;YACbf;YACA,UAAU;gBACR,MAAM;gBACN,KAAKS,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,GAAG;YAClB;QACF;QACAM,YAAY,SAAS,CAACE;QACtB,OAAOhC;IACT;IA1NA,YAAYiC,MAAyB,CAAE;QACrC,MAAM,EACJC,QAAQ,EACRvB,KAAK,EACLwB,YAAY,EACZC,mBAAmB,EACnBC,oBAAoB,EACpBC,gBAAgB,EAChBC,YAAY,EACZC,mBAAmB,GAAG,EACvB,GAAGP;QACJ,IAAIQ,oBAAoBC,oCAAAA,aAAaA;QACrC,IAAI,AAAwB,YAAxB,OAAOP,cACTM,oBAAoBN;aACf,IAAIzB,MAAM,OAAO,CAACyB,eACvBM,oBAAoBN,aACjB,GAAG,CAAC,CAACQ,IAAO,AAAa,YAAb,OAAOA,IAAiBA,IAAIA,EAAE,OAAO,EACjD,IAAI,CAAC;aACH,IAAIR,gBAAgBS,AAAAA,IAAAA,sBAAAA,sBAAAA,AAAAA,EAAuBT,eAChDM,oBAAoBI,AAAAA,IAAAA,sBAAAA,oBAAAA,AAAAA,EAAqBV,cAAcD,SAAS,mBAAmB;QAErF7C,cAAc,KAAK,CAAC,8BAA8BoD;QAGlD,MAAMK,wBAAwB,cAAcC,2CAAAA,sBAAsBA;YAChE,aAAc;gBACZ,KAAK,CAACX;YACR;QACF;QAEA,KAAK,CAAC;YACJ,MAAM5C,SAAS,KAAK;YACpB,cAAciD;YACd,OAAO,EAAE;YACT,gBAAgBK;YAChB,OAAOnC;YACP,GAAI4B,gBAAgB;gBAAE,eAAeA;YAAa,CAAC;YACnD,UAAUhD,sBAAAA,QAAAA,CAAAA,KAAc;QAC1B,IA3CF,uBAAQ,YAAR,SACA,uBAAQ,wBAAR,SACA,uBAAQ,oBAAR,SACA,uBAAQ,oBAAR;QAyCE,IAAI,CAAC,QAAQ,GAAG2C;QAChB,IAAI,CAAC,oBAAoB,GAAGG,wBAAwBW,yCAAAA,sBAAsBA;QAE1E,IAAI,CAAC,gBAAgB,GAAGV,oBAAoBW,yCAAAA,uBAAuBA;QACnE,IAAI,CAAC,gBAAgB,GAAGT;QACxB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;IAClC;AA8KF;AAlOE,iBADWhD,UACJ,SAAQ"}
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
+ import { ConsoleLogger, LogLevel, Tool } from "@tarko/agent";
6
+ import { GUIAgentToolCallEngine } from "./ToolCallEngine.mjs";
7
+ import { SYSTEM_PROMPT } from "./prompts.mjs";
8
+ import { Base64ImageParser } from "@agent-infra/media-utils";
9
+ import { BaseGUIAgent } from "@ui-tars-test/shared/base";
10
+ import { assembleSystemPrompt, isSystemPromptTemplate, normalizeActionCoords, sleep } from "@ui-tars-test/shared/utils";
11
+ import { GUI_ADAPTED_TOOL_NAME } from "./constants.mjs";
12
+ import { convertToAgentUIAction, createGUIErrorResponse } from "./utils.mjs";
13
+ import { defaultDetailCalculator, defaultNormalizeCoords } from "./defaultImpls.mjs";
14
+ function _define_property(obj, key, value) {
15
+ if (key in obj) Object.defineProperty(obj, key, {
16
+ value: value,
17
+ enumerable: true,
18
+ configurable: true,
19
+ writable: true
20
+ });
21
+ else obj[key] = value;
22
+ return obj;
23
+ }
24
+ const defaultLogger = new ConsoleLogger('[GUIAgent]', LogLevel.DEBUG);
25
+ class GUIAgent extends BaseGUIAgent {
26
+ async initialize() {
27
+ this.registerTool(new Tool({
28
+ id: GUI_ADAPTED_TOOL_NAME,
29
+ description: 'operator tool',
30
+ parameters: {},
31
+ function: async (input)=>{
32
+ this.logger.log(`${GUI_ADAPTED_TOOL_NAME} input:`, input);
33
+ if (!this.operator) return createGUIErrorResponse(input.action, 'Operator not initialized');
34
+ if (input.errorMessage) return createGUIErrorResponse(input.action, input.errorMessage);
35
+ if (input.operator_action) input.operator_action = normalizeActionCoords(input.operator_action, this.normalizeCoordinates);
36
+ this.logger.info('action to execute:', JSON.stringify(input.operator_action));
37
+ const result = await this.operator.doExecute({
38
+ actions: [
39
+ input.operator_action
40
+ ]
41
+ });
42
+ if (result.errorMessage) return createGUIErrorResponse(input.action, result.errorMessage);
43
+ return {
44
+ success: true,
45
+ action: input.action,
46
+ normalizedAction: convertToAgentUIAction(input.operator_action),
47
+ observation: void 0
48
+ };
49
+ }
50
+ }));
51
+ super.initialize();
52
+ }
53
+ async onLLMRequest(id, payload) {
54
+ try {
55
+ const safeStringify = (obj, max = 800)=>{
56
+ try {
57
+ const s = JSON.stringify(obj);
58
+ return s.length > max ? s.slice(0, max) + "\u2026(truncated)" : s;
59
+ } catch {
60
+ return '[unserializable]';
61
+ }
62
+ };
63
+ const req = null == payload ? void 0 : payload.request;
64
+ const messages = req && Array.isArray(req.messages) ? req.messages : [];
65
+ const model = req && 'string' == typeof req.model ? req.model : 'unknown';
66
+ const hasImages = (()=>{
67
+ try {
68
+ let cnt = 0;
69
+ for (const m of messages){
70
+ const content = null == m ? void 0 : m.content;
71
+ if (Array.isArray(content)) {
72
+ for (const part of content)if ((null == part ? void 0 : part.type) === 'image_url') cnt++;
73
+ }
74
+ }
75
+ return cnt;
76
+ } catch {
77
+ return;
78
+ }
79
+ })();
80
+ const summary = {
81
+ id,
82
+ model,
83
+ messagesCount: messages.length,
84
+ hasImages
85
+ };
86
+ this.logger.info('[GUIAgent] onLLMRequest summary:', safeStringify(summary));
87
+ const firstMsg = messages[0];
88
+ if (firstMsg) this.logger.debug('[GUIAgent] onLLMRequest first message:', safeStringify(firstMsg, 1200));
89
+ } catch (e) {
90
+ this.logger.error('[GUIAgent] onLLMRequest logging failed:', e);
91
+ }
92
+ }
93
+ async onEachAgentLoopStart(sessionId) {
94
+ this.logger.info('onEachAgentLoopStart', sessionId);
95
+ }
96
+ async onAgentLoopEnd(id) {}
97
+ async onBeforeToolCall(id, toolCall, args) {
98
+ return args;
99
+ }
100
+ async onAfterToolCall(id, toolCall, result) {
101
+ this.logger.info('onAfterToolCall toolCall', JSON.stringify(toolCall));
102
+ if (toolCall.name !== GUI_ADAPTED_TOOL_NAME) return void this.logger.info('onAfterToolCall: skipping screenshot');
103
+ await sleep(this.loopIntervalInMs);
104
+ const output = await this.operator.doScreenshot();
105
+ if (!output) return void this.logger.error('Failed to get screenshot');
106
+ const base64Tool = new Base64ImageParser(output.base64);
107
+ const base64Uri = base64Tool.getDataUri();
108
+ if (!base64Uri) return void this.logger.error('Failed to get base64 image uri');
109
+ const { width: imageWidth, height: imageHeight } = base64Tool.getDimensions() || {
110
+ width: -1,
111
+ height: -1
112
+ };
113
+ const content = [
114
+ {
115
+ type: 'image_url',
116
+ image_url: {
117
+ url: base64Uri,
118
+ detail: this.detailCalculator(imageWidth, imageHeight)
119
+ }
120
+ }
121
+ ];
122
+ if (null == output ? void 0 : output.url) content.push({
123
+ type: 'text',
124
+ text: `The current page's url: ${output.url}`
125
+ });
126
+ const eventStream = this.getEventStream();
127
+ const events = eventStream.getEvents();
128
+ this.logger.info('onAfterToolCall events length:', events.length);
129
+ const event = eventStream.createEvent('environment_input', {
130
+ description: 'Browser Screenshot',
131
+ content,
132
+ metadata: {
133
+ type: 'screenshot',
134
+ url: null == output ? void 0 : output.url
135
+ }
136
+ });
137
+ eventStream.sendEvent(event);
138
+ return result;
139
+ }
140
+ constructor(config){
141
+ const { operator, model, systemPrompt, customeActionParser, normalizeCoordinates, detailCalculator, maxLoopCount, loopIntervalInMs = 500 } = config;
142
+ let finalSystemPrompt = SYSTEM_PROMPT;
143
+ if ('string' == typeof systemPrompt) finalSystemPrompt = systemPrompt;
144
+ else if (Array.isArray(systemPrompt)) finalSystemPrompt = systemPrompt.map((p)=>'string' == typeof p ? p : p.content).join('\n\n');
145
+ else if (systemPrompt && isSystemPromptTemplate(systemPrompt)) finalSystemPrompt = assembleSystemPrompt(systemPrompt, operator.getSupportedActions());
146
+ defaultLogger.debug('final instructions for sp:', finalSystemPrompt);
147
+ const AdaptedToolCallEngine = class extends GUIAgentToolCallEngine {
148
+ constructor(){
149
+ super(customeActionParser);
150
+ }
151
+ };
152
+ super({
153
+ name: GUIAgent.label,
154
+ instructions: finalSystemPrompt,
155
+ tools: [],
156
+ toolCallEngine: AdaptedToolCallEngine,
157
+ model: model,
158
+ ...maxLoopCount && {
159
+ maxIterations: maxLoopCount
160
+ },
161
+ logLevel: LogLevel.DEBUG
162
+ }), _define_property(this, "operator", void 0), _define_property(this, "normalizeCoordinates", void 0), _define_property(this, "detailCalculator", void 0), _define_property(this, "loopIntervalInMs", void 0);
163
+ this.operator = operator;
164
+ this.normalizeCoordinates = normalizeCoordinates ?? defaultNormalizeCoords;
165
+ this.detailCalculator = detailCalculator ?? defaultDetailCalculator;
166
+ this.loopIntervalInMs = loopIntervalInMs;
167
+ this.logger = this.logger.spawn('[GUIAgent]');
168
+ }
169
+ }
170
+ _define_property(GUIAgent, "label", 'GUI Agent');
171
+ export { GUIAgent };
172
+
173
+ //# sourceMappingURL=GUIAgent.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GUIAgent.mjs","sources":["webpack://@ui-tars-test/agent-sdk/./src/GUIAgent.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport {\n LLMRequestHookPayload,\n ChatCompletionContentPart,\n LogLevel,\n Tool,\n ConsoleLogger,\n} from '@tarko/agent';\nimport { GUIAgentToolCallEngine } from './ToolCallEngine';\nimport { SYSTEM_PROMPT } from './prompts';\nimport { Base64ImageParser } from '@agent-infra/media-utils';\nimport { Operator, BaseGUIAgent } from '@ui-tars-test/shared/base';\nimport {\n GUIAgentConfig,\n NormalizeCoordinates,\n ImageDetailCalculator,\n} from '@ui-tars-test/shared/types';\nimport {\n assembleSystemPrompt,\n isSystemPromptTemplate,\n normalizeActionCoords,\n sleep,\n} from '@ui-tars-test/shared/utils';\nimport { GUI_ADAPTED_TOOL_NAME } from './constants';\nimport { convertToAgentUIAction, createGUIErrorResponse } from './utils';\nimport { defaultNormalizeCoords, defaultDetailCalculator } from './defaultImpls';\n\nconst defaultLogger = new ConsoleLogger('[GUIAgent]', LogLevel.DEBUG);\n\nexport class GUIAgent<T extends Operator> extends BaseGUIAgent {\n static label = 'GUI Agent';\n\n private operator: Operator | undefined;\n private normalizeCoordinates: NormalizeCoordinates;\n private detailCalculator: ImageDetailCalculator;\n private loopIntervalInMs: number;\n\n constructor(config: GUIAgentConfig<T>) {\n const {\n operator,\n model,\n systemPrompt,\n customeActionParser,\n normalizeCoordinates,\n detailCalculator,\n maxLoopCount,\n loopIntervalInMs = 500,\n } = config;\n let finalSystemPrompt = SYSTEM_PROMPT;\n if (typeof systemPrompt === 'string') {\n finalSystemPrompt = systemPrompt;\n } else if (Array.isArray(systemPrompt)) {\n finalSystemPrompt = systemPrompt\n .map((p) => (typeof p === 'string' ? 
p : p.content))\n .join('\\n\\n');\n } else if (systemPrompt && isSystemPromptTemplate(systemPrompt)) {\n finalSystemPrompt = assembleSystemPrompt(systemPrompt, operator.getSupportedActions());\n }\n defaultLogger.debug('final instructions for sp:', finalSystemPrompt);\n\n // Create a adapted ToolCallEngine constructor that captures the customeActionParser\n const AdaptedToolCallEngine = class extends GUIAgentToolCallEngine {\n constructor() {\n super(customeActionParser);\n }\n };\n\n super({\n name: GUIAgent.label,\n instructions: finalSystemPrompt,\n tools: [],\n toolCallEngine: AdaptedToolCallEngine,\n model: model,\n ...(maxLoopCount && { maxIterations: maxLoopCount }),\n logLevel: LogLevel.DEBUG,\n });\n this.operator = operator;\n this.normalizeCoordinates = normalizeCoordinates ?? defaultNormalizeCoords;\n // Default detail calculator implementation\n this.detailCalculator = detailCalculator ?? defaultDetailCalculator;\n this.loopIntervalInMs = loopIntervalInMs;\n this.logger = this.logger.spawn('[GUIAgent]');\n }\n\n async initialize() {\n // Register the GUI tool\n this.registerTool(\n new Tool({\n id: GUI_ADAPTED_TOOL_NAME,\n description: 'operator tool',\n parameters: {}, // no need to pass parameters\n function: async (input) => {\n this.logger.log(`${GUI_ADAPTED_TOOL_NAME} input:`, input);\n if (!this.operator) {\n return createGUIErrorResponse(input.action, 'Operator not initialized');\n }\n if (input.errorMessage) {\n return createGUIErrorResponse(input.action, input.errorMessage);\n }\n // normalize coordinates\n if (input.operator_action) {\n input.operator_action = normalizeActionCoords(\n input.operator_action,\n this.normalizeCoordinates,\n );\n }\n this.logger.info('action to execute:', JSON.stringify(input.operator_action));\n const result = await this.operator!.doExecute({\n actions: [input.operator_action],\n });\n if (result.errorMessage) {\n return createGUIErrorResponse(input.action, result.errorMessage);\n }\n // return { action: 
input.action, status: 'success', result };\n return {\n success: true,\n action: input.action,\n normalizedAction: convertToAgentUIAction(input.operator_action),\n observation: undefined, // Reserved for future implementation\n };\n },\n }),\n );\n super.initialize();\n }\n\n async onLLMRequest(id: string, payload: LLMRequestHookPayload): Promise<void> {\n try {\n const safeStringify = (obj: unknown, max = 800): string => {\n try {\n const s = JSON.stringify(obj);\n return s.length > max ? s.slice(0, max) + '…(truncated)' : s;\n } catch {\n return '[unserializable]';\n }\n };\n\n const req = payload?.request;\n const messages = req && Array.isArray(req.messages) ? req.messages : [];\n const model = req && typeof req.model === 'string' ? req.model : 'unknown';\n\n const hasImages = (() => {\n try {\n let cnt = 0;\n for (const m of messages as Array<{ content?: unknown }>) {\n const content = m?.content;\n if (Array.isArray(content)) {\n for (const part of content as Array<{ type?: string }>) {\n if (part?.type === 'image_url') cnt++;\n }\n }\n }\n return cnt;\n } catch {\n return undefined as unknown as number | undefined;\n }\n })();\n\n const summary = {\n id,\n model,\n messagesCount: messages.length,\n hasImages,\n };\n\n this.logger.info('[GUIAgent] onLLMRequest summary:', safeStringify(summary));\n\n const firstMsg = messages[0];\n if (firstMsg) {\n this.logger.debug('[GUIAgent] onLLMRequest first message:', safeStringify(firstMsg, 1200));\n }\n } catch (e) {\n this.logger.error('[GUIAgent] onLLMRequest logging failed:', e);\n }\n }\n\n async onEachAgentLoopStart(sessionId: string) {\n this.logger.info('onEachAgentLoopStart', sessionId);\n }\n\n async onAgentLoopEnd(id: string): Promise<void> {\n // await this.browserOperator.cleanup();\n }\n\n async onBeforeToolCall(\n id: string,\n toolCall: { toolCallId: string; name: string },\n args: unknown,\n ) {\n return args;\n }\n\n async onAfterToolCall(\n id: string,\n toolCall: { toolCallId: string; name: string 
},\n result: unknown,\n ): Promise<unknown> {\n this.logger.info('onAfterToolCall toolCall', JSON.stringify(toolCall));\n\n if (toolCall.name !== GUI_ADAPTED_TOOL_NAME) {\n this.logger.info('onAfterToolCall: skipping screenshot');\n return;\n }\n\n await sleep(this.loopIntervalInMs);\n\n const output = await this.operator!.doScreenshot();\n if (!output) {\n this.logger.error('Failed to get screenshot');\n return;\n }\n\n const base64Tool = new Base64ImageParser(output.base64);\n const base64Uri = base64Tool.getDataUri();\n if (!base64Uri) {\n this.logger.error('Failed to get base64 image uri');\n return;\n }\n\n const { width: imageWidth, height: imageHeight } = base64Tool.getDimensions() || {\n width: -1,\n height: -1,\n };\n\n const content: ChatCompletionContentPart[] = [\n {\n type: 'image_url',\n image_url: {\n url: base64Uri,\n detail: this.detailCalculator(imageWidth, imageHeight),\n },\n },\n ];\n\n if (output?.url) {\n content.push({\n type: 'text',\n text: `The current page's url: ${output.url}`,\n });\n }\n\n const eventStream = this.getEventStream();\n const events = eventStream.getEvents();\n this.logger.info('onAfterToolCall events length:', events.length);\n\n const event = eventStream.createEvent('environment_input', {\n description: 'Browser Screenshot',\n content,\n metadata: {\n type: 'screenshot',\n url: output?.url,\n },\n });\n eventStream.sendEvent(event);\n return result;\n 
}\n}\n"],"names":["defaultLogger","ConsoleLogger","LogLevel","GUIAgent","BaseGUIAgent","Tool","GUI_ADAPTED_TOOL_NAME","input","createGUIErrorResponse","normalizeActionCoords","JSON","result","convertToAgentUIAction","undefined","id","payload","safeStringify","obj","max","s","req","messages","Array","model","hasImages","cnt","m","content","part","summary","firstMsg","e","sessionId","toolCall","args","sleep","output","base64Tool","Base64ImageParser","base64Uri","imageWidth","imageHeight","eventStream","events","event","config","operator","systemPrompt","customeActionParser","normalizeCoordinates","detailCalculator","maxLoopCount","loopIntervalInMs","finalSystemPrompt","SYSTEM_PROMPT","p","isSystemPromptTemplate","assembleSystemPrompt","AdaptedToolCallEngine","GUIAgentToolCallEngine","defaultNormalizeCoords","defaultDetailCalculator"],"mappings":";;;;;;;;;;;;;AAGC;;;;;;;;;;AA2BD,MAAMA,gBAAgB,IAAIC,cAAc,cAAcC,SAAS,KAAK;AAE7D,MAAMC,iBAAqCC;IAuDhD,MAAM,aAAa;QAEjB,IAAI,CAAC,YAAY,CACf,IAAIC,KAAK;YACP,IAAIC;YACJ,aAAa;YACb,YAAY,CAAC;YACb,UAAU,OAAOC;gBACf,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAGD,sBAAsB,OAAO,CAAC,EAAEC;gBACnD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAChB,OAAOC,uBAAuBD,MAAM,MAAM,EAAE;gBAE9C,IAAIA,MAAM,YAAY,EACpB,OAAOC,uBAAuBD,MAAM,MAAM,EAAEA,MAAM,YAAY;gBAGhE,IAAIA,MAAM,eAAe,EACvBA,MAAM,eAAe,GAAGE,sBACtBF,MAAM,eAAe,EACrB,IAAI,CAAC,oBAAoB;gBAG7B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBAAsBG,KAAK,SAAS,CAACH,MAAM,eAAe;gBAC3E,MAAMI,SAAS,MAAM,IAAI,CAAC,QAAQ,CAAE,SAAS,CAAC;oBAC5C,SAAS;wBAACJ,MAAM,eAAe;qBAAC;gBAClC;gBACA,IAAII,OAAO,YAAY,EACrB,OAAOH,uBAAuBD,MAAM,MAAM,EAAEI,OAAO,YAAY;gBAGjE,OAAO;oBACL,SAAS;oBACT,QAAQJ,MAAM,MAAM;oBACpB,kBAAkBK,uBAAuBL,MAAM,eAAe;oBAC9D,aAAaM;gBACf;YACF;QACF;QAEF,KAAK,CAAC;IACR;IAEA,MAAM,aAAaC,EAAU,EAAEC,OAA8B,EAAiB;QAC5E,IAAI;YACF,MAAMC,gBAAgB,CAACC,KAAcC,MAAM,GAAG;gBAC5C,IAAI;oBACF,MAAMC,IAAIT,KAAK,SAAS,CAACO;oBACzB,OAAOE,EAAE,MAAM,GAAGD,MAAMC,EAAE,KAAK,CAAC,GAAGD,OAAO,sBAAiBC;gBAC7D,EAAE,OAAM;oBACN,OAAO;gBACT;YACF;YAEA,MAAMC,MAAML,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO;Y
AC5B,MAAMM,WAAWD,OAAOE,MAAM,OAAO,CAACF,IAAI,QAAQ,IAAIA,IAAI,QAAQ,GAAG,EAAE;YACvE,MAAMG,QAAQH,OAAO,AAAqB,YAArB,OAAOA,IAAI,KAAK,GAAgBA,IAAI,KAAK,GAAG;YAEjE,MAAMI,YAAa,AAAC;gBAClB,IAAI;oBACF,IAAIC,MAAM;oBACV,KAAK,MAAMC,KAAKL,SAA0C;wBACxD,MAAMM,UAAUD,QAAAA,IAAAA,KAAAA,IAAAA,EAAG,OAAO;wBAC1B,IAAIJ,MAAM,OAAO,CAACK,UAChB;4BAAA,KAAK,MAAMC,QAAQD,QACjB,IAAIC,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,IAAI,AAAD,MAAM,aAAaH;wBAClC;oBAEJ;oBACA,OAAOA;gBACT,EAAE,OAAM;oBACN;gBACF;YACF;YAEA,MAAMI,UAAU;gBACdf;gBACAS;gBACA,eAAeF,SAAS,MAAM;gBAC9BG;YACF;YAEA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,oCAAoCR,cAAca;YAEnE,MAAMC,WAAWT,QAAQ,CAAC,EAAE;YAC5B,IAAIS,UACF,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,0CAA0Cd,cAAcc,UAAU;QAExF,EAAE,OAAOC,GAAG;YACV,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2CAA2CA;QAC/D;IACF;IAEA,MAAM,qBAAqBC,SAAiB,EAAE;QAC5C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,wBAAwBA;IAC3C;IAEA,MAAM,eAAelB,EAAU,EAAiB,CAEhD;IAEA,MAAM,iBACJA,EAAU,EACVmB,QAA8C,EAC9CC,IAAa,EACb;QACA,OAAOA;IACT;IAEA,MAAM,gBACJpB,EAAU,EACVmB,QAA8C,EAC9CtB,MAAe,EACG;QAClB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,4BAA4BD,KAAK,SAAS,CAACuB;QAE5D,IAAIA,SAAS,IAAI,KAAK3B,uBAAuB,YAC3C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;QAInB,MAAM6B,MAAM,IAAI,CAAC,gBAAgB;QAEjC,MAAMC,SAAS,MAAM,IAAI,CAAC,QAAQ,CAAE,YAAY;QAChD,IAAI,CAACA,QAAQ,YACX,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;QAIpB,MAAMC,aAAa,IAAIC,kBAAkBF,OAAO,MAAM;QACtD,MAAMG,YAAYF,WAAW,UAAU;QACvC,IAAI,CAACE,WAAW,YACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;QAIpB,MAAM,EAAE,OAAOC,UAAU,EAAE,QAAQC,WAAW,EAAE,GAAGJ,WAAW,aAAa,MAAM;YAC/E,OAAO;YACP,QAAQ;QACV;QAEA,MAAMV,UAAuC;YAC3C;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKY;oBACL,QAAQ,IAAI,CAAC,gBAAgB,CAACC,YAAYC;gBAC5C;YACF;SACD;QAED,IAAIL,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,GAAG,EACbT,QAAQ,IAAI,CAAC;YACX,MAAM;YACN,MAAM,CAAC,wBAAwB,EAAES,OAAO,GAAG,EAAE;QAC/C;QAGF,MAAMM,cAAc,IAAI,CAAC,cAAc;QACvC,MAAMC,SAASD,YAAY,SAAS;QACpC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,kCAAkCC,OAAO,MAAM;QAEhE,MAAMC,QAAQF,YAAY,WAAW,CAAC,qBAAqB;YACzD,aAAa;YACbf;YACA,UAAU;gBACR,MAAM;gBACN,KAAKS,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,GAAG;YAClB;QACF;QACAM,YAAY,SAAS,CAACE;QAC
tB,OAAOjC;IACT;IA1NA,YAAYkC,MAAyB,CAAE;QACrC,MAAM,EACJC,QAAQ,EACRvB,KAAK,EACLwB,YAAY,EACZC,mBAAmB,EACnBC,oBAAoB,EACpBC,gBAAgB,EAChBC,YAAY,EACZC,mBAAmB,GAAG,EACvB,GAAGP;QACJ,IAAIQ,oBAAoBC;QACxB,IAAI,AAAwB,YAAxB,OAAOP,cACTM,oBAAoBN;aACf,IAAIzB,MAAM,OAAO,CAACyB,eACvBM,oBAAoBN,aACjB,GAAG,CAAC,CAACQ,IAAO,AAAa,YAAb,OAAOA,IAAiBA,IAAIA,EAAE,OAAO,EACjD,IAAI,CAAC;aACH,IAAIR,gBAAgBS,uBAAuBT,eAChDM,oBAAoBI,qBAAqBV,cAAcD,SAAS,mBAAmB;QAErF9C,cAAc,KAAK,CAAC,8BAA8BqD;QAGlD,MAAMK,wBAAwB,cAAcC;YAC1C,aAAc;gBACZ,KAAK,CAACX;YACR;QACF;QAEA,KAAK,CAAC;YACJ,MAAM7C,SAAS,KAAK;YACpB,cAAckD;YACd,OAAO,EAAE;YACT,gBAAgBK;YAChB,OAAOnC;YACP,GAAI4B,gBAAgB;gBAAE,eAAeA;YAAa,CAAC;YACnD,UAAUjD,SAAS,KAAK;QAC1B,IA3CF,uBAAQ,YAAR,SACA,uBAAQ,wBAAR,SACA,uBAAQ,oBAAR,SACA,uBAAQ,oBAAR;QAyCE,IAAI,CAAC,QAAQ,GAAG4C;QAChB,IAAI,CAAC,oBAAoB,GAAGG,wBAAwBW;QAEpD,IAAI,CAAC,gBAAgB,GAAGV,oBAAoBW;QAC5C,IAAI,CAAC,gBAAgB,GAAGT;QACxB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;IAClC;AA8KF;AAlOE,iBADWjD,UACJ,SAAQ"}
@@ -0,0 +1,61 @@
1
+ import { ToolCallEngine, Tool, ToolCallEnginePrepareRequestContext, ChatCompletionCreateParams, ChatCompletionAssistantMessageParam, ChatCompletionChunk, MultimodalToolCallResult, AgentEventStream, ChatCompletionMessageParam, ParsedModelResponse, StreamProcessingState, StreamChunkResult } from '@tarko/agent-interface';
2
+ import { CustomActionParser } from '@ui-tars-test/shared/types';
3
/**
 * GUIAgentToolCallEngine - Minimal prompt engineering (PE) tool call engine
 *
 * This is the simplest possible implementation of a tool call engine that:
 * 1. Uses prompt engineering to instruct the LLM to output tool calls in a specific format
 * 2. Parses tool calls from LLM response text using simple regex matching
 * 3. Does not support incremental streaming: streamed chunks are only accumulated,
 *    and tool calls are extracted from the complete response text
 *
 * Format used: <tool_call>{"name": "tool_name", "arguments": {...}}</tool_call>
 */
export declare class GUIAgentToolCallEngine extends ToolCallEngine {
    private customActionParser?;
    /**
     * @param customActionParser Optional custom action parser for this engine
     */
    constructor(customActionParser?: CustomActionParser);
    /**
     * Prepare system prompt with tool information and instructions
     */
    preparePrompt(instructions: string, tools: Tool[]): string;
    /**
     * Prepare request parameters for the LLM
     *
     * FIXME: move to base tool call engine.
     */
    prepareRequest(context: ToolCallEnginePrepareRequestContext): ChatCompletionCreateParams;
    /**
     * Initialize processing state (minimal implementation)
     *
     * FIXME: move to base tool call engine.
     */
    initStreamProcessingState(): StreamProcessingState;
    /**
     * Process streaming chunks - simply accumulate content
     *
     * FIXME: make it optional
     */
    processStreamingChunk(chunk: ChatCompletionChunk, state: StreamProcessingState): StreamChunkResult;
    /**
     * Generate a tool call ID
     */
    private generateToolCallId;
    /**
     * Extract tool calls from complete response text
     */
    finalizeStreamProcessing(state: StreamProcessingState): ParsedModelResponse;
    /**
     * Build assistant message for conversation history
     * For PE engines, we preserve the raw content including tool call markup
     *
     * FIXME: move to base tool call engine.
     */
    buildHistoricalAssistantMessage(currentLoopAssistantEvent: AgentEventStream.AssistantMessageEvent): ChatCompletionAssistantMessageParam;
    /**
     * Build tool result messages as user messages
     * PE engines format tool results as user input for next iteration
     *
     * FIXME: move to base tool call engine.
     */
    buildHistoricalToolCallResultMessages(toolCallResults: MultimodalToolCallResult[]): ChatCompletionMessageParam[];
}
61
+ //# sourceMappingURL=ToolCallEngine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ToolCallEngine.d.ts","sourceRoot":"","sources":["../src/ToolCallEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,IAAI,EACJ,mCAAmC,EACnC,0BAA0B,EAC1B,mCAAmC,EACnC,mBAAmB,EACnB,wBAAwB,EACxB,gBAAgB,EAChB,0BAA0B,EAE1B,mBAAmB,EACnB,qBAAqB,EACrB,iBAAiB,EAClB,MAAM,wBAAwB,CAAC;AAKhC,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAKhE;;;;;;;;;GASG;AACH,qBAAa,sBAAuB,SAAQ,cAAc;IACxD,OAAO,CAAC,kBAAkB,CAAC,CAAqB;gBAEpC,kBAAkB,CAAC,EAAE,kBAAkB;IAKnD;;OAEG;IACH,aAAa,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM;IAI1D;;;;OAIG;IACH,cAAc,CAAC,OAAO,EAAE,mCAAmC,GAAG,0BAA0B;IAaxF;;;;OAIG;IACH,yBAAyB,IAAI,qBAAqB;IASlD;;;;OAIG;IACH,qBAAqB,CACnB,KAAK,EAAE,mBAAmB,EAC1B,KAAK,EAAE,qBAAqB,GAC3B,iBAAiB;IAsBpB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAI1B;;OAEG;IACH,wBAAwB,CAAC,KAAK,EAAE,qBAAqB,GAAG,mBAAmB;IAiG3E;;;;;OAKG;IACH,+BAA+B,CAC7B,yBAAyB,EAAE,gBAAgB,CAAC,qBAAqB,GAChE,mCAAmC;IAOtC;;;;;OAKG;IACH,qCAAqC,CACnC,eAAe,EAAE,wBAAwB,EAAE,GAC1C,0BAA0B,EAAE;CAchC"}