@ui-tars-test/shared 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/base/agent.d.ts +9 -0
  2. package/dist/base/agent.d.ts.map +1 -0
  3. package/dist/base/agent.js +54 -0
  4. package/dist/base/agent.js.map +1 -0
  5. package/dist/base/agent.mjs +10 -0
  6. package/dist/base/agent.mjs.map +1 -0
  7. package/dist/base/index.d.ts +4 -0
  8. package/dist/base/index.d.ts.map +1 -0
  9. package/dist/base/index.js +84 -0
  10. package/dist/base/index.js.map +1 -0
  11. package/dist/base/index.mjs +7 -0
  12. package/dist/base/operator.d.ts +140 -0
  13. package/dist/base/operator.d.ts.map +1 -0
  14. package/dist/base/operator.js +112 -0
  15. package/dist/base/operator.js.map +1 -0
  16. package/dist/base/operator.mjs +75 -0
  17. package/dist/base/operator.mjs.map +1 -0
  18. package/dist/base/parser.d.ts +11 -0
  19. package/dist/base/parser.d.ts.map +1 -0
  20. package/dist/base/parser.js +43 -0
  21. package/dist/base/parser.js.map +1 -0
  22. package/dist/base/parser.mjs +9 -0
  23. package/dist/base/parser.mjs.map +1 -0
  24. package/dist/types/actions.d.ts +224 -0
  25. package/dist/types/actions.d.ts.map +1 -0
  26. package/dist/types/actions.js +155 -0
  27. package/dist/types/actions.js.map +1 -0
  28. package/dist/types/actions.mjs +115 -0
  29. package/dist/types/actions.mjs.map +1 -0
  30. package/dist/types/agents.d.ts +108 -0
  31. package/dist/types/agents.d.ts.map +1 -0
  32. package/dist/types/agents.js +42 -0
  33. package/dist/types/agents.js.map +1 -0
  34. package/dist/types/agents.mjs +8 -0
  35. package/dist/types/agents.mjs.map +1 -0
  36. package/dist/types/archived.d.ts +44 -0
  37. package/dist/types/archived.d.ts.map +1 -0
  38. package/dist/types/archived.js +86 -0
  39. package/dist/types/archived.js.map +1 -0
  40. package/dist/types/archived.mjs +46 -0
  41. package/dist/types/archived.mjs.map +1 -0
  42. package/dist/types/index.d.ts +4 -0
  43. package/dist/types/index.d.ts.map +1 -0
  44. package/dist/types/index.js +84 -0
  45. package/dist/types/index.js.map +1 -0
  46. package/dist/types/index.mjs +7 -0
  47. package/dist/utils/actions.d.ts +15 -0
  48. package/dist/utils/actions.d.ts.map +1 -0
  49. package/dist/utils/actions.js +196 -0
  50. package/dist/utils/actions.js.map +1 -0
  51. package/dist/utils/actions.mjs +156 -0
  52. package/dist/utils/actions.mjs.map +1 -0
  53. package/dist/utils/coordinateNormalizer.d.ts +10 -0
  54. package/dist/utils/coordinateNormalizer.d.ts.map +1 -0
  55. package/dist/utils/coordinateNormalizer.js +59 -0
  56. package/dist/utils/coordinateNormalizer.js.map +1 -0
  57. package/dist/utils/coordinateNormalizer.mjs +25 -0
  58. package/dist/utils/coordinateNormalizer.mjs.map +1 -0
  59. package/dist/utils/index.d.ts +5 -0
  60. package/dist/utils/index.d.ts.map +1 -0
  61. package/dist/utils/index.js +93 -0
  62. package/dist/utils/index.js.map +1 -0
  63. package/dist/utils/index.mjs +8 -0
  64. package/dist/utils/sleep.d.ts +14 -0
  65. package/dist/utils/sleep.d.ts.map +1 -0
  66. package/dist/utils/sleep.js +45 -0
  67. package/dist/utils/sleep.js.map +1 -0
  68. package/dist/utils/sleep.mjs +11 -0
  69. package/dist/utils/sleep.mjs.map +1 -0
  70. package/dist/utils/systemPromptProcessor.d.ts +16 -0
  71. package/dist/utils/systemPromptProcessor.d.ts.map +1 -0
  72. package/dist/utils/systemPromptProcessor.js +61 -0
  73. package/dist/utils/systemPromptProcessor.js.map +1 -0
  74. package/dist/utils/systemPromptProcessor.mjs +24 -0
  75. package/dist/utils/systemPromptProcessor.mjs.map +1 -0
  76. package/package.json +66 -0
  77. package/src/base/agent.ts +13 -0
  78. package/src/base/index.ts +7 -0
  79. package/src/base/operator.ts +221 -0
  80. package/src/base/parser.ts +16 -0
  81. package/src/types/actions.ts +382 -0
  82. package/src/types/agents.ts +128 -0
  83. package/src/types/archived.ts +55 -0
  84. package/src/types/index.ts +8 -0
  85. package/src/utils/actions.ts +244 -0
  86. package/src/utils/coordinateNormalizer.ts +49 -0
  87. package/src/utils/index.ts +9 -0
  88. package/src/utils/sleep.ts +21 -0
  89. package/src/utils/systemPromptProcessor.ts +48 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base/parser.mjs","sources":["webpack://@ui-tars-test/shared/./src/base/parser.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\n\nimport { ParsedGUIResponse } from '../types';\n\nexport abstract class BaseActionParser {\n /**\n * Parse model output\n * @param input Model output string\n * @returns Parsed ParsedGUIResponse object, returns null if parsing fails\n * There is no need to throw error, the error message is returned in ParsedGUIResponse\n */\n abstract parsePrediction(input: string): ParsedGUIResponse | null;\n}\n"],"names":["BaseActionParser"],"mappings":";;;;AAOO,MAAeA;AAQtB"}
@@ -0,0 +1,224 @@
1
/**
 * Pair of numeric factors.
 * NOTE(review): presumably the [x, y] scale factors applied when converting
 * coordinates - confirm against the coordinate normalizer.
 */
export type Factors = [number, number];

/**
 * Coordinate data structure
 * - Supports pixel coordinates (raw)
 * - Supports normalized coordinates (normalized)
 */
export interface Coordinates {
  /** Raw pixels. */
  raw?: {
    x: number;
    y: number;
  };
  /** Normalized coordinates (0-1). */
  normalized?: {
    x: number;
    y: number;
  };
  /**
   * Box described by the two corners (x1, y1) and (x2, y2).
   * NOTE(review): the unit and its relation to `raw` / `normalized` are not
   * stated in this file - confirm with the producers of this field.
   */
  referenceBox?: {
    x1: number;
    y1: number;
    x2: number;
    y2: number;
  };
  /**
   * Coordinate reference system.
   *
   * Any string is accepted. The trailing `string & {}` (instead of a bare
   * `string`) keeps the three well-known literals from being absorbed into
   * `string`, so editors can still suggest them.
   */
  referenceSystem?: 'screen' | 'window' | 'browserPage' | (string & {});
}

/**
 * Standard structure for GUI Actions
 *
 * @typeParam T - Literal action type tag(s).
 * @typeParam I - Shape of the action's `inputs`.
 */
export interface BaseAction<T extends string = string, I extends Record<string, any> = Record<string, any>> {
  /** Action type (e.g. "click", "swipe"). */
  type: T;
  /** Parameters required for the action. */
  inputs: I;
  meta?: {
    /** Suggested execution tool (xdotool / adb / pyautogui etc.). */
    toolHint?: string;
    /** Notes / debug information. */
    comment?: string;
  };
}

// ---------- ScreenShot Action ----------

/**
 * ScreenShot action
 */
export type ScreenShotAction = BaseAction<'screenshot', {
  start?: Coordinates;
  end?: Coordinates;
}>;

// ---------- Mouse Actions ----------

/**
 * Click action with coordinates
 */
export type ClickAction = BaseAction<'click', {
  point: Coordinates;
}>;

/**
 * Right click action with coordinates
 */
export type RightClickAction = BaseAction<'right_click', {
  point: Coordinates;
}>;

/**
 * Double click action with coordinates
 */
export type DoubleClickAction = BaseAction<'double_click', {
  point: Coordinates;
}>;

/**
 * Middle click action with coordinates
 */
export type MiddleClickAction = BaseAction<'middle_click', {
  point: Coordinates;
}>;

/**
 * Mouse down action
 */
export type MouseDownAction = BaseAction<'mouse_down', {
  /** Mouse down position. If not specified, the current mouse position is used. */
  point?: Coordinates;
  /** Button to press. Defaults to left. */
  button?: 'left' | 'right';
}>;

/**
 * Mouse up action
 */
export type MouseUpAction = BaseAction<'mouse_up', {
  /** Mouse up position. If not specified, the current mouse position is used. */
  point?: Coordinates;
  /** Button to release. Defaults to left. */
  button?: 'left' | 'right';
}>;

/**
 * Mouse move action
 */
export type MouseMoveAction = BaseAction<'mouse_move', // 'move' | 'move_to' | 'hover',
{
  /** Target coordinates. */
  point: Coordinates;
}>;

/**
 * Drag action with start and end coordinates
 */
export type DragAction = BaseAction<'drag', // 'left_click_drag' | 'select',
{
  start: Coordinates;
  end: Coordinates;
  direction?: 'up' | 'down' | 'left' | 'right';
}>;

/**
 * Scroll action with coordinates and direction
 */
export type ScrollAction = BaseAction<'scroll', {
  point?: Coordinates;
  direction: 'up' | 'down' | 'left' | 'right';
}>;

// ---------- Keyboard Actions ----------

/**
 * Type action with text content
 */
export type TypeAction = BaseAction<'type', {
  content: string;
}>;

/**
 * Hotkey action with key combination
 */
export type HotkeyAction = BaseAction<'hotkey', {
  key: string;
}>;

/**
 * Press key action
 */
export type PressAction = BaseAction<'press', {
  /** Key to press. Only one key can be pressed at one time. */
  key: string;
}>;

/**
 * Release key action
 */
export type ReleaseAction = BaseAction<'release', {
  /** Key to release. Only one key can be released at one time. */
  key: string;
}>;

// ---------- Browser Actions ----------

/**
 * Navigate action with URL
 */
export type NavigateAction = BaseAction<'navigate', {
  url: string;
}>;

/**
 * Navigate back action
 */
export type NavigateBackAction = BaseAction<'navigate_back', Record<string, never>>;

// ---------- App Actions ----------

/**
 * Long press action with coordinates
 */
export type LongPressAction = BaseAction<'long_press', {
  point: Coordinates;
}>;

/**
 * Swipe action with start and end coordinates and a mandatory direction
 */
export type SwipeAction = BaseAction<'swipe', // 'drag',
{
  start: Coordinates;
  end: Coordinates;
  direction: 'up' | 'down' | 'left' | 'right';
}>;

/**
 * Home action
 */
export type HomeAction = BaseAction<'home' | 'press_home', Record<string, never>>;

/**
 * Back action
 */
export type BackAction = BaseAction<'back' | 'press_back', Record<string, never>>;

/**
 * Open app action
 */
export type OpenAppAction = BaseAction<'open_app', {
  name: string;
}>;

// ---------- Wait Actions ----------

/**
 * Wait action with an optional duration
 */
export type WaitAction = BaseAction<'wait', {
  /** Time to wait, in seconds (optional). */
  time?: number;
}>;

/**
 * Finished - Complete the current operation.
 */
export type FinishAction = BaseAction<'finished', {
  content?: string;
}>;

/**
 * CallUser - Request user interaction.
 */
export type CallUserAction = BaseAction<'call_user', {
  content?: string;
}>;

/**
 * Operational action types (excluding screenshot which has special handling)
 */
export type OperationalGUIAction =
  | ClickAction
  | DoubleClickAction
  | RightClickAction
  | MiddleClickAction
  | MouseDownAction
  | MouseUpAction
  | MouseMoveAction
  | DragAction
  | ScrollAction
  | TypeAction
  | HotkeyAction
  | PressAction
  | ReleaseAction
  | NavigateAction
  | NavigateBackAction
  | LongPressAction
  | SwipeAction
  | HomeAction
  | BackAction
  | OpenAppAction
  | WaitAction
  | FinishAction
  | CallUserAction;

/**
 * Complete GUI action types including screenshot
 */
export type GUIAction = ScreenShotAction | OperationalGUIAction;

/**
 * Extract action type from action interface
 *
 * Distributes over unions, so a union of actions yields the union of their
 * `type` tags; anything that is not a `BaseAction` yields `never`.
 */
export type ExtractActionType<T> = T extends BaseAction<infer U, any> ? U : never;

/**
 * Supported operational action types (excluding screenshot)
 */
export type SupportedActionType = ExtractActionType<OperationalGUIAction>;

/**
 * All action types including screenshot
 */
export type AllActionType = ExtractActionType<GUIAction>;

/**
 * Action metadata for documentation and serialization
 */
export interface ActionMetadata {
  /** Short human-readable description of the action. */
  description: string;
  /** Group the action belongs to. */
  category: 'mouse' | 'keyboard' | 'navigation' | 'mobile' | 'system' | 'wait';
}

/**
 * Comprehensive action metadata registry
 *
 * Holds one entry per `SupportedActionType`; `screenshot` is intentionally
 * not part of the key set.
 */
export declare const ACTION_METADATA: Record<SupportedActionType, ActionMetadata>;

/**
 * Type guard function to check if a string is a valid operational action type
 * @param type - The string to check
 * @returns Whether the string is a valid SupportedActionType
 */
export declare function isSupportedActionType(type: string): type is SupportedActionType;

/**
 * Type guard function to check if a string is any valid action type (including screenshot)
 * @param type - The string to check
 * @returns Whether the string is a valid AllActionType
 */
export declare function isValidActionType(type: string): type is AllActionType;
224
+ //# sourceMappingURL=actions.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../../src/types/actions.ts"],"names":[],"mappings":"AAOA,MAAM,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAEvC;;;;GAIG;AACH,MAAM,WAAW,WAAW;IAC1B,GAAG,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC/B,UAAU,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACtC,YAAY,CAAC,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAA;KAAE,CAAC;IAClE,eAAe,CAAC,EAAE,QAAQ,GAAG,QAAQ,GAAG,aAAa,GAAG,MAAM,CAAC;CAChE;AAED;;GAEG;AACH,MAAM,WAAW,UAAU,CACzB,CAAC,SAAS,MAAM,GAAG,MAAM,EACzB,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAEnD,IAAI,EAAE,CAAC,CAAC;IACR,MAAM,EAAE,CAAC,CAAC;IACV,IAAI,CAAC,EAAE;QACL,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAID;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG,UAAU,CACvC,YAAY,EACZ;IACE,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,WAAW,CAAC;CACnB,CACF,CAAC;AAIF;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,UAAU,CAClC,OAAO,EACP;IACE,KAAK,EAAE,WAAW,CAAC;CACpB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG,UAAU,CACvC,aAAa,EACb;IACE,KAAK,EAAE,WAAW,CAAC;CACpB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,UAAU,CACxC,cAAc,EACd;IACE,KAAK,EAAE,WAAW,CAAC;CACpB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,UAAU,CACxC,cAAc,EACd;IACE,KAAK,EAAE,WAAW,CAAC;CACpB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,UAAU,CACtC,YAAY,EACZ;IACE,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;CAC3B,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,CACpC,UAAU,EACV;IACE,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;CAC3B,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,UAAU,CACtC,YAAY,EAAE,gCAAgC;AAC9C;IACE,KAAK,EAAE,WAAW,CAAC;CACpB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG,UAAU,CACjC,MAAM,EAAE,gCAAgC;AACxC;IACE,KAAK,EAAE,WAAW,CAAC;IACnB,GAAG,EAAE,WAAW,CAAC;IACjB,SAAS,CAAC,EAAE,IAAI,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;CAC9C,CACF,CAAC;AAEF;;GAEG;A
ACH,MAAM,MAAM,YAAY,GAAG,UAAU,CACnC,QAAQ,EACR;IACE,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,EAAE,IAAI,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;CAC7C,CACF,CAAC;AAIF;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG,UAAU,CACjC,MAAM,EACN;IACE,OAAO,EAAE,MAAM,CAAC;CACjB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,UAAU,CACnC,QAAQ,EACR;IACE,GAAG,EAAE,MAAM,CAAC;CACb,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,UAAU,CAClC,OAAO,EACP;IACE,GAAG,EAAE,MAAM,CAAC;CACb,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,CACpC,SAAS,EACT;IACE,GAAG,EAAE,MAAM,CAAC;CACb,CACF,CAAC;AAIF;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,UAAU,CACrC,UAAU,EACV;IACE,GAAG,EAAE,MAAM,CAAC;CACb,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,UAAU,CAAC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;AAIpF;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,UAAU,CACtC,YAAY,EACZ;IACE,KAAK,EAAE,WAAW,CAAC;CACpB,CACF,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG,UAAU,CAClC,OAAO,EAAE,UAAU;AACnB;IACE,KAAK,EAAE,WAAW,CAAC;IACnB,GAAG,EAAE,WAAW,CAAC;IACjB,SAAS,EAAE,IAAI,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;CAC7C,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,GAAG,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;AAElF;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,GAAG,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;AAElF;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,CACpC,UAAU,EACV;IACE,IAAI,EAAE,MAAM,CAAC;CACd,CACF,CAAC;AAIF;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG,UAAU,CACjC,MAAM,EACN;IACE,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,UAAU,CACnC,UAAU,EACV;IACE,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,UAAU,CACrC,WAAW,EACX;IACE,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CACF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,oBAAoB,GAC5B,WAAW,GACX,iBAAiB,GACjB,gBAAgB,GAChB,iBAAiB,GACjB,eAAe,GACf,aAAa,GACb,eAAe,GACf,UAAU,GACV,YAAY,GACZ,UAAU,GACV,YAAY,GACZ,WAAW,GACX,aAAa,GACb,cAAc,GACd,kBAAkB,GAClB,eAAe,GACf,WAAW,GACX,UAAU,GACV,UAAU,GACV,aAAa,GACb,UAAU,GACV,YAAY,GACZ,cAAc,CAAC;AAEnB;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,gBAAgB,GAAG,oBAAoB,CAAC;AAEhE;;GAEG;AACH,MAAM,MAAM,iB
AAiB,CAAC,CAAC,IAAI,CAAC,SAAS,UAAU,CAAC,MAAM,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;AAElF;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,iBAAiB,CAAC,oBAAoB,CAAC,CAAC;AAE1E;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;AAEzD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,GAAG,UAAU,GAAG,YAAY,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,CAAC;CAC9E;AAED;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,mBAAmB,EAAE,cAAc,CA0B9D,CAAC;AAEX;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,IAAI,mBAAmB,CAE/E;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,IAAI,aAAa,CAErE"}
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
+ "use strict";
6
+ var __webpack_require__ = {};
7
+ (()=>{
8
+ __webpack_require__.d = (exports1, definition)=>{
9
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
10
+ enumerable: true,
11
+ get: definition[key]
12
+ });
13
+ };
14
+ })();
15
+ (()=>{
16
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
17
+ })();
18
+ (()=>{
19
+ __webpack_require__.r = (exports1)=>{
20
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
21
+ value: 'Module'
22
+ });
23
+ Object.defineProperty(exports1, '__esModule', {
24
+ value: true
25
+ });
26
+ };
27
+ })();
28
+ var __webpack_exports__ = {};
29
+ __webpack_require__.r(__webpack_exports__);
30
+ __webpack_require__.d(__webpack_exports__, {
31
+ ACTION_METADATA: ()=>ACTION_METADATA,
32
+ isSupportedActionType: ()=>isSupportedActionType,
33
+ isValidActionType: ()=>isValidActionType
34
+ });
35
+ const ACTION_METADATA = {
36
+ click: {
37
+ category: 'mouse',
38
+ description: 'Click on an element'
39
+ },
40
+ right_click: {
41
+ category: 'mouse',
42
+ description: 'Right click on an element'
43
+ },
44
+ double_click: {
45
+ category: 'mouse',
46
+ description: 'Double click on an element'
47
+ },
48
+ middle_click: {
49
+ category: 'mouse',
50
+ description: 'Middle click on an element'
51
+ },
52
+ mouse_down: {
53
+ category: 'mouse',
54
+ description: 'Press mouse button down'
55
+ },
56
+ mouse_up: {
57
+ category: 'mouse',
58
+ description: 'Release mouse button'
59
+ },
60
+ mouse_move: {
61
+ category: 'mouse',
62
+ description: 'Move mouse to position'
63
+ },
64
+ drag: {
65
+ category: 'mouse',
66
+ description: 'Drag from one position to another'
67
+ },
68
+ scroll: {
69
+ category: 'mouse',
70
+ description: 'Scroll in a direction'
71
+ },
72
+ type: {
73
+ category: 'keyboard',
74
+ description: 'Type text'
75
+ },
76
+ hotkey: {
77
+ category: 'keyboard',
78
+ description: 'Press hotkey combination'
79
+ },
80
+ press: {
81
+ category: 'keyboard',
82
+ description: 'Press a key'
83
+ },
84
+ release: {
85
+ category: 'keyboard',
86
+ description: 'Release a key'
87
+ },
88
+ navigate: {
89
+ category: 'navigation',
90
+ description: 'Navigate to URL'
91
+ },
92
+ navigate_back: {
93
+ category: 'navigation',
94
+ description: 'Navigate back'
95
+ },
96
+ long_press: {
97
+ category: 'mobile',
98
+ description: 'Long press on element'
99
+ },
100
+ swipe: {
101
+ category: 'mobile',
102
+ description: 'Swipe gesture'
103
+ },
104
+ home: {
105
+ category: 'mobile',
106
+ description: 'Go to home'
107
+ },
108
+ press_home: {
109
+ category: 'mobile',
110
+ description: 'Press home button'
111
+ },
112
+ back: {
113
+ category: 'mobile',
114
+ description: 'Go back'
115
+ },
116
+ press_back: {
117
+ category: 'mobile',
118
+ description: 'Press back button'
119
+ },
120
+ open_app: {
121
+ category: 'mobile',
122
+ description: 'Open application'
123
+ },
124
+ wait: {
125
+ category: 'wait',
126
+ description: 'Wait for specified time'
127
+ },
128
+ finished: {
129
+ category: 'system',
130
+ description: 'Mark task as finished'
131
+ },
132
+ call_user: {
133
+ category: 'system',
134
+ description: 'Request user interaction'
135
+ }
136
+ };
137
/**
 * Check whether a string names an operational action type, i.e. whether it
 * is one of the keys registered in ACTION_METADATA.
 *
 * @param type - The string to check
 * @returns true only when `type` is an own key of ACTION_METADATA
 */
function isSupportedActionType(type) {
  // An own-property check is required here: the `in` operator also consults
  // the prototype chain, so inherited names such as "toString",
  // "constructor" or "__proto__" would be reported as supported actions.
  return Object.prototype.hasOwnProperty.call(ACTION_METADATA, type);
}
140
+ function isValidActionType(type) {
141
+ return 'screenshot' === type || isSupportedActionType(type);
142
+ }
143
// Publish the three known bindings on the CommonJS `exports` object.
// NOTE(review): these are presumably kept as literal `exports.<name> = ...`
// statements so that tools which detect CommonJS export names statically
// still see them - do not fold them into the loop below without checking.
exports.ACTION_METADATA = __webpack_exports__.ACTION_METADATA;
exports.isSupportedActionType = __webpack_exports__.isSupportedActionType;
exports.isValidActionType = __webpack_exports__.isValidActionType;

// Copy every other enumerable key of the namespace object; the three names
// assigned above are skipped.
for (var __webpack_i__ in __webpack_exports__) {
  switch (__webpack_i__) {
    case "ACTION_METADATA":
    case "isSupportedActionType":
    case "isValidActionType":
      break;
    default:
      exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
  }
}

// Flag the CommonJS exports object as a transpiled ES module.
Object.defineProperty(exports, '__esModule', { value: true });
154
+
155
+ //# sourceMappingURL=actions.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types/actions.js","sources":["webpack://@ui-tars-test/shared/webpack/runtime/define_property_getters","webpack://@ui-tars-test/shared/webpack/runtime/has_own_property","webpack://@ui-tars-test/shared/webpack/runtime/make_namespace_object","webpack://@ui-tars-test/shared/./src/types/actions.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","/*\n * Copyright (c) 2025 Bytedance, Inc. 
and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n/* eslint-disable @typescript-eslint/no-explicit-any */\n\nexport type Factors = [number, number];\n\n/**\n * Coordinate data structure\n * - Supports pixel coordinates (raw)\n * - Supports normalized coordinates (normalized)\n */\nexport interface Coordinates {\n raw?: { x: number; y: number }; // Raw pixels\n normalized?: { x: number; y: number }; // Normalized coordinates (0–1)\n referenceBox?: { x1: number; y1: number; x2: number; y2: number };\n referenceSystem?: 'screen' | 'window' | 'browserPage' | string; // Coordinate reference system\n}\n\n/**\n * Standard structure for GUI Actions\n */\nexport interface BaseAction<\n T extends string = string,\n I extends Record<string, any> = Record<string, any>,\n> {\n type: T; // Action type (e.g., \"click\", \"key\", \"swipe\")\n inputs: I; // Parameters required for the action\n meta?: {\n toolHint?: string; // Suggested execution tool (xdotool / adb / pyautogui etc.)\n comment?: string; // Notes / Debug information\n };\n}\n\n// ---------- ScreenShot Action ----------\n\n/**\n * ScreenShot action\n */\nexport type ScreenShotAction = BaseAction<\n 'screenshot',\n {\n start?: Coordinates;\n end?: Coordinates;\n }\n>;\n\n// ---------- Mouse Actions ----------\n\n/**\n * Click action with coordinates\n */\nexport type ClickAction = BaseAction<\n 'click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Right click action with coordinates\n */\nexport type RightClickAction = BaseAction<\n 'right_click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Double click action with coordinates\n */\nexport type DoubleClickAction = BaseAction<\n 'double_click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Middle click action with coordinates\n */\nexport type MiddleClickAction = BaseAction<\n 'middle_click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Mouse down action\n */\nexport type MouseDownAction = BaseAction<\n 'mouse_down',\n {\n point?: Coordinates; // Mouse 
down position. If not specified, default to execute on the current mouse position.\n button?: 'left' | 'right'; // Down button. Default to left.\n }\n>;\n\n/**\n * Mouse up action\n */\nexport type MouseUpAction = BaseAction<\n 'mouse_up',\n {\n point?: Coordinates; // Mouse up position. If not specified, default to execute on the current mouse position.\n button?: 'left' | 'right'; // Up button. Default to left.\n }\n>;\n\n/**\n * Mouse move action\n */\nexport type MouseMoveAction = BaseAction<\n 'mouse_move', // 'move' | 'move_to' | 'hover',\n {\n point: Coordinates; // Target coordinates\n }\n>;\n\n/**\n * Drag action with start and end coordinates\n */\nexport type DragAction = BaseAction<\n 'drag', // 'left_click_drag' | 'select',\n {\n start: Coordinates;\n end: Coordinates;\n direction?: 'up' | 'down' | 'left' | 'right';\n }\n>;\n\n/**\n * Scroll action with coordinates and direction\n */\nexport type ScrollAction = BaseAction<\n 'scroll',\n {\n point?: Coordinates;\n direction: 'up' | 'down' | 'left' | 'right';\n }\n>;\n\n// ---------- Keyboard Actions ----------\n\n/**\n * Type action with text content\n */\nexport type TypeAction = BaseAction<\n 'type',\n {\n content: string;\n }\n>;\n\n/**\n * Hotkey action with key combination\n */\nexport type HotkeyAction = BaseAction<\n 'hotkey',\n {\n key: string;\n }\n>;\n\n/**\n * Press key action\n */\nexport type PressAction = BaseAction<\n 'press',\n {\n key: string; // Key you want to press. Only one key can be pressed at one time.\n }\n>;\n\n/**\n * Release key action\n */\nexport type ReleaseAction = BaseAction<\n 'release',\n {\n key: string; // Key you want to release. 
Only one key can be released at one time.\n }\n>;\n\n// ---------- Browser Actions ----------\n\n/**\n * Navigate action with URL\n */\nexport type NavigateAction = BaseAction<\n 'navigate',\n {\n url: string;\n }\n>;\n\n/**\n * Navigate back action\n */\nexport type NavigateBackAction = BaseAction<'navigate_back', Record<string, never>>;\n\n// ---------- App Actions ----------\n\n/**\n * Long press action with coordinates\n */\nexport type LongPressAction = BaseAction<\n 'long_press',\n {\n point: Coordinates;\n }\n>;\n\nexport type SwipeAction = BaseAction<\n 'swipe', // 'drag',\n {\n start: Coordinates;\n end: Coordinates;\n direction: 'up' | 'down' | 'left' | 'right';\n }\n>;\n\n/**\n * Home action\n */\nexport type HomeAction = BaseAction<'home' | 'press_home', Record<string, never>>;\n\n/**\n * Back action\n */\nexport type BackAction = BaseAction<'back' | 'press_back', Record<string, never>>;\n\n/**\n * Open app action\n */\nexport type OpenAppAction = BaseAction<\n 'open_app',\n {\n name: string;\n }\n>;\n\n// ---------- Wait Actions ----------\n\n/**\n * Wait action with no inputs\n */\nexport type WaitAction = BaseAction<\n 'wait',\n {\n time?: number; // in seconds (optional)\n }\n>;\n\n/**\n * Finished - Complete the current operation.\n */\nexport type FinishAction = BaseAction<\n 'finished',\n {\n content?: string;\n }\n>;\n\n/**\n * CallUser - Request user interaction.\n */\nexport type CallUserAction = BaseAction<\n 'call_user',\n {\n content?: string;\n }\n>;\n\n/**\n * Operational action types (excluding screenshot which has special handling)\n */\nexport type OperationalGUIAction =\n | ClickAction\n | DoubleClickAction\n | RightClickAction\n | MiddleClickAction\n | MouseDownAction\n | MouseUpAction\n | MouseMoveAction\n | DragAction\n | ScrollAction\n | TypeAction\n | HotkeyAction\n | PressAction\n | ReleaseAction\n | NavigateAction\n | NavigateBackAction\n | LongPressAction\n | SwipeAction\n | HomeAction\n | BackAction\n | OpenAppAction\n | 
WaitAction\n | FinishAction\n | CallUserAction;\n\n/**\n * Complete GUI action types including screenshot\n */\nexport type GUIAction = ScreenShotAction | OperationalGUIAction;\n\n/**\n * Extract action type from action interface\n */\nexport type ExtractActionType<T> = T extends BaseAction<infer U, any> ? U : never;\n\n/**\n * Supported operational action types (excluding screenshot)\n */\nexport type SupportedActionType = ExtractActionType<OperationalGUIAction>;\n\n/**\n * All action types including screenshot\n */\nexport type AllActionType = ExtractActionType<GUIAction>;\n\n/**\n * Action metadata for documentation and serialization\n */\nexport interface ActionMetadata {\n description: string;\n category: 'mouse' | 'keyboard' | 'navigation' | 'mobile' | 'system' | 'wait';\n}\n\n/**\n * Comprehensive action metadata registry\n */\nexport const ACTION_METADATA: Record<SupportedActionType, ActionMetadata> = {\n click: { category: 'mouse', description: 'Click on an element' },\n right_click: { category: 'mouse', description: 'Right click on an element' },\n double_click: { category: 'mouse', description: 'Double click on an element' },\n middle_click: { category: 'mouse', description: 'Middle click on an element' },\n mouse_down: { category: 'mouse', description: 'Press mouse button down' },\n mouse_up: { category: 'mouse', description: 'Release mouse button' },\n mouse_move: { category: 'mouse', description: 'Move mouse to position' },\n drag: { category: 'mouse', description: 'Drag from one position to another' },\n scroll: { category: 'mouse', description: 'Scroll in a direction' },\n type: { category: 'keyboard', description: 'Type text' },\n hotkey: { category: 'keyboard', description: 'Press hotkey combination' },\n press: { category: 'keyboard', description: 'Press a key' },\n release: { category: 'keyboard', description: 'Release a key' },\n navigate: { category: 'navigation', description: 'Navigate to URL' },\n navigate_back: { category: 'navigation', 
description: 'Navigate back' },\n long_press: { category: 'mobile', description: 'Long press on element' },\n swipe: { category: 'mobile', description: 'Swipe gesture' },\n home: { category: 'mobile', description: 'Go to home' },\n press_home: { category: 'mobile', description: 'Press home button' },\n back: { category: 'mobile', description: 'Go back' },\n press_back: { category: 'mobile', description: 'Press back button' },\n open_app: { category: 'mobile', description: 'Open application' },\n wait: { category: 'wait', description: 'Wait for specified time' },\n finished: { category: 'system', description: 'Mark task as finished' },\n call_user: { category: 'system', description: 'Request user interaction' },\n} as const;\n\n/**\n * Type guard function to check if a string is a valid operational action type\n * @param type - The string to check\n * @returns Whether the string is a valid SupportedActionType\n */\nexport function isSupportedActionType(type: string): type is SupportedActionType {\n return type in ACTION_METADATA;\n}\n\n/**\n * Type guard function to check if a string is any valid action type (including screenshot)\n * @param type - The string to check\n * @returns Whether the string is a valid AllActionType\n */\nexport function isValidActionType(type: string): type is AllActionType {\n return type === 'screenshot' || 
isSupportedActionType(type);\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","ACTION_METADATA","isSupportedActionType","type","isValidActionType"],"mappings":";;;;;;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;AC2UO,MAAMI,kBAA+D;IAC1E,OAAO;QAAE,UAAU;QAAS,aAAa;IAAsB;IAC/D,aAAa;QAAE,UAAU;QAAS,aAAa;IAA4B;IAC3E,cAAc;QAAE,UAAU;QAAS,aAAa;IAA6B;IAC7E,cAAc;QAAE,UAAU;QAAS,aAAa;IAA6B;IAC7E,YAAY;QAAE,UAAU;QAAS,aAAa;IAA0B;IACxE,UAAU;QAAE,UAAU;QAAS,aAAa;IAAuB;IACnE,YAAY;QAAE,UAAU;QAAS,aAAa;IAAyB;IACvE,MAAM;QAAE,UAAU;QAAS,aAAa;IAAoC;IAC5E,QAAQ;QAAE,UAAU;QAAS,aAAa;IAAwB;IAClE,MAAM;QAAE,UAAU;QAAY,aAAa;IAAY;IACvD,QAAQ;QAAE,UAAU;QAAY,aAAa;IAA2B;IACxE,OAAO;QAAE,UAAU;QAAY,aAAa;IAAc;IAC1D,SAAS;QAAE,UAAU;QAAY,aAAa;IAAgB;IAC9D,UAAU;QAAE,UAAU;QAAc,aAAa;IAAkB;IACnE,eAAe;QAAE,UAAU;QAAc,aAAa;IAAgB;IACtE,YAAY;QAAE,UAAU;QAAU,aAAa;IAAwB;IACvE,OAAO;QAAE,UAAU;QAAU,aAAa;IAAgB;IAC1D,MAAM;QAAE,UAAU;QAAU,aAAa;IAAa;IACtD,YAAY;QAAE,UAAU;QAAU,aAAa;IAAoB;IACnE,MAAM;QAAE,UAAU;QAAU,aAAa;IAAU;IACnD,YAAY;QAAE,UAAU;QAAU,aAAa;IAAoB;IACnE,UAAU;QAAE,UAAU;QAAU,aAAa;IAAmB;IAChE,MAAM;QAAE,UAAU;QAAQ,aAAa;IAA0B;IACjE,UAAU;QAAE,UAAU;QAAU,aAAa;IAAwB;IACrE,WAAW;QAAE,UAAU;QAAU,aAAa;IAA2B;AAC3E;AAOO,SAASC,sBAAsBC,IAAY;IAChD,OAAOA,QAAQF;AACjB;AAOO,SAASG,kBAAkBD,IAAY;IAC5C,OAAOA,AAAS,iBAATA,QAAyBD,sBAAsBC;AACxD"}
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
/**
 * Registry of operational action types, keyed by action type name.
 * Each entry carries a `category` and a short human-readable `description`.
 * 'screenshot' is not listed here; isValidActionType accepts it separately.
 */
const ACTION_METADATA = {
  click: { category: 'mouse', description: 'Click on an element' },
  right_click: { category: 'mouse', description: 'Right click on an element' },
  double_click: { category: 'mouse', description: 'Double click on an element' },
  middle_click: { category: 'mouse', description: 'Middle click on an element' },
  mouse_down: { category: 'mouse', description: 'Press mouse button down' },
  mouse_up: { category: 'mouse', description: 'Release mouse button' },
  mouse_move: { category: 'mouse', description: 'Move mouse to position' },
  drag: { category: 'mouse', description: 'Drag from one position to another' },
  scroll: { category: 'mouse', description: 'Scroll in a direction' },
  type: { category: 'keyboard', description: 'Type text' },
  hotkey: { category: 'keyboard', description: 'Press hotkey combination' },
  press: { category: 'keyboard', description: 'Press a key' },
  release: { category: 'keyboard', description: 'Release a key' },
  navigate: { category: 'navigation', description: 'Navigate to URL' },
  navigate_back: { category: 'navigation', description: 'Navigate back' },
  long_press: { category: 'mobile', description: 'Long press on element' },
  swipe: { category: 'mobile', description: 'Swipe gesture' },
  home: { category: 'mobile', description: 'Go to home' },
  press_home: { category: 'mobile', description: 'Press home button' },
  back: { category: 'mobile', description: 'Go back' },
  press_back: { category: 'mobile', description: 'Press back button' },
  open_app: { category: 'mobile', description: 'Open application' },
  wait: { category: 'wait', description: 'Wait for specified time' },
  finished: { category: 'system', description: 'Mark task as finished' },
  call_user: { category: 'system', description: 'Request user interaction' },
};

/**
 * Checks whether `type` is an operational action type registered in ACTION_METADATA.
 *
 * Only own keys of the registry count. The `in` operator would also match
 * names inherited from Object.prototype ('toString', 'constructor',
 * '__proto__', ...), wrongly reporting them as supported actions.
 *
 * @param type - The string to check
 * @returns true when `type` is an own key of ACTION_METADATA, false otherwise
 */
function isSupportedActionType(type) {
  return Object.prototype.hasOwnProperty.call(ACTION_METADATA, type);
}

/**
 * Checks whether `type` is any valid action type: either 'screenshot' or an
 * operational action type accepted by isSupportedActionType.
 *
 * @param type - The string to check
 * @returns true when `type` is 'screenshot' or a supported operational action type
 */
function isValidActionType(type) {
  return 'screenshot' === type || isSupportedActionType(type);
}
113
+ export { ACTION_METADATA, isSupportedActionType, isValidActionType };
114
+
115
+ //# sourceMappingURL=actions.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types/actions.mjs","sources":["webpack://@ui-tars-test/shared/./src/types/actions.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n/* eslint-disable @typescript-eslint/no-explicit-any */\n\nexport type Factors = [number, number];\n\n/**\n * Coordinate data structure\n * - Supports pixel coordinates (raw)\n * - Supports normalized coordinates (normalized)\n */\nexport interface Coordinates {\n raw?: { x: number; y: number }; // Raw pixels\n normalized?: { x: number; y: number }; // Normalized coordinates (0–1)\n referenceBox?: { x1: number; y1: number; x2: number; y2: number };\n referenceSystem?: 'screen' | 'window' | 'browserPage' | string; // Coordinate reference system\n}\n\n/**\n * Standard structure for GUI Actions\n */\nexport interface BaseAction<\n T extends string = string,\n I extends Record<string, any> = Record<string, any>,\n> {\n type: T; // Action type (e.g., \"click\", \"key\", \"swipe\")\n inputs: I; // Parameters required for the action\n meta?: {\n toolHint?: string; // Suggested execution tool (xdotool / adb / pyautogui etc.)\n comment?: string; // Notes / Debug information\n };\n}\n\n// ---------- ScreenShot Action ----------\n\n/**\n * ScreenShot action\n */\nexport type ScreenShotAction = BaseAction<\n 'screenshot',\n {\n start?: Coordinates;\n end?: Coordinates;\n }\n>;\n\n// ---------- Mouse Actions ----------\n\n/**\n * Click action with coordinates\n */\nexport type ClickAction = BaseAction<\n 'click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Right click action with coordinates\n */\nexport type RightClickAction = BaseAction<\n 'right_click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Double click action with coordinates\n */\nexport type DoubleClickAction = BaseAction<\n 'double_click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Middle click action with coordinates\n */\nexport type MiddleClickAction = BaseAction<\n 
'middle_click',\n {\n point: Coordinates;\n }\n>;\n\n/**\n * Mouse down action\n */\nexport type MouseDownAction = BaseAction<\n 'mouse_down',\n {\n point?: Coordinates; // Mouse down position. If not specified, default to execute on the current mouse position.\n button?: 'left' | 'right'; // Down button. Default to left.\n }\n>;\n\n/**\n * Mouse up action\n */\nexport type MouseUpAction = BaseAction<\n 'mouse_up',\n {\n point?: Coordinates; // Mouse up position. If not specified, default to execute on the current mouse position.\n button?: 'left' | 'right'; // Up button. Default to left.\n }\n>;\n\n/**\n * Mouse move action\n */\nexport type MouseMoveAction = BaseAction<\n 'mouse_move', // 'move' | 'move_to' | 'hover',\n {\n point: Coordinates; // Target coordinates\n }\n>;\n\n/**\n * Drag action with start and end coordinates\n */\nexport type DragAction = BaseAction<\n 'drag', // 'left_click_drag' | 'select',\n {\n start: Coordinates;\n end: Coordinates;\n direction?: 'up' | 'down' | 'left' | 'right';\n }\n>;\n\n/**\n * Scroll action with coordinates and direction\n */\nexport type ScrollAction = BaseAction<\n 'scroll',\n {\n point?: Coordinates;\n direction: 'up' | 'down' | 'left' | 'right';\n }\n>;\n\n// ---------- Keyboard Actions ----------\n\n/**\n * Type action with text content\n */\nexport type TypeAction = BaseAction<\n 'type',\n {\n content: string;\n }\n>;\n\n/**\n * Hotkey action with key combination\n */\nexport type HotkeyAction = BaseAction<\n 'hotkey',\n {\n key: string;\n }\n>;\n\n/**\n * Press key action\n */\nexport type PressAction = BaseAction<\n 'press',\n {\n key: string; // Key you want to press. Only one key can be pressed at one time.\n }\n>;\n\n/**\n * Release key action\n */\nexport type ReleaseAction = BaseAction<\n 'release',\n {\n key: string; // Key you want to release. 
Only one key can be released at one time.\n }\n>;\n\n// ---------- Browser Actions ----------\n\n/**\n * Navigate action with URL\n */\nexport type NavigateAction = BaseAction<\n 'navigate',\n {\n url: string;\n }\n>;\n\n/**\n * Navigate back action\n */\nexport type NavigateBackAction = BaseAction<'navigate_back', Record<string, never>>;\n\n// ---------- App Actions ----------\n\n/**\n * Long press action with coordinates\n */\nexport type LongPressAction = BaseAction<\n 'long_press',\n {\n point: Coordinates;\n }\n>;\n\nexport type SwipeAction = BaseAction<\n 'swipe', // 'drag',\n {\n start: Coordinates;\n end: Coordinates;\n direction: 'up' | 'down' | 'left' | 'right';\n }\n>;\n\n/**\n * Home action\n */\nexport type HomeAction = BaseAction<'home' | 'press_home', Record<string, never>>;\n\n/**\n * Back action\n */\nexport type BackAction = BaseAction<'back' | 'press_back', Record<string, never>>;\n\n/**\n * Open app action\n */\nexport type OpenAppAction = BaseAction<\n 'open_app',\n {\n name: string;\n }\n>;\n\n// ---------- Wait Actions ----------\n\n/**\n * Wait action with no inputs\n */\nexport type WaitAction = BaseAction<\n 'wait',\n {\n time?: number; // in seconds (optional)\n }\n>;\n\n/**\n * Finished - Complete the current operation.\n */\nexport type FinishAction = BaseAction<\n 'finished',\n {\n content?: string;\n }\n>;\n\n/**\n * CallUser - Request user interaction.\n */\nexport type CallUserAction = BaseAction<\n 'call_user',\n {\n content?: string;\n }\n>;\n\n/**\n * Operational action types (excluding screenshot which has special handling)\n */\nexport type OperationalGUIAction =\n | ClickAction\n | DoubleClickAction\n | RightClickAction\n | MiddleClickAction\n | MouseDownAction\n | MouseUpAction\n | MouseMoveAction\n | DragAction\n | ScrollAction\n | TypeAction\n | HotkeyAction\n | PressAction\n | ReleaseAction\n | NavigateAction\n | NavigateBackAction\n | LongPressAction\n | SwipeAction\n | HomeAction\n | BackAction\n | OpenAppAction\n | 
WaitAction\n | FinishAction\n | CallUserAction;\n\n/**\n * Complete GUI action types including screenshot\n */\nexport type GUIAction = ScreenShotAction | OperationalGUIAction;\n\n/**\n * Extract action type from action interface\n */\nexport type ExtractActionType<T> = T extends BaseAction<infer U, any> ? U : never;\n\n/**\n * Supported operational action types (excluding screenshot)\n */\nexport type SupportedActionType = ExtractActionType<OperationalGUIAction>;\n\n/**\n * All action types including screenshot\n */\nexport type AllActionType = ExtractActionType<GUIAction>;\n\n/**\n * Action metadata for documentation and serialization\n */\nexport interface ActionMetadata {\n description: string;\n category: 'mouse' | 'keyboard' | 'navigation' | 'mobile' | 'system' | 'wait';\n}\n\n/**\n * Comprehensive action metadata registry\n */\nexport const ACTION_METADATA: Record<SupportedActionType, ActionMetadata> = {\n click: { category: 'mouse', description: 'Click on an element' },\n right_click: { category: 'mouse', description: 'Right click on an element' },\n double_click: { category: 'mouse', description: 'Double click on an element' },\n middle_click: { category: 'mouse', description: 'Middle click on an element' },\n mouse_down: { category: 'mouse', description: 'Press mouse button down' },\n mouse_up: { category: 'mouse', description: 'Release mouse button' },\n mouse_move: { category: 'mouse', description: 'Move mouse to position' },\n drag: { category: 'mouse', description: 'Drag from one position to another' },\n scroll: { category: 'mouse', description: 'Scroll in a direction' },\n type: { category: 'keyboard', description: 'Type text' },\n hotkey: { category: 'keyboard', description: 'Press hotkey combination' },\n press: { category: 'keyboard', description: 'Press a key' },\n release: { category: 'keyboard', description: 'Release a key' },\n navigate: { category: 'navigation', description: 'Navigate to URL' },\n navigate_back: { category: 'navigation', 
description: 'Navigate back' },\n long_press: { category: 'mobile', description: 'Long press on element' },\n swipe: { category: 'mobile', description: 'Swipe gesture' },\n home: { category: 'mobile', description: 'Go to home' },\n press_home: { category: 'mobile', description: 'Press home button' },\n back: { category: 'mobile', description: 'Go back' },\n press_back: { category: 'mobile', description: 'Press back button' },\n open_app: { category: 'mobile', description: 'Open application' },\n wait: { category: 'wait', description: 'Wait for specified time' },\n finished: { category: 'system', description: 'Mark task as finished' },\n call_user: { category: 'system', description: 'Request user interaction' },\n} as const;\n\n/**\n * Type guard function to check if a string is a valid operational action type\n * @param type - The string to check\n * @returns Whether the string is a valid SupportedActionType\n */\nexport function isSupportedActionType(type: string): type is SupportedActionType {\n return type in ACTION_METADATA;\n}\n\n/**\n * Type guard function to check if a string is any valid action type (including screenshot)\n * @param type - The string to check\n * @returns Whether the string is a valid AllActionType\n */\nexport function isValidActionType(type: string): type is AllActionType {\n return type === 'screenshot' || 
isSupportedActionType(type);\n}\n"],"names":["ACTION_METADATA","isSupportedActionType","type","isValidActionType"],"mappings":";;;;AAiVO,MAAMA,kBAA+D;IAC1E,OAAO;QAAE,UAAU;QAAS,aAAa;IAAsB;IAC/D,aAAa;QAAE,UAAU;QAAS,aAAa;IAA4B;IAC3E,cAAc;QAAE,UAAU;QAAS,aAAa;IAA6B;IAC7E,cAAc;QAAE,UAAU;QAAS,aAAa;IAA6B;IAC7E,YAAY;QAAE,UAAU;QAAS,aAAa;IAA0B;IACxE,UAAU;QAAE,UAAU;QAAS,aAAa;IAAuB;IACnE,YAAY;QAAE,UAAU;QAAS,aAAa;IAAyB;IACvE,MAAM;QAAE,UAAU;QAAS,aAAa;IAAoC;IAC5E,QAAQ;QAAE,UAAU;QAAS,aAAa;IAAwB;IAClE,MAAM;QAAE,UAAU;QAAY,aAAa;IAAY;IACvD,QAAQ;QAAE,UAAU;QAAY,aAAa;IAA2B;IACxE,OAAO;QAAE,UAAU;QAAY,aAAa;IAAc;IAC1D,SAAS;QAAE,UAAU;QAAY,aAAa;IAAgB;IAC9D,UAAU;QAAE,UAAU;QAAc,aAAa;IAAkB;IACnE,eAAe;QAAE,UAAU;QAAc,aAAa;IAAgB;IACtE,YAAY;QAAE,UAAU;QAAU,aAAa;IAAwB;IACvE,OAAO;QAAE,UAAU;QAAU,aAAa;IAAgB;IAC1D,MAAM;QAAE,UAAU;QAAU,aAAa;IAAa;IACtD,YAAY;QAAE,UAAU;QAAU,aAAa;IAAoB;IACnE,MAAM;QAAE,UAAU;QAAU,aAAa;IAAU;IACnD,YAAY;QAAE,UAAU;QAAU,aAAa;IAAoB;IACnE,UAAU;QAAE,UAAU;QAAU,aAAa;IAAmB;IAChE,MAAM;QAAE,UAAU;QAAQ,aAAa;IAA0B;IACjE,UAAU;QAAE,UAAU;QAAU,aAAa;IAAwB;IACrE,WAAW;QAAE,UAAU;QAAU,aAAa;IAA2B;AAC3E;AAOO,SAASC,sBAAsBC,IAAY;IAChD,OAAOA,QAAQF;AACjB;AAOO,SAASG,kBAAkBD,IAAY;IAC5C,OAAOA,AAAS,iBAATA,QAAyBD,sBAAsBC;AACxD"}
@@ -0,0 +1,108 @@
1
+ import { AgentOptions } from '@tarko/agent-interface';
2
+ import { Factors, BaseAction, Coordinates, SupportedActionType } from './actions';
3
+ /**
4
+ * Parsed form of a model's GUI response
5
+ * Holds the components extracted from a model's output string
6
+ * Aligned with tarko's web UI design
7
+ */
8
+ export interface ParsedGUIResponse {
9
+ /** raw prediction string */
10
+ rawContent: string;
11
+ /** parsed from Thought: `<thought>` */
12
+ reasoningContent?: string;
13
+ /** action strings taken from Action: action(params=`action`); presumably the text form of `actions` - TODO confirm */
14
+ rawActionStrings?: string[];
15
+ /** BaseAction objects parsed from Action: action(params=`action`) */
16
+ actions: BaseAction[];
17
+ /** error message to feed back to the LLM */
18
+ errorMessage?: string;
19
+ }
20
+ /**
21
+ * Type definition for function to normalize raw coordinates
22
+ * Converts raw pixel coordinates to normalized coordinates (0-1)
23
+ * @param rawCoords - Coordinates object to normalize
24
+ * @param factors - Optional Factors value (type imported from ./actions)
25
+ * @returns Object whose `normalized` property holds the resulting Coordinates
26
+ */
27
+ export type NormalizeCoordinates = (rawCoords: Coordinates, factors?: Factors) => {
28
+ normalized: Coordinates;
29
+ };
30
+ /**
31
+ * Signature of a handler that parses raw model output into a ParsedGUIResponse object
32
+ * @param prediction - The raw output string from the model to be parsed
33
+ * @returns ParsedGUIResponse object if parsing is successful, null otherwise
34
+ */
35
+ export type CustomActionParser = (prediction: string) => ParsedGUIResponse | null;
36
+ /**
37
+ * Function type for serializing supported actions to string format
38
+ * @param actions - Array of SupportedActionType values (action type names, not action objects)
39
+ * @returns String representation of the given action types
40
+ */
41
+ export type SerializeSupportedActions = (actions: Array<SupportedActionType>) => string;
42
+ export type ExecuteParams = { // `actions` is the only required field
43
+ /** Required actions to execute */
44
+ actions: BaseAction[];
45
+ } & Partial<Omit<ParsedGUIResponse, 'actions'>> & Record<string, any>; // other ParsedGUIResponse fields are optional; arbitrary extra keys are allowed
46
+ export type ExecuteOutput = { // result object; judging by the name, returned from executing actions - TODO confirm
47
+ status: 'success' | 'failed'; // outcome flag, one of the two literals
48
+ errorMessage?: string; // presumably set when status is 'failed' - TODO confirm
49
+ url?: string; // NOTE(review): meaning not shown in this file - confirm with operator implementations
50
+ } & Record<string, any>; // arbitrary extra keys are allowed
51
+ /**
52
+ * Function type that maps an image's width and height to a detail level: 'low', 'high' or 'auto'
53
+ */
54
+ export type ImageDetailCalculator = (width: number, height: number) => 'low' | 'high' | 'auto';
55
+ export interface ScreenshotOutput extends ExecuteOutput { // ExecuteOutput plus the screenshot image data
56
+ /** base64 screenshot data; keep the screenshot size as physical pixels */
57
+ base64: string;
58
+ }
59
+ /**
60
+ * Reserved placeholder name ("action_space") for the action space in a system prompt template
61
+ */
62
+ export declare const ACTION_SPACE_PLACEHOLDER = "action_space";
63
+ /**
64
+ * Configuration for a system prompt built from a template string
65
+ */
66
+ export interface SystemPromptTemplate {
67
+ /**
68
+ * Template string with placeholders. Must include an action space placeholder
69
+ * `{{${ACTION_SPACE_PLACEHOLDER}}}` that will be replaced with the string representation of available actions
70
+ */
71
+ template: string;
72
+ /**
73
+ * Optional function to convert an array of supported action types to the string representation for the action space
74
+ * This will be used to fill the action space placeholder in the template
75
+ */
76
+ actionsToString?: SerializeSupportedActions;
77
+ /**
78
+ * Optional map of additional placeholder values to be replaced in the template
79
+ * Keys are placeholder names, values are the replacement strings
80
+ * Note: '${ACTION_SPACE_PLACEHOLDER}' is a reserved placeholder and should NOT be included here
81
+ * as it will be automatically filled using the actionsToString function
82
+ */
83
+ placeholders?: Record<string, string>;
84
+ }
85
+ export interface GUIAgentConfig<TOperator> extends AgentOptions {
86
+ operator: TOperator; // required; its type is supplied by the TOperator type parameter
87
+ /**
88
+ * System prompt configuration. Can be one of:
89
+ * - A simple string (legacy mode)
90
+ * - An array of strings or chat messages (will be concatenated)
91
+ * - A SystemPromptTemplate object with a template and an optional actionsToString function
92
+ */
93
+ systemPrompt?: string | Array<string | {
94
+ role: string;
95
+ content: string;
96
+ }> | SystemPromptTemplate;
97
+ /** The handler function to parse model output into a ParsedGUIResponse object. NOTE(review): the name is spelled `customeActionParser`; renaming it would break existing callers */
98
+ customeActionParser?: CustomActionParser;
99
+ /** The function to normalize raw coordinates */
100
+ normalizeCoordinates?: NormalizeCoordinates;
101
+ /** The function to calculate detail level based on image dimensions */
102
+ detailCalculator?: ImageDetailCalculator;
103
+ /** Maximum number of turns for Agent to execute, @default 1000 */
104
+ maxLoopCount?: number;
105
+ /** Time interval between two loop iterations (in milliseconds), @default 0 */
106
+ loopIntervalInMs?: number;
107
+ }
108
+ //# sourceMappingURL=agents.d.ts.map