@ui-tars-test/shared 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/base/agent.d.ts +9 -0
  2. package/dist/base/agent.d.ts.map +1 -0
  3. package/dist/base/agent.js +54 -0
  4. package/dist/base/agent.js.map +1 -0
  5. package/dist/base/agent.mjs +10 -0
  6. package/dist/base/agent.mjs.map +1 -0
  7. package/dist/base/index.d.ts +4 -0
  8. package/dist/base/index.d.ts.map +1 -0
  9. package/dist/base/index.js +84 -0
  10. package/dist/base/index.js.map +1 -0
  11. package/dist/base/index.mjs +7 -0
  12. package/dist/base/operator.d.ts +140 -0
  13. package/dist/base/operator.d.ts.map +1 -0
  14. package/dist/base/operator.js +112 -0
  15. package/dist/base/operator.js.map +1 -0
  16. package/dist/base/operator.mjs +75 -0
  17. package/dist/base/operator.mjs.map +1 -0
  18. package/dist/base/parser.d.ts +11 -0
  19. package/dist/base/parser.d.ts.map +1 -0
  20. package/dist/base/parser.js +43 -0
  21. package/dist/base/parser.js.map +1 -0
  22. package/dist/base/parser.mjs +9 -0
  23. package/dist/base/parser.mjs.map +1 -0
  24. package/dist/types/actions.d.ts +224 -0
  25. package/dist/types/actions.d.ts.map +1 -0
  26. package/dist/types/actions.js +155 -0
  27. package/dist/types/actions.js.map +1 -0
  28. package/dist/types/actions.mjs +115 -0
  29. package/dist/types/actions.mjs.map +1 -0
  30. package/dist/types/agents.d.ts +108 -0
  31. package/dist/types/agents.d.ts.map +1 -0
  32. package/dist/types/agents.js +42 -0
  33. package/dist/types/agents.js.map +1 -0
  34. package/dist/types/agents.mjs +8 -0
  35. package/dist/types/agents.mjs.map +1 -0
  36. package/dist/types/archived.d.ts +44 -0
  37. package/dist/types/archived.d.ts.map +1 -0
  38. package/dist/types/archived.js +86 -0
  39. package/dist/types/archived.js.map +1 -0
  40. package/dist/types/archived.mjs +46 -0
  41. package/dist/types/archived.mjs.map +1 -0
  42. package/dist/types/index.d.ts +4 -0
  43. package/dist/types/index.d.ts.map +1 -0
  44. package/dist/types/index.js +84 -0
  45. package/dist/types/index.js.map +1 -0
  46. package/dist/types/index.mjs +7 -0
  47. package/dist/utils/actions.d.ts +15 -0
  48. package/dist/utils/actions.d.ts.map +1 -0
  49. package/dist/utils/actions.js +196 -0
  50. package/dist/utils/actions.js.map +1 -0
  51. package/dist/utils/actions.mjs +156 -0
  52. package/dist/utils/actions.mjs.map +1 -0
  53. package/dist/utils/coordinateNormalizer.d.ts +10 -0
  54. package/dist/utils/coordinateNormalizer.d.ts.map +1 -0
  55. package/dist/utils/coordinateNormalizer.js +59 -0
  56. package/dist/utils/coordinateNormalizer.js.map +1 -0
  57. package/dist/utils/coordinateNormalizer.mjs +25 -0
  58. package/dist/utils/coordinateNormalizer.mjs.map +1 -0
  59. package/dist/utils/index.d.ts +5 -0
  60. package/dist/utils/index.d.ts.map +1 -0
  61. package/dist/utils/index.js +93 -0
  62. package/dist/utils/index.js.map +1 -0
  63. package/dist/utils/index.mjs +8 -0
  64. package/dist/utils/sleep.d.ts +14 -0
  65. package/dist/utils/sleep.d.ts.map +1 -0
  66. package/dist/utils/sleep.js +45 -0
  67. package/dist/utils/sleep.js.map +1 -0
  68. package/dist/utils/sleep.mjs +11 -0
  69. package/dist/utils/sleep.mjs.map +1 -0
  70. package/dist/utils/systemPromptProcessor.d.ts +16 -0
  71. package/dist/utils/systemPromptProcessor.d.ts.map +1 -0
  72. package/dist/utils/systemPromptProcessor.js +61 -0
  73. package/dist/utils/systemPromptProcessor.js.map +1 -0
  74. package/dist/utils/systemPromptProcessor.mjs +24 -0
  75. package/dist/utils/systemPromptProcessor.mjs.map +1 -0
  76. package/package.json +66 -0
  77. package/src/base/agent.ts +13 -0
  78. package/src/base/index.ts +7 -0
  79. package/src/base/operator.ts +221 -0
  80. package/src/base/parser.ts +16 -0
  81. package/src/types/actions.ts +382 -0
  82. package/src/types/agents.ts +128 -0
  83. package/src/types/archived.ts +55 -0
  84. package/src/types/index.ts +8 -0
  85. package/src/utils/actions.ts +244 -0
  86. package/src/utils/coordinateNormalizer.ts +49 -0
  87. package/src/utils/index.ts +9 -0
  88. package/src/utils/sleep.ts +21 -0
  89. package/src/utils/systemPromptProcessor.ts +48 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils/systemPromptProcessor.mjs","sources":["webpack://@ui-tars-test/shared/./src/utils/systemPromptProcessor.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\n\nimport { SystemPromptTemplate, ACTION_SPACE_PLACEHOLDER } from '../types/agents';\nimport { SupportedActionType } from '../types/actions';\n\n/**\n * Assemble system prompt template by replacing placeholders with actual values\n * @param template - The system prompt template configuration\n * @param supportedActions - Array of supported action types\n * @returns Assembled system prompt string with placeholders replaced\n */\nexport function assembleSystemPrompt(\n template: SystemPromptTemplate,\n supportedActions: SupportedActionType[],\n): string {\n let assembledPrompt = template.template;\n\n // Replace action space placeholder if actionsToString function is provided\n if (template.actionsToString) {\n const actionSpacePlaceholder = `{{${ACTION_SPACE_PLACEHOLDER}}}`;\n const realActionSpaces = template.actionsToString(supportedActions);\n assembledPrompt = assembledPrompt.replace(actionSpacePlaceholder, realActionSpaces || '');\n }\n\n // Handle other custom placeholders replacement\n if (template.placeholders) {\n Object.entries(template.placeholders).forEach(([key, value]) => {\n const placeholder = `{{${key}}}`;\n assembledPrompt = assembledPrompt.replace(new RegExp(placeholder, 'g'), String(value));\n });\n }\n\n return assembledPrompt;\n}\n\n/**\n * Type guard to check if a system prompt is a template object\n * @param systemPrompt - The system prompt to check\n * @returns Whether the system prompt is a SystemPromptTemplate\n */\nexport function isSystemPromptTemplate(\n systemPrompt: string | SystemPromptTemplate,\n): systemPrompt is SystemPromptTemplate {\n return typeof systemPrompt === 'object' && 'template' in 
systemPrompt;\n}\n"],"names":["assembleSystemPrompt","template","supportedActions","assembledPrompt","actionSpacePlaceholder","ACTION_SPACE_PLACEHOLDER","realActionSpaces","Object","key","value","placeholder","RegExp","String","isSystemPromptTemplate","systemPrompt"],"mappings":";;;;;AAcO,SAASA,qBACdC,QAA8B,EAC9BC,gBAAuC;IAEvC,IAAIC,kBAAkBF,SAAS,QAAQ;IAGvC,IAAIA,SAAS,eAAe,EAAE;QAC5B,MAAMG,yBAAyB,CAAC,EAAE,EAAEC,yBAAyB,EAAE,CAAC;QAChE,MAAMC,mBAAmBL,SAAS,eAAe,CAACC;QAClDC,kBAAkBA,gBAAgB,OAAO,CAACC,wBAAwBE,oBAAoB;IACxF;IAGA,IAAIL,SAAS,YAAY,EACvBM,OAAO,OAAO,CAACN,SAAS,YAAY,EAAE,OAAO,CAAC,CAAC,CAACO,KAAKC,MAAM;QACzD,MAAMC,cAAc,CAAC,EAAE,EAAEF,IAAI,EAAE,CAAC;QAChCL,kBAAkBA,gBAAgB,OAAO,CAAC,IAAIQ,OAAOD,aAAa,MAAME,OAAOH;IACjF;IAGF,OAAON;AACT;AAOO,SAASU,uBACdC,YAA2C;IAE3C,OAAO,AAAwB,YAAxB,OAAOA,gBAA6B,cAAcA;AAC3D"}
package/package.json ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ "name": "@ui-tars-test/shared",
3
+ "version": "0.3.2",
4
+ "description": "Shared types for GUI-Agent",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "https://github.com/bytedance/UI-TARS-desktop"
8
+ },
9
+ "bugs": {
10
+ "url": "https://github.com/bytedance/UI-TARS-desktop/issues"
11
+ },
12
+ "exports": {
13
+ "./types": {
14
+ "types": "./dist/types/index.d.ts",
15
+ "import": "./dist/types/index.mjs",
16
+ "require": "./dist/types/index.js"
17
+ },
18
+ "./base": {
19
+ "types": "./dist/base/index.d.ts",
20
+ "import": "./dist/base/index.mjs",
21
+ "require": "./dist/base/index.js"
22
+ },
23
+ "./utils": {
24
+ "types": "./dist/utils/index.d.ts",
25
+ "import": "./dist/utils/index.mjs",
26
+ "require": "./dist/utils/index.js"
27
+ }
28
+ },
29
+ "typesVersions": {
30
+ "*": {
31
+ "*": [
32
+ "./src/*/index.ts"
33
+ ]
34
+ }
35
+ },
36
+ "scripts": {
37
+ "dev": "rslib build --watch",
38
+ "build": "rslib build",
39
+ "build:watch": "rslib build --watch",
40
+ "test": "vitest"
41
+ },
42
+ "keywords": [
43
+ "UI-TARS",
44
+ "GUI Agent",
45
+ "SDK"
46
+ ],
47
+ "license": "Apache-2.0",
48
+ "publishConfig": {
49
+ "access": "public",
50
+ "registry": "https://registry.npmjs.org/"
51
+ },
52
+ "files": [
53
+ "dist",
54
+ "src"
55
+ ],
56
+ "dependencies": {
57
+ "@agent-infra/logger": "0.0.2-beta.2",
58
+ "@tarko/agent": "^0.3.0",
59
+ "@tarko/agent-interface": "^0.3.0"
60
+ },
61
+ "devDependencies": {
62
+ "@rslib/core": "0.10.0",
63
+ "typescript": "^5.7.2",
64
+ "vitest": "^3.0.2"
65
+ }
66
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
+
6
+ import Agent from '@tarko/agent';
7
+
8
+ /**
9
+ * @abstract
10
+ * @class BaseGUIAgent
11
+ * @classdesc Abstract base class for GUI Agents.
12
+ */
13
+ export abstract class BaseGUIAgent extends Agent {}
@@ -0,0 +1,7 @@
1
+ /*
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
+ export * from './agent';
6
+ export * from './parser';
7
+ export * from './operator';
@@ -0,0 +1,221 @@
1
+ /*
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
+
6
+ import { ExecuteParams, ScreenshotOutput, ExecuteOutput, SupportedActionType } from '../types';
7
+
8
+ export interface ScreenContext {
9
+ screenWidth: number;
10
+ screenHeight: number;
11
+ scaleX: number;
12
+ scaleY: number;
13
+ }
14
+
15
+ /**
16
+ * @abstract
17
+ * @class BaseOperator
18
+ * @classdesc Abstract base class for Operators.
19
+ */
20
+ export abstract class BaseOperator {
21
+ abstract doScreenshot(params?: unknown): Promise<unknown>;
22
+ abstract doExecute(params: unknown): Promise<unknown>;
23
+ }
24
+
25
+ /**
26
+ * @abstract
27
+ * @class Operator
28
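+ * @classdesc Abstract operator that extends BaseOperator with lazy initialization, performed before screenshots, action execution, and screen-context lookups.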
29
+ *
30
+ * @example
31
+ * // Example of implementing the abstract members required by a custom Operator
32
+ * import type { GUIAction, ClickAction, DoubleClickAction, TypeAction, ScreenShotAction, SupportedActionType } from '../types/actions';
33
+ *
34
+ * class MyDesktopOperator extends Operator {
35
+ *
36
+ * // Implement the required abstract methods
37
+ * protected async initialize(): Promise<void> {
38
+ * // Implementation for initializing the operator
39
+ * // e.g., validate connections, setup resources
40
+ * // ...
41
+ * }
42
+ *
43
+ * supportedActions(): Array<SupportedActionType> {
44
+ * return [
45
+ * 'click',
46
+ * 'double_click',
47
+ * 'right_click',
48
+ * 'type',
49
+ * 'hotkey',
50
+ * 'scroll',
51
+ * 'drag',
52
+ * 'screenshot'
53
+ * ] as SupportedActionType[];
54
+ * }
55
+ *
56
+ * screenContext(): ScreenContext {
57
+ * // Implementation for getting screen context
58
+ * // ...
59
+ * return {
60
+ * screenWidth: 1920,
61
+ * screenHeight: 1080,
62
+ * scaleX: 1,
63
+ * scaleY: 1
64
+ * };
65
+ * }
66
+ *
67
+ * async screenshot(): Promise<ScreenshotOutput> {
68
+ * // Implementation for taking screenshots
69
+ * // ...
70
+ * return { }; // screenshot output
71
+ * }
72
+ *
73
+ * async execute(params: ExecuteParams): Promise<ExecuteOutput> {
74
+ * // Implementation for executing actions
75
+ * // ...
76
+ * return { }; // execution output
77
+ * }
78
+ * }
79
+ */
80
+ export abstract class Operator extends BaseOperator {
81
+ // Track initialization state
82
+ private _initialized = false;
83
+ private _initializing = false;
84
+ private _initPromise: Promise<void> | null = null;
85
+
86
+ constructor() {
87
+ super();
88
+ // this.ensureInitialized();
89
+ }
90
+
91
+ /**
92
+ * Initializes the operator
93
+ * @description Performs initialization operations for the operator, such as validating connections,
94
+ * setting up resources, and preparing the operation environment.
95
+ * @returns Promise that resolves when initialization is complete
96
+ * @throws Error if initialization fails
97
+ */
98
+ async doInitialize(): Promise<void> {
99
+ // If already initialized, return immediately
100
+ if (this._initialized) {
101
+ return;
102
+ }
103
+
104
+ // If initialization is in progress, wait for it to complete
105
+ if (this._initializing && this._initPromise) {
106
+ return this._initPromise;
107
+ }
108
+
109
+ // Start initialization
110
+ this._initializing = true;
111
+ this._initPromise = (async () => {
112
+ try {
113
+ await this.initialize();
114
+ this._initialized = true;
115
+ } finally {
116
+ this._initializing = false;
117
+ }
118
+ })();
119
+
120
+ return this._initPromise;
121
+ }
122
+
123
+ /**
124
+ * Implementation of initialization logic
125
+ * @description Subclasses should implement this method to perform their specific initialization
126
+ * @returns Promise that resolves when initialization is complete
127
+ * @throws Error if initialization fails
128
+ */
129
+ protected abstract initialize(): Promise<void>;
130
+
131
+ /**
132
+ * Ensures the operator is initialized before performing operations
133
+ * @private
134
+ */
135
+ private async ensureInitialized(): Promise<void> {
136
+ if (!this._initialized && !this._initializing) {
137
+ await this.doInitialize();
138
+ } else if (this._initializing && this._initPromise) {
139
+ await this._initPromise;
140
+ }
141
+ }
142
+
143
+ /**
144
+ * Returns the array of supported action types (synchronous; does not trigger or wait for initialization)
145
+ * @returns Array of action types supported by this operator
146
+ */
147
+ getSupportedActions(): Array<SupportedActionType> {
148
+ // await this.ensureInitialized();
149
+ return this.supportedActions();
150
+ }
151
+
152
+ /**
153
+ * Returns an array of supported action types
154
+ * @returns Array of action types supported by this operator
155
+ */
156
+ protected abstract supportedActions(): Array<SupportedActionType>;
157
+
158
+ /**
159
+ * Safely returns the screen context with initialization guarantee
160
+ * @returns The screen context
161
+ */
162
+ async getScreenContext(): Promise<ScreenContext> {
163
+ await this.ensureInitialized();
164
+ return this.screenContext();
165
+ }
166
+
167
+ /**
168
+ * Returns the screen context
169
+ * @returns The screen context
170
+ */
171
+ protected abstract screenContext(): ScreenContext;
172
+
173
+ /**
174
+ * Safely takes a screenshot with initialization guarantee
175
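+ * @returns Promise that resolves to the screenshot output; if initialization or the screenshot throws, it resolves to an output with status 'failed' and the error message instead of rejecting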
176
+ */
177
+ async doScreenshot(): Promise<ScreenshotOutput> {
178
+ try {
179
+ await this.ensureInitialized();
180
+ return await this.screenshot();
181
+ } catch (error) {
182
+ console.error('Error in doScreenshot:', error);
183
+ return {
184
+ base64: '',
185
+ status: 'failed',
186
+ errorMessage: (error as Error).message,
187
+ };
188
+ }
189
+ }
190
+
191
+ /**
192
+ * Takes a screenshot
193
+ * @returns Promise that resolves to the screenshot output
194
+ */
195
+ protected abstract screenshot(): Promise<ScreenshotOutput>;
196
+
197
+ /**
198
+ * Safely executes actions with initialization guarantee
199
+ * @param params - The parameters for the actions
200
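+ * @returns Promise that resolves to the execution output; if initialization or execution throws, it resolves to an output with status 'failed' and the error message instead of rejecting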
201
+ */
202
+ async doExecute(params: ExecuteParams): Promise<ExecuteOutput> {
203
+ try {
204
+ await this.ensureInitialized();
205
+ return await this.execute(params);
206
+ } catch (error) {
207
+ console.error('Error in doExecute:', error);
208
+ return {
209
+ status: 'failed',
210
+ errorMessage: (error as Error).message,
211
+ };
212
+ }
213
+ }
214
+
215
+ /**
216
+ * Executes actions
217
+ * @param params - The parameters for the actions
218
+ * @returns Promise that resolves to the execution output
219
+ */
220
+ protected abstract execute(params: ExecuteParams): Promise<ExecuteOutput>;
221
+ }
@@ -0,0 +1,16 @@
1
+ /*
2
+ * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ */
5
+
6
+ import { ParsedGUIResponse } from '../types';
7
+
8
+ export abstract class BaseActionParser {
9
+ /**
10
+ * Parse model output
11
+ * @param input Model output string
12
+ * @returns The parsed ParsedGUIResponse, or null if parsing fails.
13
+ * Implementations do not need to throw: the error message is reported in the returned ParsedGUIResponse.
14
+ */
15
+ abstract parsePrediction(input: string): ParsedGUIResponse | null;
16
+ }