shellx-ai 1.0.12 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +586 -0
  3. package/dist/automation/element-finder.d.ts +189 -0
  4. package/dist/automation/element-finder.js +322 -0
  5. package/dist/automation/element-finder.js.map +1 -0
  6. package/dist/automation/ui-action-handler.d.ts +330 -0
  7. package/dist/automation/ui-action-handler.js +873 -0
  8. package/dist/automation/ui-action-handler.js.map +1 -0
  9. package/dist/cbor-compat.d.ts +27 -0
  10. package/dist/cbor-compat.js +108 -0
  11. package/dist/cbor-compat.js.map +1 -0
  12. package/dist/domain-manager.d.ts +80 -0
  13. package/dist/domain-manager.js +158 -0
  14. package/dist/domain-manager.js.map +1 -0
  15. package/dist/error-handler.d.ts +87 -0
  16. package/dist/error-handler.js +148 -0
  17. package/dist/error-handler.js.map +1 -0
  18. package/dist/errors.d.ts +114 -0
  19. package/dist/errors.js +139 -0
  20. package/dist/errors.js.map +1 -0
  21. package/dist/index.d.ts +163 -54
  22. package/dist/index.js +706 -480
  23. package/dist/index.js.map +1 -0
  24. package/dist/logger.d.ts +81 -0
  25. package/dist/logger.js +128 -0
  26. package/dist/logger.js.map +1 -0
  27. package/dist/protocol.d.ts +147 -31
  28. package/dist/protocol.js +2 -2
  29. package/dist/protocol.js.map +1 -0
  30. package/dist/shell/output-buffer.d.ts +152 -0
  31. package/dist/shell/output-buffer.js +163 -0
  32. package/dist/shell/output-buffer.js.map +1 -0
  33. package/dist/shell/shell-command-executor.d.ts +182 -0
  34. package/dist/shell/shell-command-executor.js +348 -0
  35. package/dist/shell/shell-command-executor.js.map +1 -0
  36. package/dist/shellx.d.ts +681 -178
  37. package/dist/shellx.js +762 -1159
  38. package/dist/shellx.js.map +1 -0
  39. package/dist/types.d.ts +132 -57
  40. package/dist/types.js +4 -4
  41. package/dist/types.js.map +1 -0
  42. package/dist/utils/retry-helper.d.ts +73 -0
  43. package/dist/utils/retry-helper.js +92 -0
  44. package/dist/utils/retry-helper.js.map +1 -0
  45. package/dist/utils.d.ts +3 -3
  46. package/dist/utils.js +17 -23
  47. package/dist/utils.js.map +1 -0
  48. package/package.json +95 -62
package/dist/index.d.ts CHANGED
@@ -1,39 +1,82 @@
1
+ import { type Logger } from "./logger.js";
1
2
  export interface TaskClientConfig {
2
3
  timeout: number;
3
4
  reconnect: boolean;
4
5
  reconnectMaxAttempts: number;
5
6
  reconnectInterval: number;
6
7
  pingInterval: number;
7
- onOpen: () => void;
8
+ onOpen: (deviceId: string | undefined) => void;
8
9
  onClose: (event?: CloseEvent) => void;
9
- onError: (error?: Event) => void;
10
+ onError: (error?: Error | Event) => void;
10
11
  onReconnectFailed: () => void;
11
- onMessage?: (data: any) => void;
12
+ onMessage?: (data: unknown) => void;
12
13
  }
13
14
  export declare const DEFAULT_CONFIG: TaskClientConfig;
14
- import type { WsClient, ActionSequence, ScreenShotOptions, ElementSelector, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse, AppListResponse } from './protocol';
15
- export interface TaskResponse<T = any> {
15
+ import type { WsClient, ActionSequence, ScreenShotOptions, ElementSelector, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse } from "./protocol.js";
16
+ import type { Buffer } from "buffer";
17
+ /**
18
+ * WebSocket interface for cross-platform compatibility
19
+ */
20
+ interface IWebSocket {
21
+ binaryType?: string;
22
+ onopen?: ((event: Event) => void) | null;
23
+ onmessage?: ((event: MessageEvent) => void) | null;
24
+ onerror?: ((event: Event) => void) | null;
25
+ onclose?: ((event: CloseEvent) => void) | null;
26
+ readyState?: number;
27
+ send(data: ArrayBuffer | Uint8Array | string | Blob | Buffer): void;
28
+ close(code?: number, reason?: string): void;
29
+ ping?(data?: ArrayBuffer | string): void;
30
+ }
31
+ /**
32
+ * Interface for ShellX instance
33
+ */
34
+ interface IShellXInstance {
35
+ handleShellOutput: (chunks: [number, number, Uint8Array[]]) => void;
36
+ }
37
+ export interface TaskResponse<T = unknown> {
16
38
  taskId?: string;
17
39
  success?: boolean;
18
40
  data?: T;
19
- error?: {
20
- message: string;
21
- };
41
+ error?: string;
22
42
  }
23
- export interface PendingTask {
24
- resolve: (data: any) => void;
43
+ export interface PendingTask<T = unknown> {
44
+ resolve: (data: T | PromiseLike<T>) => void;
25
45
  reject: (err: Error) => void;
26
46
  timer: number;
27
47
  type: string;
28
48
  }
49
+ export interface AuthenticationResponse {
50
+ authenticate: string;
51
+ last_updated: string;
52
+ machine: string;
53
+ registered_at: string;
54
+ status?: "registered" | "not_registered";
55
+ message?: string;
56
+ }
29
57
  /**
30
- * Enhanced WebSocket Task Client with protocol-aware task methods
58
+ * Low-level WebSocket Client for direct protocol communication
59
+ *
60
+ * @warning This is a low-level API intended for advanced use cases only.
61
+ * Most users should use the ShellX class instead, which provides a simpler,
62
+ * more intuitive interface with automatic error handling and retry logic.
63
+ *
64
+ * @example
65
+ * ```typescript
66
+ * // For most users, use ShellX instead:
67
+ * import { ShellX } from './shellx';
68
+ * const shellx = new ShellX({ deviceId: 'your-device-id' });
69
+ * await shellx.connect();
70
+ * await shellx.click({ text: 'Submit' });
71
+ * ```
72
+ *
73
+ * @see Use ShellX for a high-level API unless you need low-level WebSocket access
31
74
  */
32
- export declare class ConnectionTaskClient {
75
+ export declare class ConnectionClient {
33
76
  private wsUrl;
34
77
  private deviceId;
35
78
  private config;
36
- ws: any;
79
+ ws: IWebSocket | null;
37
80
  private shellxConnected;
38
81
  private wsConnected;
39
82
  private authenticated;
@@ -43,22 +86,35 @@ export declare class ConnectionTaskClient {
43
86
  private pingIntervalId;
44
87
  private initializationPromise;
45
88
  private shellx;
46
- constructor(deviceId: string, config?: Partial<TaskClientConfig>);
89
+ private executionLogsRef;
90
+ private logger;
91
+ constructor(deviceId: string | undefined, config?: Partial<TaskClientConfig>, logger?: Logger);
47
92
  private init;
48
93
  private authenticateDevice;
49
- getFetch(): (url: string, options?: any) => Promise<any>;
94
+ getFetch(): (url: string, options?: RequestInit) => Promise<unknown>;
50
95
  /**
51
- * 等待WebSocket初始化完成
96
+ * Wait for WebSocket initialization to complete
52
97
  */
53
98
  waitForInitialization(): Promise<void>;
54
99
  /**
55
- * 发送认证消息
100
+ * Ensure connection is ready before executing device commands
101
+ * If not connected, wait for connection with timeout
102
+ *
103
+ * @param timeout - Timeout in milliseconds (default: 10000ms)
104
+ * @throws Error if connection timeout or connection failed
105
+ *
106
+ * @example
107
+ * ```typescript
108
+ * await client.ensureConnected(10000);
109
+ * console.log('Connection ready');
110
+ * ```
56
111
  */
57
- private sendAuthenticationMessage;
112
+ ensureConnected(timeout?: number): Promise<void>;
58
113
  private handleMessage;
59
114
  private processServerMessage;
115
+ private matchResponseByType;
60
116
  private handleJsonDataResponse;
61
- sendMessage(message: WsClient, taskType?: string, timeout?: number): Promise<any>;
117
+ sendMessage<T = unknown>(message: WsClient, taskType?: string): Promise<T>;
62
118
  /**
63
119
  * Find UI elements on the screen
64
120
  */
@@ -69,31 +125,34 @@ export declare class ConnectionTaskClient {
69
125
  waitElement(selector: ElementSelector, options?: WaitOptions): Promise<WAITElement>;
70
126
  /**
71
127
  * Take a screenshot
128
+ * @deprecated Use takeScreenshot() instead for consistency
72
129
  */
73
130
  screenShot(options?: ScreenShotOptions): Promise<ScreenShotResponse>;
131
+ /**
132
+ * Take a screenshot
133
+ */
134
+ takeScreenshot(options?: ScreenShotOptions): Promise<ScreenShotResponse>;
74
135
  /**
75
136
  * Get screen information
76
137
  */
77
138
  getScreenInfo(): Promise<ScreenInfoResponse>;
78
139
  /**
79
- * Get list of installed apps
140
+ * Wake screen and get screen information
141
+ * @deprecated Use wakeScreenAndGetInfo() instead for clarity
80
142
  */
81
- getAppList(options?: {
82
- includeSystemApps?: boolean;
83
- includeDisabledApps?: boolean;
84
- }): Promise<AppListResponse>;
143
+ getAndWakeScreen(): Promise<ScreenInfoResponse>;
85
144
  /**
86
- * Get specific app information
145
+ * Wake screen and get screen information
87
146
  */
88
- getAppInfo(packageName: string): Promise<any>;
147
+ wakeScreenAndGetInfo(): Promise<ScreenInfoResponse>;
89
148
  /**
90
149
  * Execute an action sequence
91
150
  */
92
- executeAction(actionSequence: ActionSequence, taskId?: string, timeeout?: number): Promise<any>;
151
+ executeAction(actionSequence: ActionSequence, taskId?: string, timeout?: number): Promise<unknown>;
93
152
  /**
94
153
  * Execute promptflow actions
95
154
  */
96
- executePromptFlow(actionSequence: ActionSequence): Promise<any>;
155
+ executePromptFlow(actionSequence: ActionSequence): Promise<unknown>;
97
156
  /**
98
157
  * Listen for display changes
99
158
  */
@@ -119,19 +178,43 @@ export declare class ConnectionTaskClient {
119
178
  * Send chat message
120
179
  */
121
180
  sendChat(message: string): Promise<void>;
181
+ /**
182
+ * Start ASR (Automatic Speech Recognition)
183
+ */
184
+ startAsr(): Promise<void>;
185
+ /**
186
+ * Stop ASR (Automatic Speech Recognition)
187
+ */
188
+ stopAsr(): Promise<void>;
189
+ /**
190
+ * Speak text using TTS (Text-to-Speech)
191
+ */
192
+ speak(text: string): Promise<void>;
193
+ /**
194
+ * Stop TTS (Text-to-Speech)
195
+ */
196
+ stopTts(): Promise<void>;
122
197
  /**
123
198
  * Send raw message (for custom integrations)
124
199
  */
125
200
  sendRawMessage(message: WsClient): Promise<void>;
126
201
  /**
127
- * 发送消息并返回 taskId,允许手动控制任务完成
128
- * @param message 要发送的消息
129
- * @param taskType 任务类型
130
- * @returns Promise<{taskId: string, promise: Promise<any>}>
202
+ * Detect task type from message content
203
+ */
204
+ private detectTaskType;
205
+ /**
206
+ * Send message and return taskId
207
+ * @param message Message to send
208
+ * @param taskType Task type
209
+ * @param options Optional parameters { timeout, taskId }
210
+ * @returns Promise<{taskId: string, promise: Promise<T>}>
131
211
  */
132
- sendMessageWithTaskId(message: WsClient, taskType: string, definedTaskId?: string, timeout?: number): Promise<{
212
+ sendMessageWithTaskId<T = unknown>(message: WsClient, taskType: string, options?: {
213
+ timeout?: number;
214
+ taskId?: string;
215
+ }): Promise<{
133
216
  taskId: string;
134
- promise: Promise<any>;
217
+ promise: Promise<T>;
135
218
  }>;
136
219
  private startPing;
137
220
  private stopPing;
@@ -141,42 +224,68 @@ export declare class ConnectionTaskClient {
141
224
  get isConnected(): boolean;
142
225
  get isWebSocketConnected(): boolean;
143
226
  get isAuthenticated(): boolean;
227
+ /**
228
+ * Get the authenticated device ID (UUID)
229
+ */
230
+ getDeviceId(): string | undefined;
144
231
  get pendingTaskCount(): number;
145
232
  get queuedMessageCount(): number;
146
233
  /**
147
- * 设置关联的 ShellX 实例
234
+ * Get current execution log
235
+ */
236
+ getExecutionLogs(): string[];
237
+ /**
238
+ * Clear current execution log
239
+ */
240
+ clearExecutionLogs(): void;
241
+ /**
242
+ * Manually append an execution log
243
+ */
244
+ appendExecutionLog(log: string): void;
245
+ /**
246
+ * Set associated ShellX instance
247
+ */
248
+ setShellX(shellx: IShellXInstance): void;
249
+ /**
250
+ * Create a timeout result object based on task type
251
+ * @param taskType Type of the task that timed out
252
+ * @param timeoutMs Timeout duration in milliseconds
253
+ * @returns A result object with success: false
148
254
  */
149
- setShellX(shellx: any): void;
255
+ private createTimeoutResult;
150
256
  /**
151
- * 获取关联的 ShellX 实例
257
+ * Get associated ShellX instance
152
258
  */
153
- getShellX(): any;
259
+ getShellX(): IShellXInstance | null;
154
260
  /**
155
- * 手动完成指定 taskId 的任务
156
- * @param taskId 任务ID
157
- * @param result 任务结果
158
- * @param success 是否成功
261
+ * Manually complete task by taskId
262
+ * @param taskId Task ID
263
+ * @param result Task result
264
+ * @param success Success status
159
265
  */
160
- completeTask(taskId: string, result?: any, success?: boolean): boolean;
266
+ completeTask(taskId: string, result?: unknown, success?: boolean): boolean;
161
267
  /**
162
- * 获取所有待处理任务的ID列表
268
+ * Get all pending task IDs
163
269
  */
164
270
  getPendingTaskIds(): string[];
165
271
  /**
166
- * 获取指定任务的信息
272
+ * Get task info by taskId
167
273
  */
168
274
  getTaskInfo(taskId: string): {
169
275
  type: string;
170
276
  } | null;
171
277
  /**
172
- * 批量完成指定类型的任务
173
- * @param taskType 任务类型
174
- * @param result 任务结果
175
- * @param success 是否成功
278
+ * Batch complete tasks by type
279
+ * @param taskType Task type
280
+ * @param result Task result
281
+ * @param success Success status
176
282
  */
177
- completeTasksByType(taskType: string, result?: any, success?: boolean): number;
283
+ completeTasksByType(taskType: string, result?: unknown, success?: boolean): number;
178
284
  }
179
- export default ConnectionTaskClient;
180
- export type { UIElement, ElementSelector, ActionSequence, WsClient, WsServer, ScreenShotOptions, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse, AppListResponse } from './protocol';
181
- export type { Click, Input, Swipe, Key, Wait, Find, Command, AppInfo, Screenshot, ActionResult, Element, FindResult, CommandResult, Action, Actions, } from './types';
182
- export { createShellXWithShellMonitoring, ShellX } from './shellx';
285
+ export default ConnectionClient;
286
+ export type { UIElement, ElementSelector, ActionSequence, WsClient, WsServer, ScreenShotOptions, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse, } from "./protocol.js";
287
+ export type { Click, Input, Swipe, Key, Wait, Find, Command, AppInfo, Screenshot, BaseResult, Element, FindResult, CommandResult, Action, Clipboard, Actions, ClickResult, InputResult, SwipeResult, KeyResult, WaitResult, ClipboardResult, AppInfoResult, ScreenshotResult, ScreenInfoResult, ExecuteActionsResult, OperationResult, } from "./types.js";
288
+ export { ShellX, type ShellXOptions } from "./shellx.js";
289
+ export { type ErrorResponse, type BaseOperationResult, createErrorResponse, createOperationResult, ErrorCode, ShellXError, ConnectionError, OperationError, ElementError, ValidationError, } from "./errors.js";
290
+ export { extractErrorMessage, createErrorResult, createSuccessResult, handleOperation, handleOperationWithRetry, validateRequired, validateOneOfRequired, wrapErrorWithContext, type ErrorHandlerOptions, type ErrorHandlerResult, } from "./error-handler.js";
291
+ export { buildDeviceApiUrl, buildDeviceConfigUrl, buildDeviceRegisterUrl, buildTokenVerifyUrl, buildTokenRefreshUrl, getApiBaseUrl, getWsServerUrl, getCurrentDomainInfo, createDomainConfig, isChineseUser, type DomainConfig, } from "./domain-manager.js";