shellx-ai 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +162 -586
  2. package/dist/automation/element-finder.d.ts +0 -8
  3. package/dist/automation/element-finder.js +0 -7
  4. package/dist/automation/element-finder.js.map +1 -1
  5. package/dist/automation/ui-action-handler.d.ts +1 -1
  6. package/dist/automation/ui-action-handler.js +58 -31
  7. package/dist/automation/ui-action-handler.js.map +1 -1
  8. package/dist/cbor-compat.js +9 -11
  9. package/dist/cbor-compat.js.map +1 -1
  10. package/dist/data/index.d.ts +9 -0
  11. package/dist/data/index.js +11 -0
  12. package/dist/data/index.js.map +1 -0
  13. package/dist/data/types.d.ts +351 -0
  14. package/dist/data/types.js +8 -0
  15. package/dist/data/types.js.map +1 -0
  16. package/dist/domain-manager.js +15 -13
  17. package/dist/domain-manager.js.map +1 -1
  18. package/dist/error-handler.js +5 -2
  19. package/dist/error-handler.js.map +1 -1
  20. package/dist/errors.js +4 -2
  21. package/dist/errors.js.map +1 -1
  22. package/dist/index.d.ts +36 -26
  23. package/dist/index.js +197 -86
  24. package/dist/index.js.map +1 -1
  25. package/dist/protocol.d.ts +27 -5
  26. package/dist/shellx.d.ts +139 -56
  27. package/dist/shellx.js +201 -88
  28. package/dist/shellx.js.map +1 -1
  29. package/dist/types.d.ts +38 -1
  30. package/dist/utils/retry-helper.js +5 -2
  31. package/dist/utils/retry-helper.js.map +1 -1
  32. package/dist/utils.js +6 -3
  33. package/dist/utils.js.map +1 -1
  34. package/package.json +25 -4
  35. package/dist/shell/output-buffer.d.ts +0 -152
  36. package/dist/shell/output-buffer.js +0 -163
  37. package/dist/shell/output-buffer.js.map +0 -1
  38. package/dist/shell/shell-command-executor.d.ts +0 -182
  39. package/dist/shell/shell-command-executor.js +0 -348
  40. package/dist/shell/shell-command-executor.js.map +0 -1
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { type Logger } from "./logger.js";
2
+ import type { ShellX } from "./shellx.js";
2
3
  export interface TaskClientConfig {
3
4
  timeout: number;
4
5
  reconnect: boolean;
@@ -10,10 +11,13 @@ export interface TaskClientConfig {
10
11
  onError: (error?: Error | Event) => void;
11
12
  onReconnectFailed: () => void;
12
13
  onMessage?: (data: unknown) => void;
14
+ /** Called whenever the device sends an ASR recognition result */
15
+ onAsrResult?: (text: string, source: string) => void;
16
+ /** Called when an OpenClaw Gateway message is received */
17
+ onGatewayMessage?: (message: GatewayMsg) => void;
13
18
  }
14
19
  export declare const DEFAULT_CONFIG: TaskClientConfig;
15
- import type { WsClient, ActionSequence, ScreenShotOptions, ElementSelector, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse } from "./protocol.js";
16
- import type { Buffer } from "buffer";
20
+ import type { WsClient, ActionSequence, ScreenShotOptions, ElementSelector, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse, GatewayMsg } from "./protocol.js";
17
21
  /**
18
22
  * WebSocket interface for cross-platform compatibility
19
23
  */
@@ -24,16 +28,10 @@ interface IWebSocket {
24
28
  onerror?: ((event: Event) => void) | null;
25
29
  onclose?: ((event: CloseEvent) => void) | null;
26
30
  readyState?: number;
27
- send(data: ArrayBuffer | Uint8Array | string | Blob | Buffer): void;
31
+ send(data: ArrayBuffer | Uint8Array | string | Blob): void;
28
32
  close(code?: number, reason?: string): void;
29
33
  ping?(data?: ArrayBuffer | string): void;
30
34
  }
31
- /**
32
- * Interface for ShellX instance
33
- */
34
- interface IShellXInstance {
35
- handleShellOutput: (chunks: [number, number, Uint8Array[]]) => void;
36
- }
37
35
  export interface TaskResponse<T = unknown> {
38
36
  taskId?: string;
39
37
  success?: boolean;
@@ -91,7 +89,7 @@ export declare class ConnectionClient {
91
89
  constructor(deviceId: string | undefined, config?: Partial<TaskClientConfig>, logger?: Logger);
92
90
  private init;
93
91
  private authenticateDevice;
94
- getFetch(): (url: string, options?: RequestInit) => Promise<unknown>;
92
+ getFetch(): (url: string, options?: RequestInit) => Promise<any>;
95
93
  /**
96
94
  * Wait for WebSocket initialization complete
97
95
  */
@@ -112,41 +110,40 @@ export declare class ConnectionClient {
112
110
  ensureConnected(timeout?: number): Promise<void>;
113
111
  private handleMessage;
114
112
  private processServerMessage;
113
+ /**
114
+ * Handle OpenClaw Gateway messages forwarded from the backend
115
+ */
116
+ private handleGatewayMessage;
115
117
  private matchResponseByType;
116
118
  private handleJsonDataResponse;
117
119
  sendMessage<T = unknown>(message: WsClient, taskType?: string): Promise<T>;
118
120
  /**
119
- * Find UI elements on the screen
121
+ * Find UI elements on the screen (using dataRequest)
120
122
  */
121
123
  findElement(selector: ElementSelector, options?: FindOptions): Promise<UIHierarchy>;
122
124
  /**
123
- * Wait for UI element to appear
125
+ * Wait for UI element to appear (using dataRequest)
124
126
  */
125
127
  waitElement(selector: ElementSelector, options?: WaitOptions): Promise<WAITElement>;
126
- /**
127
- * Take a screenshot
128
- * @deprecated Use takeScreenshot() instead for consistency
129
- */
130
- screenShot(options?: ScreenShotOptions): Promise<ScreenShotResponse>;
131
128
  /**
132
129
  * Take a screenshot
133
130
  */
134
131
  takeScreenshot(options?: ScreenShotOptions): Promise<ScreenShotResponse>;
135
132
  /**
136
- * Get screen information
133
+ * Screenshot method (alias for takeScreenshot)
134
+ * This method is required by IConnectionClient interface
137
135
  */
138
- getScreenInfo(): Promise<ScreenInfoResponse>;
136
+ screenshot(options?: ScreenShotOptions): Promise<ScreenShotResponse>;
139
137
  /**
140
138
  * Get screen information
141
- * @deprecated Use wakeScreenAndGetInfo() instead for clarity
142
139
  */
143
- getAndWakeScreen(): Promise<ScreenInfoResponse>;
140
+ getScreenInfo(): Promise<ScreenInfoResponse>;
144
141
  /**
145
142
  * Wake screen and get screen information
146
143
  */
147
144
  wakeScreenAndGetInfo(): Promise<ScreenInfoResponse>;
148
145
  /**
149
- * Execute an action sequence
146
+ * Execute an action sequence (using dataRequest)
150
147
  */
151
148
  executeAction(actionSequence: ActionSequence, taskId?: string, timeout?: number): Promise<unknown>;
152
149
  /**
@@ -188,12 +185,24 @@ export declare class ConnectionClient {
188
185
  stopAsr(): Promise<void>;
189
186
  /**
190
187
  * Speak text using TTS (Text-to-Speech)
188
+ * @param text - Text to synthesise
189
+ * @param voiceType - Optional Volcengine voice type (e.g. "BV001_streaming")
191
190
  */
192
- speak(text: string): Promise<void>;
191
+ speak(text: string, voiceType?: string, volume?: number): Promise<void>;
193
192
  /**
194
193
  * Stop TTS (Text-to-Speech)
195
194
  */
196
195
  stopTts(): Promise<void>;
196
+ /**
197
+ * Close a terminal session by ID
198
+ *
199
+ * @param sid - Session ID (terminal ID) to close
200
+ *
201
+ * @example
202
+ * // Close terminal with ID 12345
203
+ * await client.closeTerminal(12345);
204
+ */
205
+ closeTerminal(sid: number): Promise<void>;
197
206
  /**
198
207
  * Send raw message (for custom integrations)
199
208
  */
@@ -245,7 +254,7 @@ export declare class ConnectionClient {
245
254
  /**
246
255
  * Set associated ShellX instance
247
256
  */
248
- setShellX(shellx: IShellXInstance): void;
257
+ setShellX(shellx: ShellX): void;
249
258
  /**
250
259
  * Create a timeout result object based on task type
251
260
  * @param taskType Type of the task that timed out
@@ -256,7 +265,7 @@ export declare class ConnectionClient {
256
265
  /**
257
266
  * Get associated ShellX instance
258
267
  */
259
- getShellX(): IShellXInstance | null;
268
+ getShellX(): ShellX | null;
260
269
  /**
261
270
  * Manually complete task by taskId
262
271
  * @param taskId Task ID
@@ -283,9 +292,10 @@ export declare class ConnectionClient {
283
292
  completeTasksByType(taskType: string, result?: unknown, success?: boolean): number;
284
293
  }
285
294
  export default ConnectionClient;
286
- export type { UIElement, ElementSelector, ActionSequence, WsClient, WsServer, ScreenShotOptions, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse, } from "./protocol.js";
295
+ export type { UIElement, ElementSelector, ActionSequence, WsClient, WsServer, ScreenShotOptions, FindOptions, WaitOptions, UIHierarchy, WAITElement, ScreenShotResponse, ScreenInfoResponse, GatewayMsg, } from "./protocol.js";
287
296
  export type { Click, Input, Swipe, Key, Wait, Find, Command, AppInfo, Screenshot, BaseResult, Element, FindResult, CommandResult, Action, Clipboard, Actions, ClickResult, InputResult, SwipeResult, KeyResult, WaitResult, ClipboardResult, AppInfoResult, ScreenshotResult, ScreenInfoResult, ExecuteActionsResult, OperationResult, } from "./types.js";
288
297
  export { ShellX, type ShellXOptions } from "./shellx.js";
298
+ export { type AddAlarmRequest, type AddAlarmResponse, type DeleteAlarmRequest, type DeleteAlarmResponse, type GetAlarmListRequest, type GetAlarmListResponse, type AlarmInfo, type LaunchAppRequest, type LaunchAppResponse, type ExitAppRequest, type ExitAppResponse, type InstallAppRequest, type InstallAppResponse, type UninstallAppRequest, type UninstallAppResponse, type GetAppInfoRequest, type GetAppInfoResponse, type GetAppListRequest, type GetAppListResponse, type AppListInfo, type GetCalendarListRequest, type GetCalendarListResponse, type GetCalendarEventRequest, type GetCalendarEventResponse, type GetCalendarDetailRequest, type GetCalendarDetailResponse, type AddCalendarEventRequest, type AddCalendarEventResponse, type CalendarInfo, type CalendarEvent, type CalendarEventReminder, type CalendarEventAttendee, type GetCallsRequest, type GetCallsResponse, type GetCallsStatRequest, type GetCallsStatResponse, type CallRecord, type CallStat, type SearchContactsRequest, type SearchContactsResponse, type AddContactRequest, type AddContactResponse, type DeleteContactRequest, type DeleteContactResponse, type Contact, type ContactPhoneNumber, type SetWifiRequest, type SetWifiResponse, type SetMobileDataRequest, type SetMobileDataResponse, type SetFlashlightRequest, type SetFlashlightResponse, type SetVolumeRequest, type SetVolumeResponse, type SetBrightnessRequest, type SetBrightnessResponse, type SetFontSizeRequest, type SetFontSizeResponse, } from "./data/index.js";
289
299
  export { type ErrorResponse, type BaseOperationResult, createErrorResponse, createOperationResult, ErrorCode, ShellXError, ConnectionError, OperationError, ElementError, ValidationError, } from "./errors.js";
290
300
  export { extractErrorMessage, createErrorResult, createSuccessResult, handleOperation, handleOperationWithRetry, validateRequired, validateOneOfRequired, wrapErrorWithContext, type ErrorHandlerOptions, type ErrorHandlerResult, } from "./error-handler.js";
291
301
  export { buildDeviceApiUrl, buildDeviceConfigUrl, buildDeviceRegisterUrl, buildTokenVerifyUrl, buildTokenRefreshUrl, getApiBaseUrl, getWsServerUrl, getCurrentDomainInfo, createDomainConfig, isChineseUser, type DomainConfig, } from "./domain-manager.js";
package/dist/index.js CHANGED
@@ -2,38 +2,10 @@ import { v4 as uuidv4 } from "uuid";
2
2
  import { wait } from "./utils.js";
3
3
  import { buildDeviceApiUrl } from "./domain-manager.js";
4
4
  import { createLogger } from "./logger.js";
5
- // Add readable timestamp to all logs
6
- const formatTs = () => {
7
- const now = new Date();
8
- return now.toISOString();
9
- };
5
+ // Create logger for ConnectionClient
6
+ const logger = createLogger("ConnectionClient");
7
+ // Execution log store for debugging
10
8
  const executionLogStore = [];
11
- const formatLogArgs = (args) => args
12
- .map((arg) => {
13
- if (typeof arg === "string")
14
- return arg;
15
- try {
16
- return JSON.stringify(arg);
17
- }
18
- catch {
19
- return String(arg);
20
- }
21
- })
22
- .join(" ");
23
- const pushExecutionLog = (ts, args) => {
24
- executionLogStore.push(`[${ts}] ${formatLogArgs(args)}`);
25
- };
26
- const originalConsoleLog = console.log.bind(console);
27
- const originalConsoleWarn = console.warn.bind(console);
28
- const originalConsoleError = console.error.bind(console);
29
- const wrapConsole = (originalFn) => (...args) => {
30
- const ts = formatTs();
31
- pushExecutionLog(ts, args);
32
- originalFn(`[${ts}]`, ...args);
33
- };
34
- console.log = wrapConsole(originalConsoleLog);
35
- console.warn = wrapConsole(originalConsoleWarn);
36
- console.error = wrapConsole(originalConsoleError);
37
9
  // Define default configuration
38
10
  export const DEFAULT_CONFIG = {
39
11
  timeout: 5000,
@@ -57,17 +29,18 @@ async function getWebSocket() {
57
29
  }
58
30
  // Dynamically import ws module in Node.js environment
59
31
  try {
60
- const wsModule = await import("ws");
32
+ const wsModule = await import(/* @vite-ignore */ "ws");
61
33
  // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access
62
34
  return (wsModule.default || wsModule);
63
35
  }
64
36
  catch {
65
- console.error("❌ [WebSocket] ws module is not available, please install: npm install ws");
66
- throw new Error("WebSocket not available. Please install ws module: npm install ws");
37
+ logger.error("❌ [WebSocket] Native WebSocket unavailable and 'ws' package not found.", "Node.js < 20 requires: npm install ws");
38
+ throw new Error("WebSocket not available. " +
39
+ "In Node.js < 20 please install the 'ws' package: npm install ws");
67
40
  }
68
41
  }
69
42
  function isPendingTask(taskType) {
70
- return taskType && taskType !== "command" && taskType !== "oneway";
43
+ return taskType && taskType !== "oneway";
71
44
  }
72
45
  /**
73
46
  * Low-level WebSocket Client for direct protocol communication
@@ -148,12 +121,12 @@ export class ConnectionClient {
148
121
  // Resolve initialization promise after WebSocket is connected
149
122
  resolve();
150
123
  // Check if we're in Android sandbox environment
151
- const isAndroidSandbox = process.env["SANDBOX"] === "android";
124
+ const isAndroidSandbox = typeof process !== "undefined" && process?.env["SANDBOX"] === "android";
152
125
  if (isAndroidSandbox) {
153
126
  void this.getScreenInfo();
154
127
  }
155
128
  else {
156
- void this.getAndWakeScreen();
129
+ void this.wakeScreenAndGetInfo();
157
130
  }
158
131
  };
159
132
  this.ws.onmessage = (event) => {
@@ -209,7 +182,7 @@ export class ConnectionClient {
209
182
  const localResp = await fetchWithTimeout("http://127.0.0.1:9091/info", { method: "GET", headers: { Accept: "application/json" } }, 1000);
210
183
  if (localResp.ok) {
211
184
  const info = (await localResp.json());
212
- if (info && (info.status === "ok" || info.status === 1) && info.uuid) {
185
+ if (info && info.uuid) {
213
186
  if (this.deviceId === undefined || this.deviceId === info.uuid) {
214
187
  const localUuid = info.uuid;
215
188
  this.deviceId = localUuid;
@@ -243,8 +216,8 @@ export class ConnectionClient {
243
216
  if (jsonData?.status === "not_registered") {
244
217
  this.logger.error("❌ [Auth] Device not registered");
245
218
  this.logger.error(`📝 [Auth] Status: ${jsonData.message || "Device not registered"}`);
246
- this.logger.error("💡 [Auth] Please register this device on ShellX.ai platform first");
247
- throw new Error(`Device "${this.deviceId}" not registered on ShellX.ai platform. Please visit https://shellx.ai to register the device first.`);
219
+ this.logger.error("💡 [Auth] Please register this device on ShellX platform first");
220
+ throw new Error(`Device "${this.deviceId}" not registered on ShellX platform. Please visit https://shellx.cc to register the device first.`);
248
221
  }
249
222
  // Verify authentication response is valid
250
223
  // If interface returns null or missing required fields, device is not registered
@@ -283,7 +256,7 @@ export class ConnectionClient {
283
256
  return response.json();
284
257
  }
285
258
  try {
286
- const { default: fetch } = await import("node-fetch");
259
+ const { default: fetch } = await import(/* @vite-ignore */ "node-fetch");
287
260
  this.logger.info(`🌐 [Fetch] Request: ${options?.method || "GET"} ${url}`);
288
261
  // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument
289
262
  const response = await fetch(url, options);
@@ -400,6 +373,10 @@ export class ConnectionClient {
400
373
  //console.log('📨 [ShellX] Received server message:', message);
401
374
  // Call user-defined message handler
402
375
  this.config.onMessage?.(message);
376
+ // Handle OpenClaw Gateway messages
377
+ if (message.gatewayMsg) {
378
+ this.handleGatewayMessage(message.gatewayMsg);
379
+ }
403
380
  // Handle specific message types and resolve pending tasks
404
381
  if (message.jsonData) {
405
382
  this.handleJsonDataResponse(message.jsonData);
@@ -415,6 +392,65 @@ export class ConnectionClient {
415
392
  this.logger.error("❌ [ShellX] Server error:", message.error);
416
393
  }
417
394
  }
395
+ /**
396
+ * Handle OpenClaw Gateway messages forwarded from the backend
397
+ */
398
+ handleGatewayMessage(gatewayMsgRaw) {
399
+ // gatewayMsg may be a JSON string or already parsed object
400
+ let gatewayMsg;
401
+ if (typeof gatewayMsgRaw === "string") {
402
+ try {
403
+ gatewayMsg = JSON.parse(gatewayMsgRaw);
404
+ this.logger.debug("📨 [Gateway] Parsed Gateway message:", gatewayMsg);
405
+ }
406
+ catch (error) {
407
+ this.logger.error("❌ [Gateway] Failed to parse Gateway message:", gatewayMsgRaw, error);
408
+ return;
409
+ }
410
+ }
411
+ else {
412
+ gatewayMsg = gatewayMsgRaw;
413
+ }
414
+ this.logger.info("📨 [Gateway] Received Gateway message:", gatewayMsg);
415
+ // Call user-defined Gateway message handler
416
+ this.config.onGatewayMessage?.(gatewayMsg);
417
+ // Handle Gateway response messages (for requests sent via gatewayReq)
418
+ if (gatewayMsg.type === "res") {
419
+ // This is a response to a Gateway request
420
+ if (gatewayMsg.ok === false && gatewayMsg.error) {
421
+ this.logger.error("❌ [Gateway] Error response:", gatewayMsg.error.code, gatewayMsg.error.message);
422
+ }
423
+ // Note: Gateway request responses are not matched with pending tasks
424
+ // because Gateway requests are typically fire-and-forget or handled via callbacks
425
+ }
426
+ // Handle Gateway event messages
427
+ if (gatewayMsg.type === "event" && gatewayMsg.event) {
428
+ this.logger.debug("📨 [Gateway] Event:", gatewayMsg.event);
429
+ // Handle specific Gateway events
430
+ switch (gatewayMsg.event) {
431
+ case "chat":
432
+ // Chat events contain streaming responses
433
+ if (gatewayMsg.payload) {
434
+ this.logger.debug("📨 [Gateway] Chat payload:", gatewayMsg.payload);
435
+ }
436
+ break;
437
+ case "agent":
438
+ // Agent events contain tool execution updates
439
+ if (gatewayMsg.payload) {
440
+ this.logger.debug("📨 [Gateway] Agent payload:", gatewayMsg.payload);
441
+ }
442
+ break;
443
+ case "health":
444
+ // Health check events
445
+ if (gatewayMsg.payload) {
446
+ this.logger.debug("📨 [Gateway] Health payload:", gatewayMsg.payload);
447
+ }
448
+ break;
449
+ default:
450
+ this.logger.debug("📨 [Gateway] Unknown event type:", gatewayMsg.event);
451
+ }
452
+ }
453
+ }
418
454
  matchResponseByType(jsonData, taskType) {
419
455
  switch (taskType) {
420
456
  case "findElement":
@@ -427,9 +463,9 @@ export class ConnectionClient {
427
463
  return jsonData.waitElement;
428
464
  }
429
465
  break;
430
- case "screenShot":
431
- if (jsonData.screenShot) {
432
- return jsonData.screenShot;
466
+ case "screenshot":
467
+ if (jsonData.screenshot) {
468
+ return jsonData.screenshot;
433
469
  }
434
470
  break;
435
471
  case "screenInfo":
@@ -438,16 +474,23 @@ export class ConnectionClient {
438
474
  }
439
475
  break;
440
476
  case "appList":
441
- console.log("appList");
442
- console.log(jsonData);
477
+ logger.debug("appList:", jsonData);
443
478
  if (jsonData.appList) {
444
479
  return jsonData.appList;
445
480
  }
446
481
  break;
447
482
  case "action":
483
+ // Try multiple possible response fields for action type
448
484
  if (jsonData.action_event) {
449
485
  return jsonData.action_event;
450
486
  }
487
+ // Server may return action-specific fields like screenshot, screenInfo, etc.
488
+ if (jsonData.screenshot) {
489
+ return jsonData.screenshot;
490
+ }
491
+ if (jsonData.screenInfo) {
492
+ return jsonData.screenInfo;
493
+ }
451
494
  break;
452
495
  case "promptflowx":
453
496
  if (jsonData.promptflowx) {
@@ -462,10 +505,26 @@ export class ConnectionClient {
462
505
  case "asrControl":
463
506
  // ASR control doesn't expect a specific response
464
507
  return { success: true };
508
+ case "dataRequest":
509
+ // Data module request response
510
+ if (jsonData.dataRequest) {
511
+ return jsonData.dataRequest.result;
512
+ }
513
+ break;
514
+ case "command":
515
+ // Shell command result (ptyType=0): server responds with commandResult field
516
+ if (jsonData.command !== undefined) {
517
+ return jsonData.command;
518
+ }
519
+ break;
465
520
  }
466
521
  return undefined;
467
522
  }
468
523
  handleJsonDataResponse(jsonData) {
524
+ // Route ASR results (device-initiated, no taskId)
525
+ if (jsonData.asr && this.config.onAsrResult) {
526
+ this.config.onAsrResult(jsonData.asr.text, jsonData.asr.source);
527
+ }
469
528
  const responseTaskId = jsonData?.taskId;
470
529
  if (responseTaskId) {
471
530
  const task = this.pendingTasks.get(responseTaskId);
@@ -485,7 +544,8 @@ export class ConnectionClient {
485
544
  }
486
545
  }
487
546
  else {
488
- this.logger.warn(`⚠️ [ShellX] Received taskId ${responseTaskId} but no pending task found`);
547
+ // 改为 debug 级别,因为可能是延迟的异步响应或已被 fallback 处理
548
+ this.logger.debug(`⚠️ [ShellX] Received taskId ${responseTaskId} but no pending task found (may be handled by fallback)`);
489
549
  }
490
550
  }
491
551
  else {
@@ -512,7 +572,7 @@ export class ConnectionClient {
512
572
  }
513
573
  // ===== PROTOCOL-AWARE TASK METHODS =====
514
574
  /**
515
- * Find UI elements on the screen
575
+ * Find UI elements on the screen (using dataRequest)
516
576
  */
517
577
  async findElement(selector, options) {
518
578
  const findAction = {
@@ -520,10 +580,11 @@ export class ConnectionClient {
520
580
  selector,
521
581
  options,
522
582
  };
523
- return this.sendMessage({ findElement: findAction }, "findElement");
583
+ // 转换为 dataRequest 格式
584
+ return this.sendMessage({ dataRequest: { requestType: "findElement", params: findAction } }, "dataRequest");
524
585
  }
525
586
  /**
526
- * Wait for UI element to appear
587
+ * Wait for UI element to appear (using dataRequest)
527
588
  */
528
589
  async waitElement(selector, options) {
529
590
  const waitAction = {
@@ -531,45 +592,41 @@ export class ConnectionClient {
531
592
  selector,
532
593
  options,
533
594
  };
534
- return this.sendMessage({ waitElement: waitAction }, "waitElement");
535
- }
536
- /**
537
- * Take a screenshot
538
- * @deprecated Use takeScreenshot() instead for consistency
539
- */
540
- async screenShot(options) {
541
- return this.takeScreenshot(options);
595
+ // 转换为 dataRequest 格式
596
+ return this.sendMessage({ dataRequest: { requestType: "waitElement", params: waitAction } }, "dataRequest");
542
597
  }
543
598
  /**
544
599
  * Take a screenshot
545
600
  */
546
601
  async takeScreenshot(options) {
547
- return this.sendMessage({
548
- screenShot: options || {
602
+ const screenshotAction = {
603
+ type: "screenshot",
604
+ ...(options || {
549
605
  format: "png",
550
606
  quality: 30,
551
607
  scale: 1.0,
552
- region: {
553
- left: 0,
554
- top: 0,
555
- width: 1080,
556
- height: 2340,
557
- },
608
+ }),
609
+ };
610
+ // Use sendMessageWithTaskId to set a longer timeout for screenshots (16 seconds)
611
+ const { promise } = await this.sendMessageWithTaskId({
612
+ action: {
613
+ action: screenshotAction,
558
614
  },
559
- }, "screenShot");
615
+ }, "screenshot", { timeout: 16000 });
616
+ return await promise;
560
617
  }
561
618
  /**
562
- * Get screen information
619
+ * Screenshot method (alias for takeScreenshot)
620
+ * This method is required by IConnectionClient interface
563
621
  */
564
- async getScreenInfo() {
565
- return this.sendMessage({ screenInfo: { keepScreenOn: false, wakeApp: true } }, "screenInfo");
622
+ async screenshot(options = { format: "png" }) {
623
+ return await this.takeScreenshot(options);
566
624
  }
567
625
  /**
568
626
  * Get screen information
569
- * @deprecated Use wakeScreenAndGetInfo() instead for clarity
570
627
  */
571
- async getAndWakeScreen() {
572
- return this.wakeScreenAndGetInfo();
628
+ async getScreenInfo() {
629
+ return this.sendMessage({ screenInfo: { keepScreenOn: false, wakeApp: true } }, "screenInfo");
573
630
  }
574
631
  /**
575
632
  * Wake screen and get screen information
@@ -578,10 +635,10 @@ export class ConnectionClient {
578
635
  return this.sendMessage({ screenInfo: { keepScreenOn: true, wakeApp: true } }, "screenInfo");
579
636
  }
580
637
  /**
581
- * Execute an action sequence
638
+ * Execute an action sequence (using dataRequest)
582
639
  */
583
640
  async executeAction(actionSequence, taskId, timeout) {
584
- const { promise } = await this.sendMessageWithTaskId({ actions: actionSequence }, "action", { taskId, timeout });
641
+ const { promise } = await this.sendMessageWithTaskId({ dataRequest: { requestType: "actions", params: actionSequence } }, "dataRequest", { taskId, timeout });
585
642
  return await promise;
586
643
  }
587
644
  /**
@@ -636,9 +693,19 @@ export class ConnectionClient {
636
693
  }
637
694
  /**
638
695
  * Speak text using TTS (Text-to-Speech)
696
+ * @param text - Text to synthesise
697
+ * @param voiceType - Optional Volcengine voice type (e.g. "BV001_streaming")
639
698
  */
640
- async speak(text) {
641
- return this.sendMessage({ ttsControl: { action: "tts_speak", text } }, "oneway");
699
+ async speak(text, voiceType, volume) {
700
+ const ctrl = {
701
+ action: "tts_speak",
702
+ text,
703
+ };
704
+ if (voiceType)
705
+ ctrl.voice_type = voiceType;
706
+ if (volume !== undefined)
707
+ ctrl.volume = volume;
708
+ return this.sendMessage({ ttsControl: ctrl }, "oneway");
642
709
  }
643
710
  /**
644
711
  * Stop TTS (Text-to-Speech)
@@ -646,6 +713,19 @@ export class ConnectionClient {
646
713
  async stopTts() {
647
714
  return this.sendMessage({ ttsControl: { action: "tts_stop" } }, "oneway");
648
715
  }
716
+ /**
717
+ * Close a terminal session by ID
718
+ *
719
+ * @param sid - Session ID (terminal ID) to close
720
+ *
721
+ * @example
722
+ * // Close terminal with ID 12345
723
+ * await client.closeTerminal(12345);
724
+ */
725
+ async closeTerminal(sid) {
726
+ this.logger.info(`🔌 [ConnectionClient] Closing terminal: sid=${sid}`);
727
+ return this.sendMessage({ close: sid }, "oneway");
728
+ }
649
729
  /**
650
730
  * Send raw message (for custom integrations)
651
731
  */
@@ -658,12 +738,16 @@ export class ConnectionClient {
658
738
  * Detect task type from message content
659
739
  */
660
740
  detectTaskType(message) {
741
+ // Check for dataRequest first (new protocol)
742
+ if (message.dataRequest)
743
+ return "dataRequest";
744
+ // Legacy protocol support
661
745
  if (message.findElement)
662
746
  return "findElement";
663
747
  if (message.waitElement)
664
748
  return "waitElement";
665
- if (message.screenShot)
666
- return "screenShot";
749
+ if (message.screenshot)
750
+ return "screenshot";
667
751
  if (message.screenInfo)
668
752
  return "screenInfo";
669
753
  if (message.appList)
@@ -691,6 +775,8 @@ export class ConnectionClient {
691
775
  return "oneway";
692
776
  if (message.ping)
693
777
  return "oneway";
778
+ if (message.close !== undefined)
779
+ return "oneway";
694
780
  // Default to oneway for unknown message types
695
781
  return "oneway";
696
782
  }
@@ -704,10 +790,7 @@ export class ConnectionClient {
704
790
  async sendMessageWithTaskId(message, taskType, options) {
705
791
  return new Promise((resolve) => {
706
792
  const { timeout, taskId: definedTaskId } = options || {};
707
- let taskId = definedTaskId;
708
- if (taskId == null) {
709
- taskId = uuidv4();
710
- }
793
+ const taskId = definedTaskId ?? uuidv4();
711
794
  this.logger.info(`📋 [ShellX] Created task: ${taskId}, Task type: ${taskType}`);
712
795
  this.logger.info(message);
713
796
  const outgoingMessage = taskId ? { ...message, taskId } : message;
@@ -766,8 +849,30 @@ export class ConnectionClient {
766
849
  else {
767
850
  this.logger.info("⏳ [ShellX] Connection not ready, message queued");
768
851
  this.messageQueue.push(outgoingMessage);
769
- // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
770
- resolve({ taskId, promise: Promise.resolve(undefined) });
852
+ if (isPendingTask(taskType)) {
853
+ // Attach the real promise to the already-registered pendingTask so the
854
+ // caller actually waits for the response once the connection is flushed.
855
+ // (Previously this branch returned Promise.resolve(undefined), causing
856
+ // callers to receive undefined even though the message was queued and
857
+ // the response would eventually arrive.)
858
+ const promise = new Promise((promiseResolve, promiseReject) => {
859
+ if (taskId != null) {
860
+ const task = this.pendingTasks.get(taskId);
861
+ if (task) {
862
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-explicit-any
863
+ task.resolve = promiseResolve;
864
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-explicit-any
865
+ task.reject = promiseReject;
866
+ }
867
+ }
868
+ });
869
+ resolve({ taskId, promise });
870
+ }
871
+ else {
872
+ // Fire-and-forget messages (oneway/command) don't expect a response.
873
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-explicit-any
874
+ resolve({ taskId, promise: Promise.resolve(undefined) });
875
+ }
771
876
  }
772
877
  });
773
878
  }
@@ -830,6 +935,12 @@ export class ConnectionClient {
830
935
  this.wsConnected = false;
831
936
  this.authenticated = false;
832
937
  this.ws?.close();
938
+ // Cancel all pending task timers and resolve them with a connection-closed
939
+ // result so callers don't hang forever. Must happen before clearing the map.
940
+ for (const [, task] of this.pendingTasks) {
941
+ clearTimeout(task.timer);
942
+ task.resolve(this.createTimeoutResult(task.type, 0));
943
+ }
833
944
  this.pendingTasks.clear();
834
945
  this.messageQueue = [];
835
946
  this.stopPing();
@@ -900,7 +1011,7 @@ export class ConnectionClient {
900
1011
  case "action":
901
1012
  // For action tasks, return a basic result
902
1013
  return baseResult;
903
- case "screenShot":
1014
+ case "screenshot":
904
1015
  return {
905
1016
  ...baseResult,
906
1017
  imageData: "",