shellx-ai 1.1.1 → 1.1.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -161,7 +161,7 @@ export type JSONData = {
161
161
  findElement?: UIHierarchy;
162
162
  findAllElement?: string;
163
163
  waitElement?: WAITElement;
164
- screenShot?: ScreenShotResponse;
164
+ screenshot?: ScreenShotResponse;
165
165
  screenInfo?: ScreenInfoResponse;
166
166
  screenChange?: ScreenChangeEvent;
167
167
  appList?: AppListResponse;
@@ -170,6 +170,17 @@ export type JSONData = {
170
170
  action_event?: ClickAction | KeyAction | SwipeAction;
171
171
  /** Forwarded openclaw gateway message */
172
172
  gatewayMsg?: GatewayMsg;
173
+ /** Data module response */
174
+ dataRequest?: {
175
+ requestType: string;
176
+ result: unknown;
177
+ };
178
+ /** Shell command execution result (ptyType=0) */
179
+ command?: {
180
+ success: boolean;
181
+ result?: string;
182
+ errorMessage?: string;
183
+ };
173
184
  };
174
185
  /** Position and size of a window, see the Rust version. */
175
186
  export type WsWinsize = {
@@ -206,6 +217,8 @@ export type WsServer = {
206
217
  jsonData?: JSONData;
207
218
  uiClick?: [string, number, number];
208
219
  uiRefresh?: boolean;
220
+ /** Gateway message is sent as JSON string from Java, needs parsing in SDK */
221
+ gatewayMsg?: string;
209
222
  };
210
223
  /** Screen capture region */
211
224
  export type ScreenRegion = {
@@ -223,6 +236,7 @@ export type ScreenShotOptions = {
223
236
  isRecording?: boolean;
224
237
  saveToFile?: boolean;
225
238
  duration?: number;
239
+ timeout?: number;
226
240
  };
227
241
  /** Client message type, see the Rust version. */
228
242
  export type WsClient = {
@@ -240,7 +254,7 @@ export type WsClient = {
240
254
  ping?: bigint;
241
255
  findElement?: findAction;
242
256
  waitElement?: WaitAction;
243
- screenShot?: ScreenShotOptions;
257
+ screenshot?: ScreenShotOptions;
244
258
  screenInfo?: {
245
259
  keepScreenOn?: boolean;
246
260
  wakeApp?: boolean;
@@ -264,6 +278,11 @@ export type WsClient = {
264
278
  switchNode?: string;
265
279
  asrControl?: AsrControl;
266
280
  ttsControl?: TtsControl;
281
+ /** Data module request */
282
+ dataRequest?: {
283
+ requestType: string;
284
+ params: unknown;
285
+ };
267
286
  /** Openclaw gateway request to forward to the gateway */
268
287
  gatewayReq?: {
269
288
  type: "req";
@@ -276,7 +295,7 @@ export type ShellXActions = {
276
295
  taskId?: string;
277
296
  title?: string;
278
297
  thought?: string;
279
- action: ShellCommandAction | ClickAction | InputAction | SwipeAction | KeyAction | WaitAction | findAction | ClipboardAction | getAppInfoAction | completeAction;
298
+ action: ShellCommandAction | ClickAction | InputAction | SwipeAction | KeyAction | WaitAction | findAction | ClipboardAction | getAppInfoAction | ScreenShotAction | completeAction;
280
299
  };
281
300
  export interface ShellCommandAction {
282
301
  title?: string;
@@ -285,10 +304,12 @@ export interface ShellCommandAction {
285
304
  thinking?: string;
286
305
  note?: string;
287
306
  command: string;
307
+ ptyType?: 0 | 1;
308
+ terminalId?: number;
288
309
  }
289
310
  export interface ScreenShotAction {
290
311
  title?: string;
291
- type: "screenShot";
312
+ type: "screenshot";
292
313
  activity?: string;
293
314
  thinking?: string;
294
315
  note?: string;
@@ -301,7 +322,7 @@ export interface ScreenShotAction {
301
322
  }
302
323
  export interface getAppInfoAction {
303
324
  title?: string;
304
- type: "get_app_info";
325
+ type: "getAppInfo";
305
326
  activity?: string;
306
327
  thinking?: string;
307
328
  note?: string;
@@ -462,5 +483,7 @@ export type AsrControl = {
462
483
  export type TtsControl = {
463
484
  action: "tts_speak" | "tts_stop";
464
485
  text?: string;
486
+ voice_type?: string;
487
+ volume?: number;
465
488
  };
466
489
  export {};
package/dist/shellx.d.ts CHANGED
@@ -7,14 +7,13 @@
7
7
  * @module shellx
8
8
  */
9
9
  import type { ElementSelector, UIElement, WsServer } from "./protocol.js";
10
- import type { Actions, AppInfo, AppInfoResult, BaseResult, Click, ClickResult, Clipboard, ClipboardResult, Command, CommandResult as TypesCommandResult, Element, ExecuteActionsResult, Find, FindResult as TypesFindResult, Input, InputResult, Key, KeyResult, ScreenInfoResult, Screenshot, ScreenshotResult, Swipe, SwipeResult, Wait, WaitResult } from "./types.js";
10
+ import type { Actions, AppInfo, AppInfoResult, Asr, AsrResult, BaseResult, Click, ClickResult, Clipboard, ClipboardResult, Command, CommandResult as TypesCommandResult, Element, ExecuteActionsResult, Find, FindResult as TypesFindResult, Input, InputResult, Key, KeyResult, ScreenInfoResult, Screenshot, ScreenshotResult, Swipe, SwipeResult, Tts, TtsResult, Wait, WaitResult } from "./types.js";
11
11
  import ConnectionClient from "./index.js";
12
12
  import { type LogLevel } from "./logger.js";
13
13
  import type { WsClient } from "./protocol.js";
14
14
  export type { UIElement, ElementSelector };
15
15
  export type { LogLevel } from "./logger.js";
16
- export type { Click, Input, Swipe, Key, Wait, Find, Command, AppInfo, Screenshot, Clipboard, Actions, ClickResult, InputResult, SwipeResult, KeyResult, WaitResult, TypesFindResult as FindResult, TypesCommandResult as CommandResult, ClipboardResult, AppInfoResult, ScreenshotResult, ScreenInfoResult, ExecuteActionsResult, BaseResult, Element, };
17
- export type { ShellCommandResult, ShellCommandOptions } from "./shell/output-buffer.js";
16
+ export type { Click, Input, Swipe, Key, Wait, Find, Command, AppInfo, Screenshot, Clipboard, Asr, AsrResult, Tts, TtsResult, Actions, ClickResult, InputResult, SwipeResult, KeyResult, WaitResult, TypesFindResult as FindResult, TypesCommandResult as CommandResult, ClipboardResult, AppInfoResult, ScreenshotResult, ScreenInfoResult, ExecuteActionsResult, BaseResult, Element, };
18
17
  /**
19
18
  * Configuration options for ShellX instance
20
19
  */
@@ -43,6 +42,23 @@ export interface ShellXOptions {
43
42
  onReconnectFailed?: () => void;
44
43
  /** Callback for WebSocket messages */
45
44
  onMessage?: (message: WsServer) => void;
45
+ onGatewayMessage?: (message: any) => void;
46
+ /**
47
+ * Called every time the device sends an ASR recognition result.
48
+ * Use this to receive speech-to-text output without handling raw messages.
49
+ *
50
+ * @example
51
+ * ```typescript
52
+ * const shellx = new ShellX({
53
+ * deviceId: 'your-device-id',
54
+ * onAsrResult: (text) => console.log('Recognised:', text)
55
+ * });
56
+ * await shellx.startAsr();
57
+ * // ...user speaks...
58
+ * await shellx.stopAsr();
59
+ * ```
60
+ */
61
+ onAsrResult?: (text: string) => void;
46
62
  }
47
63
  /**
48
64
  * ShellX - High-level Android automation SDK (Recommended API)
@@ -96,8 +112,6 @@ export declare class ShellX {
96
112
  private uiActionHandler;
97
113
  /** Element finder for locating UI elements */
98
114
  private elementFinder;
99
- /** Shell command executor for running shell commands */
100
- private shellCommandExecutor;
101
115
  /** Connection client for device communication */
102
116
  private client;
103
117
  /**
@@ -349,7 +363,7 @@ export declare class ShellX {
349
363
  *
350
364
  * @example
351
365
  * ```typescript
352
- * // Simplified - execute command
366
+ * // Simplified - execute command (ptyType=0, shell execution with result)
353
367
  * const result = await shellx.command('ls -la');
354
368
  *
355
369
  * // Simplified - execute with timeout
@@ -358,6 +372,12 @@ export declare class ShellX {
358
372
  * // Full - execute with all options
359
373
  * const result = await shellx.command({ cmd: 'ls -la', timeout: 5000 });
360
374
  *
375
+ * // PTY execution (ptyType=1, one-way execution without result)
376
+ * const result = await shellx.command({ cmd: 'ls -la', ptyType: 1 });
377
+ *
378
+ * // PTY execution with specific terminal (ptyType=1 with terminalId)
379
+ * const result = await shellx.command({ cmd: 'ls -la', ptyType: 1, terminalId: 12345 });
380
+ *
361
381
  * console.log(result.output);
362
382
  * ```
363
383
  */
@@ -542,14 +562,6 @@ export declare class ShellX {
542
562
  visibleOnly?: boolean;
543
563
  clickableOnly?: boolean;
544
564
  }): Promise<UIElement[]>;
545
- /**
546
- * Wait for any of multiple elements to appear
547
- * @deprecated Use waitAnyElement() instead for consistency
548
- */
549
- waitForAnyElement(selectors: ElementSelector[], timeout?: number): Promise<{
550
- element: UIElement;
551
- selectorIndex: number;
552
- } | null>;
553
565
  /**
554
566
  * Wait for any of multiple elements to appear
555
567
  *
@@ -591,25 +603,6 @@ export declare class ShellX {
591
603
  * ```
592
604
  */
593
605
  scrollToFindElement(selector: ElementSelector, maxScrolls?: number, direction?: "up" | "down"): Promise<UIElement | null>;
594
- /**
595
- * Handle shell output from WebSocket messages
596
- *
597
- * This method should be called in the WebSocket message handler.
598
- *
599
- * @param chunks - Shell output chunks
600
- *
601
- * @example
602
- * ```typescript
603
- * const client = new ConnectionClient(deviceId, {
604
- * onMessage: (message) => {
605
- * if (message.chunks) {
606
- * shellx.handleShellOutput(message.chunks);
607
- * }
608
- * }
609
- * });
610
- * ```
611
- */
612
- handleShellOutput(chunks: [number, number, Uint8Array[]]): void;
613
606
  /**
614
607
  * Send raw WebSocket message (for advanced use cases)
615
608
  *
@@ -649,41 +642,65 @@ export declare class ShellX {
649
642
  */
650
643
  sendRawMessage(message: WsClient): Promise<unknown>;
651
644
  /**
652
- * Start ASR (Automatic Speech Recognition)
653
- * @returns Promise that resolves when speech recognition has started
645
+ * Start ASR (Automatic Speech Recognition) on the device.
646
+ *
647
+ * The device will begin capturing microphone audio and streaming recognition
648
+ * results back. Each result is delivered via the `onAsrResult` callback
649
+ * configured in `ShellXOptions`, or you can listen to raw messages via
650
+ * `onMessage` and check `message.jsonData?.asr`.
651
+ *
652
+ * @param options - Optional ASR options (e.g. per-call `onResult` callback)
653
+ * @returns Promise that resolves when the start command has been sent
654
654
  *
655
655
  * @example
656
656
  * ```typescript
657
- * await shellx.startSpeechRecognition();
658
- * console.log('Speech recognition started');
657
+ * const shellx = new ShellX({
658
+ * deviceId: 'your-device-id',
659
+ * onAsrResult: (text) => console.log('ASR:', text)
660
+ * });
661
+ * await shellx.ready();
662
+ * await shellx.startAsr();
663
+ * // ...user speaks into device microphone...
664
+ * await shellx.stopAsr();
659
665
  * ```
660
666
  */
661
- startSpeechRecognition(): Promise<void>;
667
+ startAsr(options?: Asr): Promise<void>;
662
668
  /**
663
- * Stop ASR (Automatic Speech Recognition)
664
- * @returns Promise that resolves when speech recognition has stopped
669
+ * Stop ASR (Automatic Speech Recognition) on the device.
670
+ *
671
+ * @returns Promise that resolves when the stop command has been sent
665
672
  *
666
673
  * @example
667
674
  * ```typescript
668
- * await shellx.stopSpeechRecognition();
669
- * console.log('Speech recognition stopped');
675
+ * await shellx.stopAsr();
670
676
  * ```
671
677
  */
672
- stopSpeechRecognition(): Promise<void>;
678
+ stopAsr(): Promise<void>;
673
679
  /**
674
- * Speak text aloud using TTS (Text-to-Speech)
675
- * @param text - The text to speak
676
- * @returns Promise that resolves when TTS has started speaking
680
+ * Speak text aloud using TTS (Text-to-Speech) on the device.
681
+ *
682
+ * @param text - The text to synthesise and play
683
+ * @param options - Optional TTS options (e.g. voice type override)
684
+ * @returns Promise resolving to TtsResult when the command is sent
677
685
  *
678
686
  * @example
679
687
  * ```typescript
688
+ * // Simple usage
680
689
  * await shellx.speak('Hello, world!');
690
+ *
691
+ * // With voice type
692
+ * await shellx.speak('你好,世界!', { voiceType: 'BV002_streaming' });
693
+ *
694
+ * // Or pass a Tts object
695
+ * await shellx.speak({ text: '你好!', voiceType: 'BV002_streaming' });
681
696
  * ```
682
697
  */
683
- speak(text: string): Promise<void>;
698
+ speak(text: string, options?: Omit<Tts, "text">): Promise<TtsResult>;
699
+ speak(ttsData: Tts): Promise<TtsResult>;
684
700
  /**
685
- * Stop TTS (Text-to-Speech)
686
- * @returns Promise that resolves when TTS has stopped speaking
701
+ * Stop TTS (Text-to-Speech) on the device.
702
+ *
703
+ * @returns Promise that resolves when the stop command has been sent
687
704
  *
688
705
  * @example
689
706
  * ```typescript
@@ -692,16 +709,82 @@ export declare class ShellX {
692
709
  */
693
710
  stopSpeaking(): Promise<void>;
694
711
  /**
695
- * Static factory method to create shell output handler
696
- * @deprecated This method is kept for backward compatibility but is no longer needed
697
- * @param shellx - The ShellX instance
698
- * @returns Message handler function
712
+ * Close a terminal session by ID
713
+ *
714
+ * This method closes a PTY (pseudo-terminal) session and releases its resources.
715
+ * The terminal ID is the same as the one used in the `terminalId` parameter
716
+ * when executing commands.
717
+ *
718
+ * @param terminalId - Terminal ID (sid) to close
719
+ * @returns Promise that resolves when the close command has been sent
699
720
  *
700
721
  * @example
701
722
  * ```typescript
702
- * const shellx = new ShellX({ deviceId: 'device-id' });
703
- * const handler = ShellX.createShellOutputHandler(shellx);
723
+ * // Execute command in a specific terminal
724
+ * await shellx.command({ cmd: 'pwd', ptyType: 1, terminalId: 12345 });
725
+ *
726
+ * // Close the terminal when done
727
+ * await shellx.closeTerminal(12345);
728
+ * ```
729
+ */
730
+ closeTerminal(terminalId: number): Promise<void>;
731
+ /**
732
+ * Close the WebSocket connection and cleanup resources
733
+ *
734
+ * This method closes the underlying WebSocket connection to the device.
735
+ * All pending tasks will be cancelled with a connection-closed result.
736
+ *
737
+ * After calling this method, the ShellX instance cannot be used for further
738
+ * operations until a new connection is established.
739
+ *
740
+ * @example
741
+ * ```typescript
742
+ * // Close the connection when done
743
+ * await shellx.close();
744
+ * ```
745
+ */
746
+ close(): void;
747
+ /**
748
+ * Disconnect from the device (alias for close)
749
+ *
750
+ * @example
751
+ * ```typescript
752
+ * await shellx.disconnect();
753
+ * ```
754
+ */
755
+ disconnect(): void;
756
+ /**
757
+ * Send a data request to the device
758
+ *
759
+ * This is a generic method that allows sending data requests to the device
760
+ * and receiving typed responses. It can be used to implement custom data
761
+ * access functionality beyond what's provided by the ShellX API.
762
+ *
763
+ * @template TResponse - Type of the response
764
+ * @param requestType - The type of data request (e.g., "addAlarm", "getCalls", etc.)
765
+ * @param params - Request parameters
766
+ * @returns Promise resolving to the typed response
767
+ *
768
+ * @example
769
+ * ```typescript
770
+ * // Add an alarm
771
+ * const alarmResult = await shellx.sendDataRequest<AddAlarmResponse>(
772
+ * "addAlarm",
773
+ * { hour: 7, minute: 30, label: "Wake up", enabled: true }
774
+ * );
775
+ *
776
+ * // Get call records
777
+ * const callsResult = await shellx.sendDataRequest<GetCallsResponse>(
778
+ * "getCalls",
779
+ * { limit: 20 }
780
+ * );
781
+ *
782
+ * // Search contacts
783
+ * const contactsResult = await shellx.sendDataRequest<SearchContactsResponse>(
784
+ * "searchContacts",
785
+ * { query: "张三", limit: 10 }
786
+ * );
704
787
  * ```
705
788
  */
706
- static createShellOutputHandler(shellx: ShellX): (message: WsServer) => void;
789
+ sendDataRequest<TResponse>(requestType: string, params: unknown): Promise<TResponse>;
707
790
  }