shellx-ai 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +201 -666
- package/dist/automation/element-finder.d.ts +0 -8
- package/dist/automation/element-finder.js +0 -7
- package/dist/automation/element-finder.js.map +1 -1
- package/dist/automation/ui-action-handler.d.ts +1 -1
- package/dist/automation/ui-action-handler.js +66 -32
- package/dist/automation/ui-action-handler.js.map +1 -1
- package/dist/cbor-compat.js +8 -10
- package/dist/cbor-compat.js.map +1 -1
- package/dist/data/index.d.ts +9 -0
- package/dist/data/index.js +11 -0
- package/dist/data/index.js.map +1 -0
- package/dist/data/types.d.ts +351 -0
- package/dist/data/types.js +8 -0
- package/dist/data/types.js.map +1 -0
- package/dist/domain-manager.js +4 -5
- package/dist/domain-manager.js.map +1 -1
- package/dist/errors.js +4 -2
- package/dist/errors.js.map +1 -1
- package/dist/index.d.ts +48 -25
- package/dist/index.js +226 -51
- package/dist/index.js.map +1 -1
- package/dist/protocol.d.ts +28 -5
- package/dist/shellx.d.ts +139 -56
- package/dist/shellx.js +211 -93
- package/dist/shellx.js.map +1 -1
- package/dist/types.d.ts +38 -1
- package/dist/ui-analyzer.d.ts +90 -0
- package/dist/ui-analyzer.js +333 -0
- package/dist/ui-analyzer.js.map +1 -0
- package/package.json +25 -4
- package/dist/shell/output-buffer.d.ts +0 -152
- package/dist/shell/output-buffer.js +0 -176
- package/dist/shell/output-buffer.js.map +0 -1
- package/dist/shell/shell-command-executor.d.ts +0 -182
- package/dist/shell/shell-command-executor.js +0 -404
- package/dist/shell/shell-command-executor.js.map +0 -1
package/dist/protocol.d.ts
CHANGED
|
@@ -161,7 +161,7 @@ export type JSONData = {
|
|
|
161
161
|
findElement?: UIHierarchy;
|
|
162
162
|
findAllElement?: string;
|
|
163
163
|
waitElement?: WAITElement;
|
|
164
|
-
|
|
164
|
+
screenshot?: ScreenShotResponse;
|
|
165
165
|
screenInfo?: ScreenInfoResponse;
|
|
166
166
|
screenChange?: ScreenChangeEvent;
|
|
167
167
|
appList?: AppListResponse;
|
|
@@ -170,6 +170,17 @@ export type JSONData = {
|
|
|
170
170
|
action_event?: ClickAction | KeyAction | SwipeAction;
|
|
171
171
|
/** Forwarded openclaw gateway message */
|
|
172
172
|
gatewayMsg?: GatewayMsg;
|
|
173
|
+
/** Data module response */
|
|
174
|
+
dataRequest?: {
|
|
175
|
+
requestType: string;
|
|
176
|
+
result: unknown;
|
|
177
|
+
};
|
|
178
|
+
/** Shell command execution result (ptyType=0) */
|
|
179
|
+
command?: {
|
|
180
|
+
success: boolean;
|
|
181
|
+
result?: string;
|
|
182
|
+
errorMessage?: string;
|
|
183
|
+
};
|
|
173
184
|
};
|
|
174
185
|
/** Position and size of a window, see the Rust version. */
|
|
175
186
|
export type WsWinsize = {
|
|
@@ -206,6 +217,8 @@ export type WsServer = {
|
|
|
206
217
|
jsonData?: JSONData;
|
|
207
218
|
uiClick?: [string, number, number];
|
|
208
219
|
uiRefresh?: boolean;
|
|
220
|
+
/** Gateway message is sent as JSON string from Java, needs parsing in SDK */
|
|
221
|
+
gatewayMsg?: string;
|
|
209
222
|
};
|
|
210
223
|
/** Screen capture region */
|
|
211
224
|
export type ScreenRegion = {
|
|
@@ -223,6 +236,7 @@ export type ScreenShotOptions = {
|
|
|
223
236
|
isRecording?: boolean;
|
|
224
237
|
saveToFile?: boolean;
|
|
225
238
|
duration?: number;
|
|
239
|
+
timeout?: number;
|
|
226
240
|
};
|
|
227
241
|
/** Client message type, see the Rust version. */
|
|
228
242
|
export type WsClient = {
|
|
@@ -240,7 +254,7 @@ export type WsClient = {
|
|
|
240
254
|
ping?: bigint;
|
|
241
255
|
findElement?: findAction;
|
|
242
256
|
waitElement?: WaitAction;
|
|
243
|
-
|
|
257
|
+
screenshot?: ScreenShotOptions;
|
|
244
258
|
screenInfo?: {
|
|
245
259
|
keepScreenOn?: boolean;
|
|
246
260
|
wakeApp?: boolean;
|
|
@@ -264,6 +278,11 @@ export type WsClient = {
|
|
|
264
278
|
switchNode?: string;
|
|
265
279
|
asrControl?: AsrControl;
|
|
266
280
|
ttsControl?: TtsControl;
|
|
281
|
+
/** Data module request */
|
|
282
|
+
dataRequest?: {
|
|
283
|
+
requestType: string;
|
|
284
|
+
params: unknown;
|
|
285
|
+
};
|
|
267
286
|
/** Openclaw gateway request to forward to the gateway */
|
|
268
287
|
gatewayReq?: {
|
|
269
288
|
type: "req";
|
|
@@ -276,7 +295,7 @@ export type ShellXActions = {
|
|
|
276
295
|
taskId?: string;
|
|
277
296
|
title?: string;
|
|
278
297
|
thought?: string;
|
|
279
|
-
action: ShellCommandAction | ClickAction | InputAction | SwipeAction | KeyAction | WaitAction | findAction | ClipboardAction | getAppInfoAction | completeAction;
|
|
298
|
+
action: ShellCommandAction | ClickAction | InputAction | SwipeAction | KeyAction | WaitAction | findAction | ClipboardAction | getAppInfoAction | ScreenShotAction | completeAction;
|
|
280
299
|
};
|
|
281
300
|
export interface ShellCommandAction {
|
|
282
301
|
title?: string;
|
|
@@ -285,10 +304,12 @@ export interface ShellCommandAction {
|
|
|
285
304
|
thinking?: string;
|
|
286
305
|
note?: string;
|
|
287
306
|
command: string;
|
|
307
|
+
ptyType?: 0 | 1;
|
|
308
|
+
terminalId?: number;
|
|
288
309
|
}
|
|
289
310
|
export interface ScreenShotAction {
|
|
290
311
|
title?: string;
|
|
291
|
-
type: "
|
|
312
|
+
type: "screenshot";
|
|
292
313
|
activity?: string;
|
|
293
314
|
thinking?: string;
|
|
294
315
|
note?: string;
|
|
@@ -301,7 +322,7 @@ export interface ScreenShotAction {
|
|
|
301
322
|
}
|
|
302
323
|
export interface getAppInfoAction {
|
|
303
324
|
title?: string;
|
|
304
|
-
type: "
|
|
325
|
+
type: "getAppInfo";
|
|
305
326
|
activity?: string;
|
|
306
327
|
thinking?: string;
|
|
307
328
|
note?: string;
|
|
@@ -462,5 +483,7 @@ export type AsrControl = {
|
|
|
462
483
|
export type TtsControl = {
|
|
463
484
|
action: "tts_speak" | "tts_stop";
|
|
464
485
|
text?: string;
|
|
486
|
+
voice_type?: string;
|
|
487
|
+
volume?: number;
|
|
465
488
|
};
|
|
466
489
|
export {};
|
package/dist/shellx.d.ts
CHANGED
|
@@ -7,14 +7,13 @@
|
|
|
7
7
|
* @module shellx
|
|
8
8
|
*/
|
|
9
9
|
import type { ElementSelector, UIElement, WsServer } from "./protocol.js";
|
|
10
|
-
import type { Actions, AppInfo, AppInfoResult, BaseResult, Click, ClickResult, Clipboard, ClipboardResult, Command, CommandResult as TypesCommandResult, Element, ExecuteActionsResult, Find, FindResult as TypesFindResult, Input, InputResult, Key, KeyResult, ScreenInfoResult, Screenshot, ScreenshotResult, Swipe, SwipeResult, Wait, WaitResult } from "./types.js";
|
|
10
|
+
import type { Actions, AppInfo, AppInfoResult, Asr, AsrResult, BaseResult, Click, ClickResult, Clipboard, ClipboardResult, Command, CommandResult as TypesCommandResult, Element, ExecuteActionsResult, Find, FindResult as TypesFindResult, Input, InputResult, Key, KeyResult, ScreenInfoResult, Screenshot, ScreenshotResult, Swipe, SwipeResult, Tts, TtsResult, Wait, WaitResult } from "./types.js";
|
|
11
11
|
import ConnectionClient from "./index.js";
|
|
12
12
|
import { type LogLevel } from "./logger.js";
|
|
13
13
|
import type { WsClient } from "./protocol.js";
|
|
14
14
|
export type { UIElement, ElementSelector };
|
|
15
15
|
export type { LogLevel } from "./logger.js";
|
|
16
|
-
export type { Click, Input, Swipe, Key, Wait, Find, Command, AppInfo, Screenshot, Clipboard, Actions, ClickResult, InputResult, SwipeResult, KeyResult, WaitResult, TypesFindResult as FindResult, TypesCommandResult as CommandResult, ClipboardResult, AppInfoResult, ScreenshotResult, ScreenInfoResult, ExecuteActionsResult, BaseResult, Element, };
|
|
17
|
-
export type { ShellCommandResult, ShellCommandOptions } from "./shell/output-buffer.js";
|
|
16
|
+
export type { Click, Input, Swipe, Key, Wait, Find, Command, AppInfo, Screenshot, Clipboard, Asr, AsrResult, Tts, TtsResult, Actions, ClickResult, InputResult, SwipeResult, KeyResult, WaitResult, TypesFindResult as FindResult, TypesCommandResult as CommandResult, ClipboardResult, AppInfoResult, ScreenshotResult, ScreenInfoResult, ExecuteActionsResult, BaseResult, Element, };
|
|
18
17
|
/**
|
|
19
18
|
* Configuration options for ShellX instance
|
|
20
19
|
*/
|
|
@@ -43,6 +42,23 @@ export interface ShellXOptions {
|
|
|
43
42
|
onReconnectFailed?: () => void;
|
|
44
43
|
/** Callback for WebSocket messages */
|
|
45
44
|
onMessage?: (message: WsServer) => void;
|
|
45
|
+
onGatewayMessage?: (message: any) => void;
|
|
46
|
+
/**
|
|
47
|
+
* Called every time the device sends an ASR recognition result.
|
|
48
|
+
* Use this to receive speech-to-text output without handling raw messages.
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* ```typescript
|
|
52
|
+
* const shellx = new ShellX({
|
|
53
|
+
* deviceId: 'your-device-id',
|
|
54
|
+
* onAsrResult: (text) => console.log('Recognised:', text)
|
|
55
|
+
* });
|
|
56
|
+
* await shellx.startAsr();
|
|
57
|
+
* // ...user speaks...
|
|
58
|
+
* await shellx.stopAsr();
|
|
59
|
+
* ```
|
|
60
|
+
*/
|
|
61
|
+
onAsrResult?: (text: string) => void;
|
|
46
62
|
}
|
|
47
63
|
/**
|
|
48
64
|
* ShellX - High-level Android automation SDK (Recommended API)
|
|
@@ -96,8 +112,6 @@ export declare class ShellX {
|
|
|
96
112
|
private uiActionHandler;
|
|
97
113
|
/** Element finder for locating UI elements */
|
|
98
114
|
private elementFinder;
|
|
99
|
-
/** Shell command executor for running shell commands */
|
|
100
|
-
private shellCommandExecutor;
|
|
101
115
|
/** Connection client for device communication */
|
|
102
116
|
private client;
|
|
103
117
|
/**
|
|
@@ -349,7 +363,7 @@ export declare class ShellX {
|
|
|
349
363
|
*
|
|
350
364
|
* @example
|
|
351
365
|
* ```typescript
|
|
352
|
-
* // Simplified - execute command
|
|
366
|
+
* // Simplified - execute command (ptyType=0, shell execution with result)
|
|
353
367
|
* const result = await shellx.command('ls -la');
|
|
354
368
|
*
|
|
355
369
|
* // Simplified - execute with timeout
|
|
@@ -358,6 +372,12 @@ export declare class ShellX {
|
|
|
358
372
|
* // Full - execute with all options
|
|
359
373
|
* const result = await shellx.command({ cmd: 'ls -la', timeout: 5000 });
|
|
360
374
|
*
|
|
375
|
+
* // PTY execution (ptyType=1, one-way execution without result)
|
|
376
|
+
* const result = await shellx.command({ cmd: 'ls -la', ptyType: 1 });
|
|
377
|
+
*
|
|
378
|
+
* // PTY execution with specific terminal (ptyType=1 with terminalId)
|
|
379
|
+
* const result = await shellx.command({ cmd: 'ls -la', ptyType: 1, terminalId: 12345 });
|
|
380
|
+
*
|
|
361
381
|
* console.log(result.output);
|
|
362
382
|
* ```
|
|
363
383
|
*/
|
|
@@ -542,14 +562,6 @@ export declare class ShellX {
|
|
|
542
562
|
visibleOnly?: boolean;
|
|
543
563
|
clickableOnly?: boolean;
|
|
544
564
|
}): Promise<UIElement[]>;
|
|
545
|
-
/**
|
|
546
|
-
* Wait for any of multiple elements to appear
|
|
547
|
-
* @deprecated Use waitAnyElement() instead for consistency
|
|
548
|
-
*/
|
|
549
|
-
waitForAnyElement(selectors: ElementSelector[], timeout?: number): Promise<{
|
|
550
|
-
element: UIElement;
|
|
551
|
-
selectorIndex: number;
|
|
552
|
-
} | null>;
|
|
553
565
|
/**
|
|
554
566
|
* Wait for any of multiple elements to appear
|
|
555
567
|
*
|
|
@@ -591,25 +603,6 @@ export declare class ShellX {
|
|
|
591
603
|
* ```
|
|
592
604
|
*/
|
|
593
605
|
scrollToFindElement(selector: ElementSelector, maxScrolls?: number, direction?: "up" | "down"): Promise<UIElement | null>;
|
|
594
|
-
/**
|
|
595
|
-
* Handle shell output from WebSocket messages
|
|
596
|
-
*
|
|
597
|
-
* This method should be called in the WebSocket message handler.
|
|
598
|
-
*
|
|
599
|
-
* @param chunks - Shell output chunks
|
|
600
|
-
*
|
|
601
|
-
* @example
|
|
602
|
-
* ```typescript
|
|
603
|
-
* const client = new ConnectionClient(deviceId, {
|
|
604
|
-
* onMessage: (message) => {
|
|
605
|
-
* if (message.chunks) {
|
|
606
|
-
* shellx.handleShellOutput(message.chunks);
|
|
607
|
-
* }
|
|
608
|
-
* }
|
|
609
|
-
* });
|
|
610
|
-
* ```
|
|
611
|
-
*/
|
|
612
|
-
handleShellOutput(chunks: [number, number, Uint8Array[]]): void;
|
|
613
606
|
/**
|
|
614
607
|
* Send raw WebSocket message (for advanced use cases)
|
|
615
608
|
*
|
|
@@ -649,41 +642,65 @@ export declare class ShellX {
|
|
|
649
642
|
*/
|
|
650
643
|
sendRawMessage(message: WsClient): Promise<unknown>;
|
|
651
644
|
/**
|
|
652
|
-
* Start ASR (Automatic Speech Recognition)
|
|
653
|
-
*
|
|
645
|
+
* Start ASR (Automatic Speech Recognition) on the device.
|
|
646
|
+
*
|
|
647
|
+
* The device will begin capturing microphone audio and streaming recognition
|
|
648
|
+
* results back. Each result is delivered via the `onAsrResult` callback
|
|
649
|
+
* configured in `ShellXOptions`, or you can listen to raw messages via
|
|
650
|
+
* `onMessage` and check `message.jsonData?.asr`.
|
|
651
|
+
*
|
|
652
|
+
* @param options - Optional ASR options (e.g. per-call `onResult` callback)
|
|
653
|
+
* @returns Promise that resolves when the start command has been sent
|
|
654
654
|
*
|
|
655
655
|
* @example
|
|
656
656
|
* ```typescript
|
|
657
|
-
*
|
|
658
|
-
*
|
|
657
|
+
* const shellx = new ShellX({
|
|
658
|
+
* deviceId: 'your-device-id',
|
|
659
|
+
* onAsrResult: (text) => console.log('ASR:', text)
|
|
660
|
+
* });
|
|
661
|
+
* await shellx.ready();
|
|
662
|
+
* await shellx.startAsr();
|
|
663
|
+
* // ...user speaks into device microphone...
|
|
664
|
+
* await shellx.stopAsr();
|
|
659
665
|
* ```
|
|
660
666
|
*/
|
|
661
|
-
|
|
667
|
+
startAsr(options?: Asr): Promise<void>;
|
|
662
668
|
/**
|
|
663
|
-
* Stop ASR (Automatic Speech Recognition)
|
|
664
|
-
*
|
|
669
|
+
* Stop ASR (Automatic Speech Recognition) on the device.
|
|
670
|
+
*
|
|
671
|
+
* @returns Promise that resolves when the stop command has been sent
|
|
665
672
|
*
|
|
666
673
|
* @example
|
|
667
674
|
* ```typescript
|
|
668
|
-
* await shellx.
|
|
669
|
-
* console.log('Speech recognition stopped');
|
|
675
|
+
* await shellx.stopAsr();
|
|
670
676
|
* ```
|
|
671
677
|
*/
|
|
672
|
-
|
|
678
|
+
stopAsr(): Promise<void>;
|
|
673
679
|
/**
|
|
674
|
-
* Speak text aloud using TTS (Text-to-Speech)
|
|
675
|
-
*
|
|
676
|
-
* @
|
|
680
|
+
* Speak text aloud using TTS (Text-to-Speech) on the device.
|
|
681
|
+
*
|
|
682
|
+
* @param text - The text to synthesise and play
|
|
683
|
+
* @param options - Optional TTS options (e.g. voice type override)
|
|
684
|
+
* @returns Promise resolving to TtsResult when the command is sent
|
|
677
685
|
*
|
|
678
686
|
* @example
|
|
679
687
|
* ```typescript
|
|
688
|
+
* // Simple usage
|
|
680
689
|
* await shellx.speak('Hello, world!');
|
|
690
|
+
*
|
|
691
|
+
* // With voice type
|
|
692
|
+
* await shellx.speak('你好,世界!', { voiceType: 'BV002_streaming' });
|
|
693
|
+
*
|
|
694
|
+
* // Or pass a Tts object
|
|
695
|
+
* await shellx.speak({ text: '你好!', voiceType: 'BV002_streaming' });
|
|
681
696
|
* ```
|
|
682
697
|
*/
|
|
683
|
-
speak(text: string): Promise<
|
|
698
|
+
speak(text: string, options?: Omit<Tts, "text">): Promise<TtsResult>;
|
|
699
|
+
speak(ttsData: Tts): Promise<TtsResult>;
|
|
684
700
|
/**
|
|
685
|
-
* Stop TTS (Text-to-Speech)
|
|
686
|
-
*
|
|
701
|
+
* Stop TTS (Text-to-Speech) on the device.
|
|
702
|
+
*
|
|
703
|
+
* @returns Promise that resolves when the stop command has been sent
|
|
687
704
|
*
|
|
688
705
|
* @example
|
|
689
706
|
* ```typescript
|
|
@@ -692,16 +709,82 @@ export declare class ShellX {
|
|
|
692
709
|
*/
|
|
693
710
|
stopSpeaking(): Promise<void>;
|
|
694
711
|
/**
|
|
695
|
-
*
|
|
696
|
-
*
|
|
697
|
-
*
|
|
698
|
-
*
|
|
712
|
+
* Close a terminal session by ID
|
|
713
|
+
*
|
|
714
|
+
* This method closes a PTY (pseudo-terminal) session and releases its resources.
|
|
715
|
+
* The terminal ID is the same as the one used in the `terminalId` parameter
|
|
716
|
+
* when executing commands.
|
|
717
|
+
*
|
|
718
|
+
* @param terminalId - Terminal ID (sid) to close
|
|
719
|
+
* @returns Promise that resolves when the close command has been sent
|
|
699
720
|
*
|
|
700
721
|
* @example
|
|
701
722
|
* ```typescript
|
|
702
|
-
*
|
|
703
|
-
*
|
|
723
|
+
* // Execute command in a specific terminal
|
|
724
|
+
* await shellx.command({ cmd: 'pwd', ptyType: 1, terminalId: 12345 });
|
|
725
|
+
*
|
|
726
|
+
* // Close the terminal when done
|
|
727
|
+
* await shellx.closeTerminal(12345);
|
|
728
|
+
* ```
|
|
729
|
+
*/
|
|
730
|
+
closeTerminal(terminalId: number): Promise<void>;
|
|
731
|
+
/**
|
|
732
|
+
* Close the WebSocket connection and cleanup resources
|
|
733
|
+
*
|
|
734
|
+
* This method closes the underlying WebSocket connection to the device.
|
|
735
|
+
* All pending tasks will be cancelled with a connection-closed result.
|
|
736
|
+
*
|
|
737
|
+
* After calling this method, the ShellX instance cannot be used for further
|
|
738
|
+
* operations until a new connection is established.
|
|
739
|
+
*
|
|
740
|
+
* @example
|
|
741
|
+
* ```typescript
|
|
742
|
+
* // Close the connection when done
|
|
743
|
+
* await shellx.close();
|
|
744
|
+
* ```
|
|
745
|
+
*/
|
|
746
|
+
close(): void;
|
|
747
|
+
/**
|
|
748
|
+
* Disconnect from the device (alias for close)
|
|
749
|
+
*
|
|
750
|
+
* @example
|
|
751
|
+
* ```typescript
|
|
752
|
+
* await shellx.disconnect();
|
|
753
|
+
* ```
|
|
754
|
+
*/
|
|
755
|
+
disconnect(): void;
|
|
756
|
+
/**
|
|
757
|
+
* Send a data request to the device
|
|
758
|
+
*
|
|
759
|
+
* This is a generic method that allows sending data requests to the device
|
|
760
|
+
* and receiving typed responses. It can be used to implement custom data
|
|
761
|
+
* access functionality beyond what's provided by the ShellX API.
|
|
762
|
+
*
|
|
763
|
+
* @template TResponse - Type of the response
|
|
764
|
+
* @param requestType - The type of data request (e.g., "addAlarm", "getCalls", etc.)
|
|
765
|
+
* @param params - Request parameters
|
|
766
|
+
* @returns Promise resolving to the typed response
|
|
767
|
+
*
|
|
768
|
+
* @example
|
|
769
|
+
* ```typescript
|
|
770
|
+
* // Add an alarm
|
|
771
|
+
* const alarmResult = await shellx.sendDataRequest<AddAlarmResponse>(
|
|
772
|
+
* "addAlarm",
|
|
773
|
+
* { hour: 7, minute: 30, label: "Wake up", enabled: true }
|
|
774
|
+
* );
|
|
775
|
+
*
|
|
776
|
+
* // Get call records
|
|
777
|
+
* const callsResult = await shellx.sendDataRequest<GetCallsResponse>(
|
|
778
|
+
* "getCalls",
|
|
779
|
+
* { limit: 20 }
|
|
780
|
+
* );
|
|
781
|
+
*
|
|
782
|
+
* // Search contacts
|
|
783
|
+
* const contactsResult = await shellx.sendDataRequest<SearchContactsResponse>(
|
|
784
|
+
* "searchContacts",
|
|
785
|
+
* { query: "张三", limit: 10 }
|
|
786
|
+
* );
|
|
704
787
|
* ```
|
|
705
788
|
*/
|
|
706
|
-
|
|
789
|
+
sendDataRequest<TResponse>(requestType: string, params: unknown): Promise<TResponse>;
|
|
707
790
|
}
|