@amaster.ai/client 1.0.0-alpha.2 → 1.0.0-beta.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/types/asr.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import type{ HttpClient } from "./http";
1
2
  /**
2
3
  * ASR Realtime WebSocket Client for Qwen-ASR Realtime API
3
4
  *
@@ -6,6 +7,8 @@
6
7
  *
7
8
  * @example
8
9
  * ```typescript
10
+ * import { createClient } from "@amaster.ai/client";
11
+ * export const client = createClient({});
9
12
  * const asrClient = client.asr({
10
13
  * language: "zh",
11
14
  * enableVAD: true,
@@ -27,211 +30,298 @@
27
30
  * await asrClient.close();
28
31
  * ```
29
32
  */
30
- export type ASRLanguage = "zh" | "yue" | "en" | "ja" | "de" | "ko" | "ru" | "fr" | "pt" | "ar" | "it" | "es" | "hi" | "id" | "th" | "tr" | "uk" | "vi" | "cs" | "da" | "fil" | "fi" | "is" | "ms" | "no" | "pl" | "sv";
31
- export type ClientEventType = "session.update" | "input_audio_buffer.append" | "input_audio_buffer.commit" | "session.finish";
32
- export type ServerEventType = "session.created" | "session.updated" | "input_audio_buffer.speech_started" | "input_audio_buffer.speech_stopped" | "input_audio_buffer.committed" | "conversation.item.input_audio_transcription.text" | "conversation.item.input_audio_transcription.completed" | "session.finished" | "error";
33
+ export type ASRLanguage =
34
+ | "zh"
35
+ | "yue"
36
+ | "en"
37
+ | "ja"
38
+ | "de"
39
+ | "ko"
40
+ | "ru"
41
+ | "fr"
42
+ | "pt"
43
+ | "ar"
44
+ | "it"
45
+ | "es"
46
+ | "hi"
47
+ | "id"
48
+ | "th"
49
+ | "tr"
50
+ | "uk"
51
+ | "vi"
52
+ | "cs"
53
+ | "da"
54
+ | "fil"
55
+ | "fi"
56
+ | "is"
57
+ | "ms"
58
+ | "no"
59
+ | "pl"
60
+ | "sv";
61
+ export type ClientEventType =
62
+ | "session.update"
63
+ | "input_audio_buffer.append"
64
+ | "input_audio_buffer.commit"
65
+ | "session.finish";
66
+ export type ServerEventType =
67
+ | "session.created"
68
+ | "session.updated"
69
+ | "input_audio_buffer.speech_started"
70
+ | "input_audio_buffer.speech_stopped"
71
+ | "input_audio_buffer.committed"
72
+ | "conversation.item.input_audio_transcription.text"
73
+ | "conversation.item.input_audio_transcription.completed"
74
+ | "session.finished"
75
+ | "error";
33
76
  export interface BaseEvent {
34
- event_id: string;
35
- type: ClientEventType | ServerEventType;
77
+ event_id: string;
78
+ type: ClientEventType | ServerEventType;
36
79
  }
37
80
  export interface SessionUpdateEvent extends BaseEvent {
38
- type: "session.update";
39
- session: SessionConfig;
81
+ type: "session.update";
82
+ session: SessionConfig;
40
83
  }
41
84
  export interface InputAudioBufferAppendEvent extends BaseEvent {
42
- type: "input_audio_buffer.append";
43
- audio: string;
85
+ type: "input_audio_buffer.append";
86
+ audio: string;
44
87
  }
45
88
  export interface InputAudioBufferCommitEvent extends BaseEvent {
46
- type: "input_audio_buffer.commit";
89
+ type: "input_audio_buffer.commit";
47
90
  }
48
91
  export interface SessionFinishEvent extends BaseEvent {
49
- type: "session.finish";
92
+ type: "session.finish";
50
93
  }
51
- type ClientEvent = SessionUpdateEvent | InputAudioBufferAppendEvent | InputAudioBufferCommitEvent | SessionFinishEvent;
94
+ type ClientEvent =
95
+ | SessionUpdateEvent
96
+ | InputAudioBufferAppendEvent
97
+ | InputAudioBufferCommitEvent
98
+ | SessionFinishEvent;
52
99
  export interface SessionCreatedEvent extends BaseEvent {
53
- type: "session.created";
54
- session: {
55
- id: string;
56
- };
100
+ type: "session.created";
101
+ session: {
102
+ id: string;
103
+ };
57
104
  }
58
105
  export interface SessionUpdatedEvent extends BaseEvent {
59
- type: "session.updated";
60
- session: SessionConfig;
106
+ type: "session.updated";
107
+ session: SessionConfig;
61
108
  }
62
109
  export interface SpeechStartedEvent extends BaseEvent {
63
- type: "input_audio_buffer.speech_started";
110
+ type: "input_audio_buffer.speech_started";
64
111
  }
65
112
  export interface SpeechStoppedEvent extends BaseEvent {
66
- type: "input_audio_buffer.speech_stopped";
113
+ type: "input_audio_buffer.speech_stopped";
67
114
  }
68
115
  export interface InputAudioBufferCommittedEvent extends BaseEvent {
69
- type: "input_audio_buffer.committed";
116
+ type: "input_audio_buffer.committed";
70
117
  }
71
118
  export interface TranscriptionTextEvent extends BaseEvent {
72
- type: "conversation.item.input_audio_transcription.text";
73
- text?: string;
74
- stash?: string;
75
- transcript?: string;
119
+ type: "conversation.item.input_audio_transcription.text";
120
+ text?: string;
121
+ stash?: string;
122
+ transcript?: string;
76
123
  }
77
124
  export interface TranscriptionCompletedEvent extends BaseEvent {
78
- type: "conversation.item.input_audio_transcription.completed";
79
- text?: string;
80
- transcript?: string;
125
+ type: "conversation.item.input_audio_transcription.completed";
126
+ text?: string;
127
+ transcript?: string;
81
128
  }
82
129
  export interface SessionFinishedEvent extends BaseEvent {
83
- type: "session.finished";
130
+ type: "session.finished";
84
131
  }
85
132
  export interface ErrorEvent extends BaseEvent {
86
- type: "error";
87
- error: {
88
- message: string;
89
- code?: string;
90
- };
133
+ type: "error";
134
+ error: {
135
+ message: string;
136
+ code?: string;
137
+ };
91
138
  }
92
- export type ServerEvent = SessionCreatedEvent | SessionUpdatedEvent | SpeechStartedEvent | SpeechStoppedEvent | InputAudioBufferCommittedEvent | TranscriptionTextEvent | TranscriptionCompletedEvent | SessionFinishedEvent | ErrorEvent;
139
+ export type ServerEvent =
140
+ | SessionCreatedEvent
141
+ | SessionUpdatedEvent
142
+ | SpeechStartedEvent
143
+ | SpeechStoppedEvent
144
+ | InputAudioBufferCommittedEvent
145
+ | TranscriptionTextEvent
146
+ | TranscriptionCompletedEvent
147
+ | SessionFinishedEvent
148
+ | ErrorEvent;
93
149
  export interface TurnDetectionConfig {
94
- type: "server_vad";
95
- /** VAD检测阈值,推荐设为 0.0,默认值 0.2,范围 [-1, 1] */
96
- threshold?: number;
97
- /** VAD断句检测阈值(ms),推荐设为 400,默认值 800,范围 [200, 6000] */
98
- silence_duration_ms?: number;
150
+ type: "server_vad";
151
+ /** VAD检测阈值,推荐设为 0.0,默认值 0.2,范围 [-1, 1] */
152
+ threshold?: number;
153
+ /** VAD断句检测阈值(ms),推荐设为 400,默认值 800,范围 [200, 6000] */
154
+ silence_duration_ms?: number;
99
155
  }
100
156
  export interface InputAudioTranscriptionConfig {
101
- language?: ASRLanguage;
157
+ language?: ASRLanguage;
102
158
  }
103
159
  export interface SessionConfig {
104
- input_audio_format?: "pcm" | "opus";
105
- sample_rate?: 16000 | 8000;
106
- input_audio_transcription?: InputAudioTranscriptionConfig;
107
- turn_detection?: TurnDetectionConfig | null;
160
+ input_audio_format?: "pcm" | "opus";
161
+ sample_rate?: 16000 | 8000;
162
+ input_audio_transcription?: InputAudioTranscriptionConfig;
163
+ turn_detection?: TurnDetectionConfig | null;
108
164
  }
109
165
  export interface ASRClientConfig {
110
- /**
111
- * Audio format
112
- * @default "pcm"
113
- */
114
- audioFormat?: "pcm" | "opus";
115
- /**
116
- * Sample rate in Hz
117
- * @default 16000
118
- * @description 支持 16000 和 8000。设置为 8000 时,服务端会先升采样到16000Hz再进行识别,可能引入微小延迟。
119
- */
120
- sampleRate?: 16000 | 8000;
121
- /**
122
- * Audio source language
123
- * @default "zh"
124
- * @description 支持多种语言,包括 zh(中文)、yue(粤语)、en(英文)、ja(日语)等
125
- */
126
- language?: ASRLanguage;
127
- /**
128
- * Enable VAD (Voice Activity Detection) mode
129
- * @default true
130
- * @description true = VAD模式(服务端自动检测语音开始/结束),false = Manual模式(客户端手动控制)
131
- */
132
- enableVAD?: boolean;
133
- /**
134
- * VAD detection threshold
135
- * @default 0.2
136
- * @description 推荐设为 0.0。取值范围 [-1, 1]。较低的阈值会提高 VAD 的灵敏度。
137
- */
138
- vadThreshold?: number;
139
- /**
140
- * VAD silence duration threshold in milliseconds
141
- * @default 800
142
- * @description 推荐设为 400。取值范围 [200, 6000]。静音持续时长超过该阈值将被认为是语句结束。
143
- */
144
- vadSilenceDurationMs?: number;
145
- /**
146
- * Get access token for WebSocket authentication
147
- */
148
- getAccessToken?: () => string | null;
149
- /**
150
- * Called when connection is ready (session.created received and session.update sent)
151
- */
152
- onReady?: () => void;
153
- /**
154
- * Called when speech is detected (VAD mode only)
155
- */
156
- onSpeechStart?: () => void;
157
- /**
158
- * Called when speech stops (VAD mode only)
159
- */
160
- onSpeechEnd?: () => void;
161
- /**
162
- * Called on transcript result
163
- * @param text - Transcribed text
164
- * @param isFinal - Whether this is the final result
165
- */
166
- onTranscript?: (text: string, isFinal: boolean) => void;
167
- /**
168
- * Called when audio buffer is committed (non-VAD mode only)
169
- */
170
- onAudioBufferCommitted?: () => void;
171
- /**
172
- * Called when session is finished
173
- */
174
- onSessionFinished?: () => void;
175
- /**
176
- * Called on error
177
- */
178
- onError?: (error: Error) => void;
179
- /**
180
- * Called on close
181
- */
182
- onClose?: () => void;
166
+ /**
167
+ * Audio format
168
+ * @default "pcm"
169
+ */
170
+ audioFormat?: "pcm" | "opus";
171
+ /**
172
+ * Sample rate in Hz
173
+ * @default 16000
174
+ * @description 支持 16000 和 8000。设置为 8000 时,服务端会先升采样到16000Hz再进行识别,可能引入微小延迟。
175
+ */
176
+ sampleRate?: 16000 | 8000;
177
+ /**
178
+ * Audio source language
179
+ * @default "zh"
180
+ * @description 支持多种语言,包括 zh(中文)、yue(粤语)、en(英文)、ja(日语)等
181
+ */
182
+ language?: ASRLanguage;
183
+ /**
184
+ * Enable VAD (Voice Activity Detection) mode
185
+ * @default true
186
+ * @description true = VAD模式(服务端自动检测语音开始/结束),false = Manual模式(客户端手动控制)
187
+ */
188
+ enableVAD?: boolean;
189
+ /**
190
+ * VAD detection threshold
191
+ * @default 0.2
192
+ * @description 推荐设为 0.0。取值范围 [-1, 1]。较低的阈值会提高 VAD 的灵敏度。
193
+ */
194
+ vadThreshold?: number;
195
+ /**
196
+ * VAD silence duration threshold in milliseconds
197
+ * @default 800
198
+ * @description 推荐设为 400。取值范围 [200, 6000]。静音持续时长超过该阈值将被认为是语句结束。
199
+ */
200
+ vadSilenceDurationMs?: number;
201
+ /**
202
+ * Get access token for WebSocket authentication
203
+ */
204
+ getAccessToken?: () => string | null;
205
+ /**
206
+ * Called when connection is ready (session.created received and session.update sent)
207
+ */
208
+ onReady?: () => void;
209
+ /**
210
+ * Called when speech is detected (VAD mode only)
211
+ */
212
+ onSpeechStart?: () => void;
213
+ /**
214
+ * Called when speech stops (VAD mode only)
215
+ */
216
+ onSpeechEnd?: () => void;
217
+ /**
218
+ * Called on transcript result
219
+ * @param text - Transcribed text
220
+ * @param isFinal - Whether this is the final result
221
+ */
222
+ onTranscript?: (text: string, isFinal: boolean) => void;
223
+ /**
224
+ * Called when audio buffer is committed (non-VAD mode only)
225
+ */
226
+ onAudioBufferCommitted?: () => void;
227
+ /**
228
+ * Called when session is finished
229
+ */
230
+ onSessionFinished?: () => void;
231
+ /**
232
+ * Called on error
233
+ */
234
+ onError?: (error: Error) => void;
235
+ /**
236
+ * Called on close
237
+ */
238
+ onClose?: () => void;
183
239
  }
184
240
  export interface ASRClient {
185
- /** Connect to ASR service and establish session */
186
- connect(): Promise<void>;
187
- /** Start recording from microphone */
188
- startRecording(): Promise<void>;
189
- /**
190
- * Stop recording
191
- * @description In non-VAD mode, this triggers recognition by sending input_audio_buffer.commit
192
- */
193
- stopRecording(): Promise<void>;
194
- /**
195
- * Close connection gracefully
196
- * @description Sends session.finish and waits for session.finished before closing
197
- */
198
- close(): Promise<void>;
199
- /**
200
- * Check if currently recording
201
- */
202
- isRecording(): boolean;
203
- /**
204
- * Check if connected to server
205
- */
206
- isConnected(): boolean;
207
- }
208
- declare const _default$1: (authConfig: Pick<ASRClientConfig, "getAccessToken">) => (config: ASRClientConfig) => ASRClient;
241
+ /** Connect to ASR service and establish session */
242
+ connect(): Promise<void>;
243
+ /** Start recording from microphone */
244
+ startRecording(): Promise<void>;
245
+ /**
246
+ * Stop recording
247
+ * @description In non-VAD mode, this triggers recognition by sending input_audio_buffer.commit
248
+ */
249
+ stopRecording(): Promise<void>;
250
+ /**
251
+ * Close connection gracefully
252
+ * @description Sends session.finish and waits for session.finished before closing
253
+ */
254
+ close(): Promise<void>;
255
+ /**
256
+ * Check if currently recording
257
+ */
258
+ isRecording(): boolean;
259
+ /**
260
+ * Check if connected to server
261
+ */
262
+ isConnected(): boolean;
263
+ }
264
+ declare const _default$1: (
265
+ authConfig: Pick<ASRClientConfig, "getAccessToken">
266
+ ) => (config: ASRClientConfig) => ASRClient;
267
+
268
+ export interface Recorder {
269
+ /** Start recording */
270
+ start(): Promise<void>;
271
+ /**
272
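+ * Stop recording. The declared return type is `Promise<void>`, so the audio is not returned from this call; see `RecorderOptions.onStop`, which receives the base64-encoded WAV audio data that can be used to call the ASR API.
273
+ *
274
+ * @returns A promise that resolves with no value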
275
+ */
276
+ stop(): Promise<void>;
277
+ }
278
+
279
+ export interface RecorderOptions {
280
+ /** Called when recording starts */
281
+ onStart?: () => void;
282
+ /**
283
+ * Called when recording stops, with base64-encoded WAV audio data. You can use this data to call the ASR API.
284
+ *
285
+ * @param base64 - Base64-encoded WAV audio data
286
+ * @returns void
287
+ */
288
+ onStop?: (base64: string) => void;
289
+ onError?: (error: Error) => void;
290
+ }
291
+
292
+ export type ASRHttpStatus = "idle" | "recording" | "recognizing";
209
293
 
210
294
  export interface ASRHttpClientConfig {
211
- /** Get access token */
212
- getAccessToken?(): string | null;
213
- /** Language, default 'zh' */
214
- language?: string;
215
- /** Sample rate, default 16000 */
216
- sampleRate?: number;
217
- /** Called when recording starts */
218
- onRecordingStart?: () => void;
219
- /** Called when recording stops */
220
- onRecordingStop?: () => void;
221
- /** Called with recognition result */
222
- onResult?: (text: string) => void;
223
- /** Called on error */
224
- onError?: (error: Error) => void;
295
+ /** Optional `HttpClient` instance (type imported from "./http") */
296
+ http?: HttpClient;
297
+ /** Get access token */
298
+ getAccessToken?(): string | null;
299
+ /** Create custom recorder */
300
+ createRecorder?(options?: RecorderOptions): Promise<Recorder>;
301
+ /** Language, default 'zh' */
302
+ language?: string;
303
+ /** Sample rate, default 16000 */
304
+ sampleRate?: number;
305
+ /** Called when recording starts */
306
+ onRecordingStart?: () => void;
307
+ /** Called when recording stops */
308
+ onRecordingStop?: () => void;
309
+ /** Called with recognition result */
310
+ onResult?: (text: string) => void;
311
+ /** Called on error */
312
+ onError?: (error: Error) => void;
313
+ /** Called when status changes */
314
+ onStatusChange?: (status: ASRHttpStatus) => void;
225
315
  }
226
316
  export interface ASRHttpClient {
227
- /** Start recording (press-to-talk) */
228
- startRecording(): Promise<void>;
229
- /** Stop recording and get result */
230
- stopRecording(): Promise<string>;
231
- /** Record for specific duration then recognize */
232
- recordAndRecognize(durationMs: number): Promise<string>;
233
- /** Recognize audio file (File or Blob) */
234
- recognizeFile(file: File | Blob): Promise<string>;
235
- /** Recognize audio from URL */
236
- recognizeUrl(audioUrl: string): Promise<string>;
237
- }
317
+ /** Start recording (press-to-talk) */
318
+ startRecording(): Promise<void>;
319
+ /** Stop recording and get result */
320
+ stopRecording(): Promise<string>;
321
+ /** Record for specific duration then recognize */
322
+ recordAndRecognize(durationMs: number): Promise<string>;
323
+ /** Recognize audio file (File or Blob) */
324
+ recognizeFile(file: File | Blob): Promise<string>;
325
+ /** Recognize audio from URL */
326
+ recognizeUrl(audioUrl: string): Promise<string>;
327
+ }
@@ -71,14 +71,14 @@ export interface CaptchaResponse {
71
71
  export interface CodeAuthAPI {
72
72
  /**
73
73
  * Login with verification code
74
- *
74
+ *
75
75
  * Authenticates using a verification code sent to email or phone.
76
- *
76
+ *
77
77
  * @param params - Email/phone and verification code
78
78
  * @returns User info and access token
79
79
  *
80
80
  */
81
- codeLogin(params: CodeLoginParams): Promise<ClientResult<LoginResponse>>;
81
+ loginWithCode(params: CodeLoginParams): Promise<ClientResult<LoginResponse>>;
82
82
 
83
83
  /**
84
84
  * Send verification code
@@ -98,6 +98,33 @@ export interface AuthClientAPI
98
98
  */
99
99
  refreshToken(): Promise<ClientResult<RefreshTokenResponse>>;
100
100
 
101
+ // ==================== Event Handling ====================
102
+
103
+ /**
104
+ * Subscribe to authentication events
105
+ *
106
+ * Available events:
107
+ * - `login` - Fired after successful login, receives User object
108
+ * - `logout` - Fired after logout
109
+ * - `tokenExpired` - Fired when access token expires
110
+ * - `tokenRefreshed` - Fired after token refresh, receives new token
111
+ * - `unauthorized` - Fired on 401 response
112
+ *
113
+ * @param event - Event name to subscribe to
114
+ * @param handler - Callback function
115
+ *
116
+ */
117
+ on(event: "login" | "logout" | "tokenExpired" | "tokenRefreshed" | "unauthorized", handler: (...args: unknown[]) => void): void;
118
+
119
+ /**
120
+ * Unsubscribe from authentication events
121
+ *
122
+ * @param event - Event name to unsubscribe from
123
+ * @param handler - The same callback function that was passed to `on()`
124
+ *
125
+ */
126
+ off(event: "login" | "logout" | "tokenExpired" | "tokenRefreshed" | "unauthorized", handler: (...args: unknown[]) => void): void;
127
+
101
128
  // ==================== Permission Checks ====================
102
129
 
103
130
  /**
@@ -12,7 +12,7 @@
12
12
  *
13
13
  * Roles and permissions are returned as simple string arrays for efficient client-side use:
14
14
  * - `roles`: ["admin", "user", "manager"]
15
- * - `permissions`: ["user:read", "user:write", "order:read"]
15
+ * - `permissions`: ["user.read", "user.write", "order.read"]
16
16
  *
17
17
  */
18
18
  export interface User {
@@ -61,7 +61,7 @@ export interface User {
61
61
 
62
62
  /**
63
63
  * Permission names granted to user
64
- * Format: "resource:action"
64
+ * Format: "resource.action"
65
65
  */
66
66
  permissions: string[];
67
67