@amaster.ai/client 1.1.0-beta.7 → 1.1.0-beta.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/types/asr.d.ts CHANGED
@@ -1,192 +1,327 @@
1
+ import type{ HttpClient } from "./http";
1
2
  /**
2
- * ============================================================================
3
- * ASR (Automatic Speech Recognition) - Type Definitions
4
- * ============================================================================
5
- *
6
- * Real-time speech recognition using WebSocket connection.
7
- *
8
- * @module asr
9
- */
10
-
11
- /**
12
- * ASR Client Configuration
13
- *
3
+ * ASR Realtime WebSocket Client for Qwen-ASR Realtime API
4
+ *
5
+ * WebSocket-based real-time speech recognition for streaming transcription.
6
+ * Follows the Qwen-ASR Realtime API protocol with proper event handling.
7
+ *
14
8
  * @example
15
- * Configure ASR with custom voice settings:
16
9
  * ```typescript
17
- * const client = createClient({ baseURL: 'https://api.amaster.ai' });
18
- *
19
- * // Reconfigure ASR client
20
- * client.asr = createASRClient({
21
- * audioFormat: 'pcm16',
22
- * sampleRate: 16000,
23
- * onTranscript: (text, isFinal) => {
24
- * console.log(isFinal ? `Final: ${text}` : `Interim: ${text}`);
10
+ * import { createClient } from "@amaster.ai/client";
11
+ * export const client = createClient({});
12
+ * const asrClient = client.asr({
13
+ * language: "zh",
14
+ * enableVAD: true,
15
+ * onReady() {
16
+ * console.log("ASR connected");
17
+ * },
18
+ * onTranscript(text, isFinal) {
19
+ * console.log(isFinal ? "[Final]" : "[Interim]", text);
20
+ * },
21
+ * onError(err) {
22
+ * console.error("ASR error:", err);
25
23
  * },
26
- * onError: (error) => console.error('ASR Error:', error)
27
24
  * });
25
+ *
26
+ * await asrClient.connect();
27
+ * await asrClient.startRecording();
28
+ * // ... stop ...
29
+ * await asrClient.stopRecording();
30
+ * await asrClient.close();
28
31
  * ```
29
32
  */
33
+ export type ASRLanguage =
34
+ | "zh"
35
+ | "yue"
36
+ | "en"
37
+ | "ja"
38
+ | "de"
39
+ | "ko"
40
+ | "ru"
41
+ | "fr"
42
+ | "pt"
43
+ | "ar"
44
+ | "it"
45
+ | "es"
46
+ | "hi"
47
+ | "id"
48
+ | "th"
49
+ | "tr"
50
+ | "uk"
51
+ | "vi"
52
+ | "cs"
53
+ | "da"
54
+ | "fil"
55
+ | "fi"
56
+ | "is"
57
+ | "ms"
58
+ | "no"
59
+ | "pl"
60
+ | "sv";
61
+ export type ClientEventType =
62
+ | "session.update"
63
+ | "input_audio_buffer.append"
64
+ | "input_audio_buffer.commit"
65
+ | "session.finish";
66
+ export type ServerEventType =
67
+ | "session.created"
68
+ | "session.updated"
69
+ | "input_audio_buffer.speech_started"
70
+ | "input_audio_buffer.speech_stopped"
71
+ | "input_audio_buffer.committed"
72
+ | "conversation.item.input_audio_transcription.text"
73
+ | "conversation.item.input_audio_transcription.completed"
74
+ | "session.finished"
75
+ | "error";
76
+ export interface BaseEvent {
77
+ event_id: string;
78
+ type: ClientEventType | ServerEventType;
79
+ }
80
+ export interface SessionUpdateEvent extends BaseEvent {
81
+ type: "session.update";
82
+ session: SessionConfig;
83
+ }
84
+ export interface InputAudioBufferAppendEvent extends BaseEvent {
85
+ type: "input_audio_buffer.append";
86
+ audio: string;
87
+ }
88
+ export interface InputAudioBufferCommitEvent extends BaseEvent {
89
+ type: "input_audio_buffer.commit";
90
+ }
91
+ export interface SessionFinishEvent extends BaseEvent {
92
+ type: "session.finish";
93
+ }
94
+ type ClientEvent =
95
+ | SessionUpdateEvent
96
+ | InputAudioBufferAppendEvent
97
+ | InputAudioBufferCommitEvent
98
+ | SessionFinishEvent;
99
+ export interface SessionCreatedEvent extends BaseEvent {
100
+ type: "session.created";
101
+ session: {
102
+ id: string;
103
+ };
104
+ }
105
+ export interface SessionUpdatedEvent extends BaseEvent {
106
+ type: "session.updated";
107
+ session: SessionConfig;
108
+ }
109
+ export interface SpeechStartedEvent extends BaseEvent {
110
+ type: "input_audio_buffer.speech_started";
111
+ }
112
+ export interface SpeechStoppedEvent extends BaseEvent {
113
+ type: "input_audio_buffer.speech_stopped";
114
+ }
115
+ export interface InputAudioBufferCommittedEvent extends BaseEvent {
116
+ type: "input_audio_buffer.committed";
117
+ }
118
+ export interface TranscriptionTextEvent extends BaseEvent {
119
+ type: "conversation.item.input_audio_transcription.text";
120
+ text?: string;
121
+ stash?: string;
122
+ transcript?: string;
123
+ }
124
+ export interface TranscriptionCompletedEvent extends BaseEvent {
125
+ type: "conversation.item.input_audio_transcription.completed";
126
+ text?: string;
127
+ transcript?: string;
128
+ }
129
+ export interface SessionFinishedEvent extends BaseEvent {
130
+ type: "session.finished";
131
+ }
132
+ export interface ErrorEvent extends BaseEvent {
133
+ type: "error";
134
+ error: {
135
+ message: string;
136
+ code?: string;
137
+ };
138
+ }
139
+ export type ServerEvent =
140
+ | SessionCreatedEvent
141
+ | SessionUpdatedEvent
142
+ | SpeechStartedEvent
143
+ | SpeechStoppedEvent
144
+ | InputAudioBufferCommittedEvent
145
+ | TranscriptionTextEvent
146
+ | TranscriptionCompletedEvent
147
+ | SessionFinishedEvent
148
+ | ErrorEvent;
149
+ export interface TurnDetectionConfig {
150
+ type: "server_vad";
151
+ /** VAD检测阈值,推荐设为 0.0,默认值 0.2,范围 [-1, 1] */
152
+ threshold?: number;
153
+ /** VAD断句检测阈值(ms),推荐设为 400,默认值 800,范围 [200, 6000] */
154
+ silence_duration_ms?: number;
155
+ }
156
+ export interface InputAudioTranscriptionConfig {
157
+ language?: ASRLanguage;
158
+ }
159
+ export interface SessionConfig {
160
+ input_audio_format?: "pcm" | "opus";
161
+ sample_rate?: 16000 | 8000;
162
+ input_audio_transcription?: InputAudioTranscriptionConfig;
163
+ turn_detection?: TurnDetectionConfig | null;
164
+ }
30
165
  export interface ASRClientConfig {
31
- /** Audio format, default 'pcm16' */
32
- audioFormat?: "pcm16" | "g711a" | "g711u";
33
-
34
- /** Sample rate, default 16000 */
35
- sampleRate?: number;
36
-
37
- /** Called when connection is ready */
166
+ /**
167
+ * Audio format
168
+ * @default "pcm"
169
+ */
170
+ audioFormat?: "pcm" | "opus";
171
+ /**
172
+ * Sample rate in Hz
173
+ * @default 16000
174
+ * @description 支持 16000 和 8000。设置为 8000 时,服务端会先升采样到16000Hz再进行识别,可能引入微小延迟。
175
+ */
176
+ sampleRate?: 16000 | 8000;
177
+ /**
178
+ * Audio source language
179
+ * @default "zh"
180
+ * @description 支持多种语言,包括 zh(中文)、yue(粤语)、en(英文)、ja(日语)等
181
+ */
182
+ language?: ASRLanguage;
183
+ /**
184
+ * Enable VAD (Voice Activity Detection) mode
185
+ * @default true
186
+ * @description true = VAD模式(服务端自动检测语音开始/结束),false = Manual模式(客户端手动控制)
187
+ */
188
+ enableVAD?: boolean;
189
+ /**
190
+ * VAD detection threshold
191
+ * @default 0.2
192
+ * @description 推荐设为 0.0。取值范围 [-1, 1]。较低的阈值会提高 VAD 的灵敏度。
193
+ */
194
+ vadThreshold?: number;
195
+ /**
196
+ * VAD silence duration threshold in milliseconds
197
+ * @default 800
198
+ * @description 推荐设为 400。取值范围 [200, 6000]。静音持续时长超过该阈值将被认为是语句结束。
199
+ */
200
+ vadSilenceDurationMs?: number;
201
+ /**
202
+ * Get access token for WebSocket authentication
203
+ */
204
+ getAccessToken?: () => string | null;
205
+ /**
206
+ * Called when connection is ready (session.created received and session.update sent)
207
+ */
38
208
  onReady?: () => void;
39
-
40
- /** Called when speech is detected */
209
+ /**
210
+ * Called when speech is detected (VAD mode only)
211
+ */
41
212
  onSpeechStart?: () => void;
42
-
43
- /** Called when speech stops */
213
+ /**
214
+ * Called when speech stops (VAD mode only)
215
+ */
44
216
  onSpeechEnd?: () => void;
45
-
46
- /**
217
+ /**
47
218
  * Called on transcript result
48
- * @param text - Recognized text
219
+ * @param text - Transcribed text
49
220
  * @param isFinal - Whether this is the final result
50
221
  */
51
222
  onTranscript?: (text: string, isFinal: boolean) => void;
52
-
53
- /** Called on error */
223
+ /**
224
+ * Called when audio buffer is committed (non-VAD mode only)
225
+ */
226
+ onAudioBufferCommitted?: () => void;
227
+ /**
228
+ * Called when session is finished
229
+ */
230
+ onSessionFinished?: () => void;
231
+ /**
232
+ * Called on error
233
+ */
54
234
  onError?: (error: Error) => void;
55
-
56
- /** Called on connection close */
235
+ /**
236
+ * Called on close
237
+ */
57
238
  onClose?: () => void;
58
-
59
- /** WebSocket path override */
60
- path?: string;
61
239
  }
62
-
63
- /**
64
- * ASR Client API
65
- *
66
- * Real-time speech recognition client using WebSocket.
67
- *
68
- * @example
69
- * Basic usage:
70
- * ```typescript
71
- * const client = createClient({ baseURL: 'https://api.amaster.ai' });
72
- *
73
- * // Configure callbacks
74
- * client.asr = createASRClient({
75
- * onTranscript: (text, isFinal) => {
76
- * if (isFinal) {
77
- * console.log('Final transcript:', text);
78
- * } else {
79
- * console.log('Interim transcript:', text);
80
- * }
81
- * }
82
- * });
83
- *
84
- * // Connect and start recording
85
- * await client.asr.connect();
86
- * await client.asr.startRecording();
87
- *
88
- * // Stop recording
89
- * client.asr.stopRecording();
90
- *
91
- * // Close connection
92
- * client.asr.close();
93
- * ```
94
- *
95
- * @example
96
- * With error handling:
97
- * ```typescript
98
- * const client = createClient({ baseURL: 'https://api.amaster.ai' });
99
- *
100
- * client.asr = createASRClient({
101
- * onReady: () => console.log('ASR ready'),
102
- * onSpeechStart: () => console.log('Speech detected'),
103
- * onSpeechEnd: () => console.log('Speech ended'),
104
- * onTranscript: (text, isFinal) => {
105
- * console.log(isFinal ? `[FINAL] ${text}` : `[INTERIM] ${text}`);
106
- * },
107
- * onError: (error) => {
108
- * console.error('ASR Error:', error.message);
109
- * },
110
- * onClose: () => {
111
- * console.log('ASR connection closed');
112
- * }
113
- * });
114
- *
115
- * try {
116
- * await client.asr.connect();
117
- * await client.asr.startRecording();
118
- * } catch (error) {
119
- * console.error('Failed to start ASR:', error);
120
- * }
121
- * ```
122
- */
123
240
  export interface ASRClient {
241
+ /** Connect to ASR service and establish session */
242
+ connect(): Promise<void>;
243
+ /** Start recording from microphone */
244
+ startRecording(): Promise<void>;
124
245
  /**
125
- * Connect to ASR service
126
- *
127
- * Establishes WebSocket connection to the speech recognition service.
128
- *
129
- * @returns Promise that resolves when connected
130
- *
131
- * @example
132
- * ```typescript
133
- * await client.asr.connect();
134
- * console.log('Connected to ASR service');
135
- * ```
246
+ * Stop recording
247
+ * @description In non-VAD mode, this triggers recognition by sending input_audio_buffer.commit
136
248
  */
137
- connect(): Promise<void>;
138
-
249
+ stopRecording(): Promise<void>;
139
250
  /**
140
- * Start recording from microphone
141
- *
142
- * Begins capturing audio from the user's microphone and sends it to the ASR service.
143
- * Requires microphone permission from the user.
144
- *
145
- * @returns Promise that resolves when recording starts
146
- *
147
- * @example
148
- * ```typescript
149
- * // Request microphone permission and start recording
150
- * try {
151
- * await client.asr.startRecording();
152
- * console.log('Recording started');
153
- * } catch (error) {
154
- * if (error.name === 'NotAllowedError') {
155
- * console.error('Microphone permission denied');
156
- * }
157
- * }
158
- * ```
251
+ * Close connection gracefully
252
+ * @description Sends session.finish and waits for session.finished before closing
159
253
  */
160
- startRecording(): Promise<void>;
254
+ close(): Promise<void>;
255
+ /**
256
+ * Check if currently recording
257
+ */
258
+ isRecording(): boolean;
259
+ /**
260
+ * Check if connected to server
261
+ */
262
+ isConnected(): boolean;
263
+ }
264
+ declare const _default$1: (
265
+ authConfig: Pick<ASRClientConfig, "getAccessToken">
266
+ ) => (config: ASRClientConfig) => ASRClient;
161
267
 
268
+ export interface Recorder {
269
+ /** Start recording */
270
+ start(): Promise<void>;
162
271
  /**
163
- * Stop recording
164
- *
165
- * Stops capturing audio from the microphone but keeps the WebSocket connection open.
166
- *
167
- * @example
168
- * ```typescript
169
- * // Stop recording after 10 seconds
170
- * await client.asr.startRecording();
171
- * setTimeout(() => {
172
- * client.asr.stopRecording();
173
- * console.log('Recording stopped');
174
- * }, 10000);
175
- * ```
176
- */
177
- stopRecording(): void;
272
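+ * Stop recording. The declared return type is `Promise<void>`, so the base64-encoded WAV audio data is not returned here; it is presumably delivered through `RecorderOptions.onStop` (TODO confirm). You can use that data to call the ASR API.
273
+ *
274
+ * @returns Promise that resolves with no value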
275
+ */
276
+ stop(): Promise<void>;
277
+ }
178
278
 
279
+ export interface RecorderOptions {
280
+ /** Called when recording starts */
281
+ onStart?: () => void;
179
282
  /**
180
- * Close connection
181
- *
182
- * Closes the WebSocket connection and releases resources.
183
- *
184
- * @example
185
- * ```typescript
186
- * // Cleanup when done
187
- * client.asr.stopRecording();
188
- * client.asr.close();
189
- * ```
190
- */
191
- close(): void;
283
+ * Called when recording stops, with base64-encoded WAV audio data. You can use this data to call the ASR API.
284
+ *
285
+ * @param base64 - Base64-encoded WAV audio data
286
+ * @returns void
287
+ */
288
+ onStop?: (base64: string) => void;
289
+ onError?: (error: Error) => void;
290
+ }
291
+
292
+ export type ASRHttpStatus = "idle" | "recording" | "recognizing";
293
+
294
+ export interface ASRHttpClientConfig {
295
+ /** HTTP client instance */
296
+ http?: HttpClient;
297
+ /** Get access token */
298
+ getAccessToken?(): string | null;
299
+ /** Create custom recorder */
300
+ createRecorder?(options?: RecorderOptions): Promise<Recorder>;
301
+ /** Language, default 'zh' */
302
+ language?: string;
303
+ /** Sample rate, default 16000 */
304
+ sampleRate?: number;
305
+ /** Called when recording starts */
306
+ onRecordingStart?: () => void;
307
+ /** Called when recording stops */
308
+ onRecordingStop?: () => void;
309
+ /** Called with recognition result */
310
+ onResult?: (text: string) => void;
311
+ /** Called on error */
312
+ onError?: (error: Error) => void;
313
+ /** Called when status changes */
314
+ onStatusChange?: (status: ASRHttpStatus) => void;
315
+ }
316
+ export interface ASRHttpClient {
317
+ /** Start recording (press-to-talk) */
318
+ startRecording(): Promise<void>;
319
+ /** Stop recording and get result */
320
+ stopRecording(): Promise<string>;
321
+ /** Record for specific duration then recognize */
322
+ recordAndRecognize(durationMs: number): Promise<string>;
323
+ /** Recognize audio file (File or Blob) */
324
+ recognizeFile(file: File | Blob): Promise<string>;
325
+ /** Recognize audio from URL */
326
+ recognizeUrl(audioUrl: string): Promise<string>;
192
327
  }
@@ -1,9 +1,5 @@
1
1
  /**
2
- * ============================================================================
3
- * Verification Code Authentication - Type Definitions
4
- * ============================================================================
5
- *
6
- * Verification code-based authentication including:
2
+ * Verification code-based authentication including:
7
3
  * - Email verification code login
8
4
  * - SMS verification code login
9
5
  * - Send verification code
@@ -24,46 +20,7 @@ export type CodeLoginType = 'email' | 'phone';
24
20
 
25
21
  /**
26
22
  * Verification code login parameters
27
- *
28
- * @example
29
- * Email code login:
30
- * ```typescript
31
- * // 1. Send verification code
32
- * await client.auth.sendCode({
33
- * type: 'email',
34
- * email: 'user@example.com'
35
- * });
36
- *
37
- * // 2. User receives code via email: "123456"
38
- *
39
- * // 3. Login with code
40
- * const result = await client.auth.codeLogin({
41
- * email: 'user@example.com',
42
- * code: '123456'
43
- * });
44
- *
45
- * if (result.data) {
46
- * console.log('Logged in successfully!');
47
- * }
48
- * ```
49
- *
50
- * @example
51
- * Phone code login:
52
- * ```typescript
53
- * // 1. Send SMS code
54
- * await client.auth.sendCode({
55
- * type: 'phone',
56
- * phone: '+86-13800138000'
57
- * });
58
- *
59
- * // 2. User receives SMS: "654321"
60
- *
61
- * // 3. Login with code
62
- * await client.auth.codeLogin({
63
- * phone: '+86-13800138000',
64
- * code: '654321'
65
- * });
66
- * ```
23
+ *
67
24
  */
68
25
  export interface CodeLoginParams {
69
26
  /** Login method (optional, auto-detected) */
@@ -114,52 +71,14 @@ export interface CaptchaResponse {
114
71
  export interface CodeAuthAPI {
115
72
  /**
116
73
  * Login with verification code
117
- *
74
+ *
118
75
  * Authenticates using a verification code sent to email or phone.
119
- *
76
+ *
120
77
  * @param params - Email/phone and verification code
121
78
  * @returns User info and access token
122
- *
123
- * @example
124
- * Complete code login flow:
125
- * ```typescript
126
- * // 1. Send verification code
127
- * await client.auth.sendCode({
128
- * type: 'email',
129
- * email: 'user@example.com'
130
- * });
131
- *
132
- * // 2. User receives code: "123456"
133
- *
134
- * // 3. Login with code
135
- * const result = await client.auth.codeLogin({
136
- * email: 'user@example.com',
137
- * code: '123456'
138
- * });
139
- *
140
- * if (result.data) {
141
- * console.log('Logged in successfully!');
142
- * }
143
- * ```
144
- *
145
- * @example
146
- * SMS code login with error handling:
147
- * ```typescript
148
- * const result = await client.auth.codeLogin({
149
- * phone: '+86-13800138000',
150
- * code: userInputCode
151
- * });
152
- *
153
- * if (result.error) {
154
- * if (result.status === 400) {
155
- * console.error('Invalid or expired code');
156
- * } else {
157
- * console.error('Login failed:', result.error.message);
158
- * }
159
- * }
160
- * ```
79
+ *
161
80
  */
162
- codeLogin(params: CodeLoginParams): Promise<ClientResult<LoginResponse>>;
81
+ loginWithCode(params: CodeLoginParams): Promise<ClientResult<LoginResponse>>;
163
82
 
164
83
  /**
165
84
  * Send verification code
@@ -169,43 +88,7 @@ export interface CodeAuthAPI {
169
88
  *
170
89
  * @param params - Email or phone to send code to
171
90
  * @returns Success status
172
- *
173
- * @example
174
- * Send email verification code:
175
- * ```typescript
176
- * const result = await client.auth.sendCode({
177
- * type: 'email',
178
- * email: 'user@example.com'
179
- * });
180
- *
181
- * if (result.data?.success) {
182
- * console.log('Code sent to email');
183
- * showCodeInputForm();
184
- * }
185
- * ```
186
- *
187
- * @example
188
- * Send SMS verification code:
189
- * ```typescript
190
- * await client.auth.sendCode({
191
- * type: 'phone',
192
- * phone: '+86-13800138000'
193
- * });
194
- * console.log('SMS sent');
195
- * ```
196
- *
197
- * @example
198
- * With rate limiting handling:
199
- * ```typescript
200
- * const result = await client.auth.sendCode({
201
- * type: 'email',
202
- * email: 'user@example.com'
203
- * });
204
- *
205
- * if (result.status === 429) {
206
- * console.error('Too many requests. Please try again later.');
207
- * }
208
- * ```
91
+ *
209
92
  */
210
93
  sendCode(params: SendCodeParams): Promise<ClientResult<SuccessResponse>>;
211
94
 
@@ -216,39 +99,7 @@ export interface CodeAuthAPI {
216
99
  * Used during registration or sensitive operations.
217
100
  *
218
101
  * @returns Captcha ID and image (base64)
219
- *
220
- * @example
221
- * Display captcha to user:
222
- * ```typescript
223
- * const result = await client.auth.getCaptcha();
224
- * if (result.data) {
225
- * // Display image to user
226
- * const img = document.createElement('img');
227
- * img.src = result.data.captchaImage;
228
- * document.body.appendChild(img);
229
- *
230
- * // Save captchaId for later verification
231
- * const captchaId = result.data.captchaId;
232
- * }
233
- * ```
234
- *
235
- * @example
236
- * Use with registration:
237
- * ```typescript
238
- * // 1. Get captcha
239
- * const captchaResult = await client.auth.getCaptcha();
240
- * showCaptchaImage(captchaResult.data.captchaImage);
241
- *
242
- * // 2. Get user input
243
- * const userInput = await promptUserForCaptcha();
244
- *
245
- * // 3. Register with captcha
246
- * await client.auth.register({
247
- * email: 'user@example.com',
248
- * password: 'Password@123',
249
- * captcha: `${captchaResult.data.captchaId}:${userInput}`
250
- * });
251
- * ```
102
+ *
252
103
  */
253
104
  getCaptcha(): Promise<ClientResult<CaptchaResponse>>;
254
105
  }