@amaster.ai/client 1.0.0-alpha.2 → 1.0.0-beta.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +11 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +24 -2
- package/dist/index.d.ts +24 -2
- package/dist/index.js +11 -5
- package/dist/index.js.map +1 -1
- package/package.json +11 -11
- package/types/asr.d.ts +257 -167
- package/types/auth/code-auth.d.ts +3 -3
- package/types/auth/index.d.ts +27 -0
- package/types/auth/user.d.ts +2 -2
- package/types/bpm.d.ts +123 -0
- package/types/copilot.d.ts +156 -37
- package/types/http.d.ts +7 -1
- package/types/index.d.ts +7 -2
- package/types/tts.d.ts +6 -0
package/types/asr.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type{ HttpClient } from "./http";
|
|
1
2
|
/**
|
|
2
3
|
* ASR Realtime WebSocket Client for Qwen-ASR Realtime API
|
|
3
4
|
*
|
|
@@ -6,6 +7,8 @@
|
|
|
6
7
|
*
|
|
7
8
|
* @example
|
|
8
9
|
* ```typescript
|
|
10
|
+
* import { createClient } from "@amaster.ai/client";
|
|
11
|
+
* export const client = createClient({});
|
|
9
12
|
* const asrClient = client.asr({
|
|
10
13
|
* language: "zh",
|
|
11
14
|
* enableVAD: true,
|
|
@@ -27,211 +30,298 @@
|
|
|
27
30
|
* await asrClient.close();
|
|
28
31
|
* ```
|
|
29
32
|
*/
|
|
30
|
-
export type ASRLanguage =
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
export type ASRLanguage =
|
|
34
|
+
| "zh"
|
|
35
|
+
| "yue"
|
|
36
|
+
| "en"
|
|
37
|
+
| "ja"
|
|
38
|
+
| "de"
|
|
39
|
+
| "ko"
|
|
40
|
+
| "ru"
|
|
41
|
+
| "fr"
|
|
42
|
+
| "pt"
|
|
43
|
+
| "ar"
|
|
44
|
+
| "it"
|
|
45
|
+
| "es"
|
|
46
|
+
| "hi"
|
|
47
|
+
| "id"
|
|
48
|
+
| "th"
|
|
49
|
+
| "tr"
|
|
50
|
+
| "uk"
|
|
51
|
+
| "vi"
|
|
52
|
+
| "cs"
|
|
53
|
+
| "da"
|
|
54
|
+
| "fil"
|
|
55
|
+
| "fi"
|
|
56
|
+
| "is"
|
|
57
|
+
| "ms"
|
|
58
|
+
| "no"
|
|
59
|
+
| "pl"
|
|
60
|
+
| "sv";
|
|
61
|
+
export type ClientEventType =
|
|
62
|
+
| "session.update"
|
|
63
|
+
| "input_audio_buffer.append"
|
|
64
|
+
| "input_audio_buffer.commit"
|
|
65
|
+
| "session.finish";
|
|
66
|
+
export type ServerEventType =
|
|
67
|
+
| "session.created"
|
|
68
|
+
| "session.updated"
|
|
69
|
+
| "input_audio_buffer.speech_started"
|
|
70
|
+
| "input_audio_buffer.speech_stopped"
|
|
71
|
+
| "input_audio_buffer.committed"
|
|
72
|
+
| "conversation.item.input_audio_transcription.text"
|
|
73
|
+
| "conversation.item.input_audio_transcription.completed"
|
|
74
|
+
| "session.finished"
|
|
75
|
+
| "error";
|
|
33
76
|
export interface BaseEvent {
|
|
34
|
-
|
|
35
|
-
|
|
77
|
+
event_id: string;
|
|
78
|
+
type: ClientEventType | ServerEventType;
|
|
36
79
|
}
|
|
37
80
|
export interface SessionUpdateEvent extends BaseEvent {
|
|
38
|
-
|
|
39
|
-
|
|
81
|
+
type: "session.update";
|
|
82
|
+
session: SessionConfig;
|
|
40
83
|
}
|
|
41
84
|
export interface InputAudioBufferAppendEvent extends BaseEvent {
|
|
42
|
-
|
|
43
|
-
|
|
85
|
+
type: "input_audio_buffer.append";
|
|
86
|
+
audio: string;
|
|
44
87
|
}
|
|
45
88
|
export interface InputAudioBufferCommitEvent extends BaseEvent {
|
|
46
|
-
|
|
89
|
+
type: "input_audio_buffer.commit";
|
|
47
90
|
}
|
|
48
91
|
export interface SessionFinishEvent extends BaseEvent {
|
|
49
|
-
|
|
92
|
+
type: "session.finish";
|
|
50
93
|
}
|
|
51
|
-
type ClientEvent =
|
|
94
|
+
type ClientEvent =
|
|
95
|
+
| SessionUpdateEvent
|
|
96
|
+
| InputAudioBufferAppendEvent
|
|
97
|
+
| InputAudioBufferCommitEvent
|
|
98
|
+
| SessionFinishEvent;
|
|
52
99
|
export interface SessionCreatedEvent extends BaseEvent {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
100
|
+
type: "session.created";
|
|
101
|
+
session: {
|
|
102
|
+
id: string;
|
|
103
|
+
};
|
|
57
104
|
}
|
|
58
105
|
export interface SessionUpdatedEvent extends BaseEvent {
|
|
59
|
-
|
|
60
|
-
|
|
106
|
+
type: "session.updated";
|
|
107
|
+
session: SessionConfig;
|
|
61
108
|
}
|
|
62
109
|
export interface SpeechStartedEvent extends BaseEvent {
|
|
63
|
-
|
|
110
|
+
type: "input_audio_buffer.speech_started";
|
|
64
111
|
}
|
|
65
112
|
export interface SpeechStoppedEvent extends BaseEvent {
|
|
66
|
-
|
|
113
|
+
type: "input_audio_buffer.speech_stopped";
|
|
67
114
|
}
|
|
68
115
|
export interface InputAudioBufferCommittedEvent extends BaseEvent {
|
|
69
|
-
|
|
116
|
+
type: "input_audio_buffer.committed";
|
|
70
117
|
}
|
|
71
118
|
export interface TranscriptionTextEvent extends BaseEvent {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
119
|
+
type: "conversation.item.input_audio_transcription.text";
|
|
120
|
+
text?: string;
|
|
121
|
+
stash?: string;
|
|
122
|
+
transcript?: string;
|
|
76
123
|
}
|
|
77
124
|
export interface TranscriptionCompletedEvent extends BaseEvent {
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
125
|
+
type: "conversation.item.input_audio_transcription.completed";
|
|
126
|
+
text?: string;
|
|
127
|
+
transcript?: string;
|
|
81
128
|
}
|
|
82
129
|
export interface SessionFinishedEvent extends BaseEvent {
|
|
83
|
-
|
|
130
|
+
type: "session.finished";
|
|
84
131
|
}
|
|
85
132
|
export interface ErrorEvent extends BaseEvent {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
133
|
+
type: "error";
|
|
134
|
+
error: {
|
|
135
|
+
message: string;
|
|
136
|
+
code?: string;
|
|
137
|
+
};
|
|
91
138
|
}
|
|
92
|
-
export type ServerEvent =
|
|
139
|
+
export type ServerEvent =
|
|
140
|
+
| SessionCreatedEvent
|
|
141
|
+
| SessionUpdatedEvent
|
|
142
|
+
| SpeechStartedEvent
|
|
143
|
+
| SpeechStoppedEvent
|
|
144
|
+
| InputAudioBufferCommittedEvent
|
|
145
|
+
| TranscriptionTextEvent
|
|
146
|
+
| TranscriptionCompletedEvent
|
|
147
|
+
| SessionFinishedEvent
|
|
148
|
+
| ErrorEvent;
|
|
93
149
|
export interface TurnDetectionConfig {
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
150
|
+
type: "server_vad";
|
|
151
|
+
/** VAD检测阈值,推荐设为 0.0,默认值 0.2,范围 [-1, 1] */
|
|
152
|
+
threshold?: number;
|
|
153
|
+
/** VAD断句检测阈值(ms),推荐设为 400,默认值 800,范围 [200, 6000] */
|
|
154
|
+
silence_duration_ms?: number;
|
|
99
155
|
}
|
|
100
156
|
export interface InputAudioTranscriptionConfig {
|
|
101
|
-
|
|
157
|
+
language?: ASRLanguage;
|
|
102
158
|
}
|
|
103
159
|
export interface SessionConfig {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
160
|
+
input_audio_format?: "pcm" | "opus";
|
|
161
|
+
sample_rate?: 16000 | 8000;
|
|
162
|
+
input_audio_transcription?: InputAudioTranscriptionConfig;
|
|
163
|
+
turn_detection?: TurnDetectionConfig | null;
|
|
108
164
|
}
|
|
109
165
|
export interface ASRClientConfig {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
166
|
+
/**
|
|
167
|
+
* Audio format
|
|
168
|
+
* @default "pcm"
|
|
169
|
+
*/
|
|
170
|
+
audioFormat?: "pcm" | "opus";
|
|
171
|
+
/**
|
|
172
|
+
* Sample rate in Hz
|
|
173
|
+
* @default 16000
|
|
174
|
+
* @description 支持 16000 和 8000。设置为 8000 时,服务端会先升采样到16000Hz再进行识别,可能引入微小延迟。
|
|
175
|
+
*/
|
|
176
|
+
sampleRate?: 16000 | 8000;
|
|
177
|
+
/**
|
|
178
|
+
* Audio source language
|
|
179
|
+
* @default "zh"
|
|
180
|
+
* @description 支持多种语言,包括 zh(中文)、yue(粤语)、en(英文)、ja(日语)等
|
|
181
|
+
*/
|
|
182
|
+
language?: ASRLanguage;
|
|
183
|
+
/**
|
|
184
|
+
* Enable VAD (Voice Activity Detection) mode
|
|
185
|
+
* @default true
|
|
186
|
+
* @description true = VAD模式(服务端自动检测语音开始/结束),false = Manual模式(客户端手动控制)
|
|
187
|
+
*/
|
|
188
|
+
enableVAD?: boolean;
|
|
189
|
+
/**
|
|
190
|
+
* VAD detection threshold
|
|
191
|
+
* @default 0.2
|
|
192
|
+
* @description 推荐设为 0.0。取值范围 [-1, 1]。较低的阈值会提高 VAD 的灵敏度。
|
|
193
|
+
*/
|
|
194
|
+
vadThreshold?: number;
|
|
195
|
+
/**
|
|
196
|
+
* VAD silence duration threshold in milliseconds
|
|
197
|
+
* @default 800
|
|
198
|
+
* @description 推荐设为 400。取值范围 [200, 6000]。静音持续时长超过该阈值将被认为是语句结束。
|
|
199
|
+
*/
|
|
200
|
+
vadSilenceDurationMs?: number;
|
|
201
|
+
/**
|
|
202
|
+
* Get access token for WebSocket authentication
|
|
203
|
+
*/
|
|
204
|
+
getAccessToken?: () => string | null;
|
|
205
|
+
/**
|
|
206
|
+
* Called when connection is ready (session.created received and session.update sent)
|
|
207
|
+
*/
|
|
208
|
+
onReady?: () => void;
|
|
209
|
+
/**
|
|
210
|
+
* Called when speech is detected (VAD mode only)
|
|
211
|
+
*/
|
|
212
|
+
onSpeechStart?: () => void;
|
|
213
|
+
/**
|
|
214
|
+
* Called when speech stops (VAD mode only)
|
|
215
|
+
*/
|
|
216
|
+
onSpeechEnd?: () => void;
|
|
217
|
+
/**
|
|
218
|
+
* Called on transcript result
|
|
219
|
+
* @param text - Transcribed text
|
|
220
|
+
* @param isFinal - Whether this is the final result
|
|
221
|
+
*/
|
|
222
|
+
onTranscript?: (text: string, isFinal: boolean) => void;
|
|
223
|
+
/**
|
|
224
|
+
* Called when audio buffer is committed (non-VAD mode only)
|
|
225
|
+
*/
|
|
226
|
+
onAudioBufferCommitted?: () => void;
|
|
227
|
+
/**
|
|
228
|
+
* Called when session is finished
|
|
229
|
+
*/
|
|
230
|
+
onSessionFinished?: () => void;
|
|
231
|
+
/**
|
|
232
|
+
* Called on error
|
|
233
|
+
*/
|
|
234
|
+
onError?: (error: Error) => void;
|
|
235
|
+
/**
|
|
236
|
+
* Called on close
|
|
237
|
+
*/
|
|
238
|
+
onClose?: () => void;
|
|
183
239
|
}
|
|
184
240
|
export interface ASRClient {
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
}
|
|
208
|
-
declare const _default$1: (
|
|
241
|
+
/** Connect to ASR service and establish session */
|
|
242
|
+
connect(): Promise<void>;
|
|
243
|
+
/** Start recording from microphone */
|
|
244
|
+
startRecording(): Promise<void>;
|
|
245
|
+
/**
|
|
246
|
+
* Stop recording
|
|
247
|
+
* @description In non-VAD mode, this triggers recognition by sending input_audio_buffer.commit
|
|
248
|
+
*/
|
|
249
|
+
stopRecording(): Promise<void>;
|
|
250
|
+
/**
|
|
251
|
+
* Close connection gracefully
|
|
252
|
+
* @description Sends session.finish and waits for session.finished before closing
|
|
253
|
+
*/
|
|
254
|
+
close(): Promise<void>;
|
|
255
|
+
/**
|
|
256
|
+
* Check if currently recording
|
|
257
|
+
*/
|
|
258
|
+
isRecording(): boolean;
|
|
259
|
+
/**
|
|
260
|
+
* Check if connected to server
|
|
261
|
+
*/
|
|
262
|
+
isConnected(): boolean;
|
|
263
|
+
}
|
|
264
|
+
declare const _default$1: (
|
|
265
|
+
authConfig: Pick<ASRClientConfig, "getAccessToken">
|
|
266
|
+
) => (config: ASRClientConfig) => ASRClient;
|
|
267
|
+
|
|
268
|
+
export interface Recorder {
|
|
269
|
+
/** Start recording */
|
|
270
|
+
start(): Promise<void>;
|
|
271
|
+
/**
|
|
272
|
+
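* Stop recording. The returned promise resolves without a value; the base64-encoded WAV audio data is passed to the `RecorderOptions.onStop` callback, and you can use that data to call the ASR API.
|
|
273
|
+
*
|
|
274
|
+
* @returns Promise that resolves once recording has stopped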
|
|
275
|
+
*/
|
|
276
|
+
stop(): Promise<void>;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
export interface RecorderOptions {
|
|
280
|
+
/** Called when recording starts */
|
|
281
|
+
onStart?: () => void;
|
|
282
|
+
/**
|
|
283
|
+
* Called when recording stops, with base64-encoded WAV audio data. You can use this data to call the ASR API.
|
|
284
|
+
*
|
|
285
|
+
* @param base64 - Base64-encoded WAV audio data
|
|
286
|
+
* @returns void
|
|
287
|
+
*/
|
|
288
|
+
onStop?: (base64: string) => void;
|
|
289
|
+
onError?: (error: Error) => void;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
export type ASRHttpStatus = "idle" | "recording" | "recognizing";
|
|
209
293
|
|
|
210
294
|
export interface ASRHttpClientConfig {
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
295
|
+
/** HTTP client instance */
|
|
296
|
+
http?: HttpClient;
|
|
297
|
+
/** Get access token */
|
|
298
|
+
getAccessToken?(): string | null;
|
|
299
|
+
/** Create custom recorder */
|
|
300
|
+
createRecorder?(options?: RecorderOptions): Promise<Recorder>;
|
|
301
|
+
/** Language, default 'zh' */
|
|
302
|
+
language?: string;
|
|
303
|
+
/** Sample rate, default 16000 */
|
|
304
|
+
sampleRate?: number;
|
|
305
|
+
/** Called when recording starts */
|
|
306
|
+
onRecordingStart?: () => void;
|
|
307
|
+
/** Called when recording stops */
|
|
308
|
+
onRecordingStop?: () => void;
|
|
309
|
+
/** Called with recognition result */
|
|
310
|
+
onResult?: (text: string) => void;
|
|
311
|
+
/** Called on error */
|
|
312
|
+
onError?: (error: Error) => void;
|
|
313
|
+
/** Called when status changes */
|
|
314
|
+
onStatusChange?: (status: ASRHttpStatus) => void;
|
|
225
315
|
}
|
|
226
316
|
export interface ASRHttpClient {
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
}
|
|
317
|
+
/** Start recording (press-to-talk) */
|
|
318
|
+
startRecording(): Promise<void>;
|
|
319
|
+
/** Stop recording and get result */
|
|
320
|
+
stopRecording(): Promise<string>;
|
|
321
|
+
/** Record for specific duration then recognize */
|
|
322
|
+
recordAndRecognize(durationMs: number): Promise<string>;
|
|
323
|
+
/** Recognize audio file (File or Blob) */
|
|
324
|
+
recognizeFile(file: File | Blob): Promise<string>;
|
|
325
|
+
/** Recognize audio from URL */
|
|
326
|
+
recognizeUrl(audioUrl: string): Promise<string>;
|
|
327
|
+
}
|
package/types/auth/code-auth.d.ts
CHANGED
|
@@ -71,14 +71,14 @@ export interface CaptchaResponse {
|
|
|
71
71
|
export interface CodeAuthAPI {
|
|
72
72
|
/**
|
|
73
73
|
* Login with verification code
|
|
74
|
-
*
|
|
74
|
+
*
|
|
75
75
|
* Authenticates using a verification code sent to email or phone.
|
|
76
|
-
*
|
|
76
|
+
*
|
|
77
77
|
* @param params - Email/phone and verification code
|
|
78
78
|
* @returns User info and access token
|
|
79
79
|
*
|
|
80
80
|
*/
|
|
81
|
-
|
|
81
|
+
loginWithCode(params: CodeLoginParams): Promise<ClientResult<LoginResponse>>;
|
|
82
82
|
|
|
83
83
|
/**
|
|
84
84
|
* Send verification code
|
package/types/auth/index.d.ts
CHANGED
|
@@ -98,6 +98,33 @@ export interface AuthClientAPI
|
|
|
98
98
|
*/
|
|
99
99
|
refreshToken(): Promise<ClientResult<RefreshTokenResponse>>;
|
|
100
100
|
|
|
101
|
+
// ==================== Event Handling ====================
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Subscribe to authentication events
|
|
105
|
+
*
|
|
106
|
+
* Available events:
|
|
107
|
+
* - `login` - Fired after successful login, receives User object
|
|
108
|
+
* - `logout` - Fired after logout
|
|
109
|
+
* - `tokenExpired` - Fired when access token expires
|
|
110
|
+
* - `tokenRefreshed` - Fired after token refresh, receives new token
|
|
111
|
+
* - `unauthorized` - Fired on 401 response
|
|
112
|
+
*
|
|
113
|
+
* @param event - Event name to subscribe to
|
|
114
|
+
* @param handler - Callback function
|
|
115
|
+
*
|
|
116
|
+
*/
|
|
117
|
+
on(event: "login" | "logout" | "tokenExpired" | "tokenRefreshed" | "unauthorized", handler: (...args: unknown[]) => void): void;
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* Unsubscribe from authentication events
|
|
121
|
+
*
|
|
122
|
+
* @param event - Event name to unsubscribe from
|
|
123
|
+
* @param handler - The same callback function that was passed to `on()`
|
|
124
|
+
*
|
|
125
|
+
*/
|
|
126
|
+
off(event: "login" | "logout" | "tokenExpired" | "tokenRefreshed" | "unauthorized", handler: (...args: unknown[]) => void): void;
|
|
127
|
+
|
|
101
128
|
// ==================== Permission Checks ====================
|
|
102
129
|
|
|
103
130
|
/**
|
package/types/auth/user.d.ts
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*
|
|
13
13
|
* Roles and permissions are returned as simple string arrays for efficient client-side use:
|
|
14
14
|
* - `roles`: ["admin", "user", "manager"]
|
|
15
|
-
* - `permissions`: ["user
|
|
15
|
+
* - `permissions`: ["user.read", "user.write", "order.read"]
|
|
16
16
|
*
|
|
17
17
|
*/
|
|
18
18
|
export interface User {
|
|
@@ -61,7 +61,7 @@ export interface User {
|
|
|
61
61
|
|
|
62
62
|
/**
|
|
63
63
|
* Permission names granted to user
|
|
64
|
-
* Format: "resource
|
|
64
|
+
* Format: "resource.action"
|
|
65
65
|
*/
|
|
66
66
|
permissions: string[];
|
|
67
67
|
|