@amaster.ai/asr-client 1.0.0-beta.7 → 1.0.0-beta.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +394 -78
- package/dist/index.cjs +499 -153
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +284 -24
- package/dist/index.d.ts +284 -24
- package/dist/index.js +498 -151
- package/dist/index.js.map +1 -1
- package/package.json +53 -45
package/dist/index.cjs
CHANGED
|
@@ -20,217 +20,563 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
-
createASRClient: () =>
|
|
24
|
-
|
|
25
|
-
listen: () => listen
|
|
23
|
+
createASRClient: () => asr_client_default,
|
|
24
|
+
createASRHttpClient: () => http_asr_client_default
|
|
26
25
|
});
|
|
27
26
|
module.exports = __toCommonJS(index_exports);
|
|
28
27
|
|
|
29
28
|
// src/asr-client.ts
|
|
30
29
|
var ASR_PATH = "/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime";
|
|
31
|
-
async function
|
|
32
|
-
|
|
33
|
-
let
|
|
34
|
-
let
|
|
30
|
+
/**
 * Converts floating-point audio samples into base64-encoded 16-bit PCM.
 *
 * Each sample is clamped to [-1, 1] (NaN or missing values become 0) and
 * scaled so that -1 maps to -32768 and 1 maps to 32767. The bytes are read
 * straight from the Int16Array's buffer, i.e. in the platform's byte order.
 *
 * @param {ArrayLike<number>} samples - Audio samples to encode.
 * @returns {string} Base64 text of the 16-bit PCM bytes.
 */
function encodePcm16Base64(samples) {
  const pcm = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    const s = Math.max(-1, Math.min(1, samples[i] || 0));
    pcm[i] = s < 0 ? s * 32768 : s * 32767;
  }
  const bytes = new Uint8Array(pcm.buffer);
  // Build the binary string in slices: one fromCharCode call per slice is far
  // cheaper than one concatenation per byte, and slices stay below engine
  // argument-count limits.
  const SLICE = 8192;
  let binary = "";
  for (let i = 0; i < bytes.length; i += SLICE) {
    binary += String.fromCharCode.apply(null, bytes.subarray(i, i + SLICE));
  }
  return btoa(binary);
}

/**
 * Creates a microphone recorder that delivers base64-encoded 16-bit PCM
 * chunks through a callback.
 *
 * @param {number} [sampleRate=16000] - Sample rate requested from
 *   getUserMedia and used for the AudioContext.
 * @returns {Promise<{start: (onAudio: (audio: string) => void) => Promise<void>, stop: () => Promise<void>}>}
 *   `start` opens the microphone and begins calling `onAudio` once per
 *   4096-frame buffer; `stop` releases the microphone and audio graph.
 * @throws {Error} If `navigator.mediaDevices.getUserMedia` is unavailable.
 */
async function createRealtimeRecorder(sampleRate = 16e3) {
  let stream = null;
  let ctx = null;
  let source = null;
  let processor = null;
  if (typeof navigator === "undefined" || !navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    throw new Error("\u672A\u627E\u5230\u9EA6\u514B\u98CE\u6216\u65E0\u6743\u9650");
  }

  // Tears down whatever has been created so far. References are cleared
  // before the context is closed so a failing close() cannot leave stale state.
  const release = async () => {
    stream?.getTracks().forEach((t) => t.stop());
    source?.disconnect();
    if (processor) {
      processor.disconnect();
      processor.onaudioprocess = null;
    }
    const closing = ctx;
    stream = null;
    ctx = null;
    source = null;
    processor = null;
    if (closing && closing.state !== "closed") {
      await closing.close();
    }
  };

  return {
    async start(onAudio) {
      try {
        stream = await navigator.mediaDevices.getUserMedia({
          audio: { sampleRate, channelCount: 1, echoCancellation: true }
        });
        log("\u2705 \u9EA6\u514B\u98CE\u5DF2\u542F\u52A8", "success");
        log("\u{1F4AC} \u8BF7\u5BF9\u7740\u9EA6\u514B\u98CE\u8BF4\u8BDD\uFF0C\u5B9E\u65F6\u8BC6\u522B\u4E2D...", "success");
        ctx = new AudioContext({ sampleRate });
        source = ctx.createMediaStreamSource(stream);
        processor = ctx.createScriptProcessor(4096, 1, 1);
        processor.onaudioprocess = (e) => {
          onAudio(encodePcm16Base64(e.inputBuffer.getChannelData(0)));
        };
        source.connect(processor);
        processor.connect(ctx.destination);
      } catch (err) {
        // Do not leave the microphone open when setup fails half-way.
        try {
          await release();
        } catch (cleanupErr) {
          console.error("[ASR] Failed to release recorder after start error:", cleanupErr);
        }
        throw err;
      }
    },
    async stop() {
      await release();
    }
  };
}
|
|
79
|
+
/**
 * Writes a message to the console, prefixed with its bracketed type tag.
 *
 * @param {*} message - Value to print.
 * @param {string} [type=""] - Tag shown inside the square brackets.
 */
var log = function (message, type = "") {
  const tag = `[${type}]`;
  console.log(tag, message);
};
|
|
82
|
+
// Number of event ids handed out so far in this module instance.
var eventIdCounter = 0;

/**
 * Produces an event id of the form `event_<epoch millis>_<sequence>`, where
 * the sequence number increases by one on every call.
 *
 * @returns {string}
 */
function generateEventId() {
  eventIdCounter += 1;
  return "event_" + Date.now() + "_" + eventIdCounter;
}
|
|
99
86
|
/**
 * Creates a realtime speech-recognition client that talks to the ASR
 * WebSocket endpoint and streams microphone audio to it.
 *
 * @param {object} config
 * @param {Function} [config.onReady] - Called when the server sends `session.updated`.
 * @param {Function} [config.onSpeechStart] - Called on `input_audio_buffer.speech_started`.
 * @param {Function} [config.onSpeechEnd] - Called on `input_audio_buffer.speech_stopped`.
 * @param {Function} [config.onTranscript] - Called with `(text, isFinal)` for transcription events.
 * @param {Function} [config.onAudioBufferCommitted] - Called on `input_audio_buffer.committed`.
 * @param {Function} [config.onSessionFinished] - Called on `session.finished`, before the client closes itself.
 * @param {Function} [config.onError] - Called with an Error for server, socket, message and recorder failures.
 * @param {Function} [config.onClose] - Called when the socket closes.
 * @param {Function} [config.getAccessToken] - Returns a token (or a promise of one) appended as `?token=`.
 * @param {string} [config.audioFormat="pcm"] - Sent as `input_audio_format`.
 * @param {number} [config.sampleRate=16000] - Sent as `sample_rate` and passed to the recorder.
 * @param {string} [config.language="zh"] - Sent as `input_audio_transcription.language`.
 * @param {boolean} [config.enableVAD=true] - When false, `turn_detection` is null and `stopRecording` commits the buffer.
 * @param {number} [config.vadThreshold=0.2] - Sent as `turn_detection.threshold`.
 * @param {number} [config.vadSilenceDurationMs=400] - Sent as `turn_detection.silence_duration_ms`.
 * @returns {{connect: Function, startRecording: Function, stopRecording: Function, close: Function, isRecording: Function, isConnected: Function}}
 */
function createASRClient(config) {
  const {
    onReady,
    onSpeechStart,
    onSpeechEnd,
    onTranscript,
    onAudioBufferCommitted,
    onSessionFinished,
    onError,
    onClose,
    getAccessToken,
    audioFormat = "pcm",
    sampleRate = 16e3,
    language = "zh",
    enableVAD = true,
    vadThreshold = 0.2,
    vadSilenceDurationMs = 400
  } = config;
  let ws = null;
  let recorder = null;
  let isRecordingFlag = false;
  let isClosing = false;
  const path = ASR_PATH;

  /** Serializes and sends one client event; throws if the socket is not open. */
  function sendEvent(event) {
    if (!ws || ws.readyState !== WebSocket.OPEN) {
      throw new Error("WebSocket not connected");
    }
    ws.send(JSON.stringify(event));
  }

  /** Builds the `session` payload for `session.update` from the client options. */
  function buildSessionConfig() {
    return {
      input_audio_format: audioFormat,
      sample_rate: sampleRate,
      input_audio_transcription: {
        language
      },
      turn_detection: enableVAD ? {
        type: "server_vad",
        threshold: vadThreshold,
        silence_duration_ms: vadSilenceDurationMs
      } : null
    };
  }

  function sendSessionUpdate() {
    sendEvent({
      event_id: generateEventId(),
      type: "session.update",
      session: buildSessionConfig()
    });
  }

  function sendAudioBufferAppend(audio) {
    sendEvent({
      event_id: generateEventId(),
      type: "input_audio_buffer.append",
      audio
    });
  }

  function sendAudioBufferCommit() {
    sendEvent({
      event_id: generateEventId(),
      type: "input_audio_buffer.commit"
    });
  }

  function sendSessionFinish() {
    sendEvent({
      event_id: generateEventId(),
      type: "session.finish"
    });
  }

  /** Dispatches one parsed server event to the matching callback. */
  function handleServerEvent(data) {
    switch (data.type) {
      case "session.created":
        try {
          sendSessionUpdate();
        } catch (err) {
          onError?.(
            new Error(
              "Failed to send session.update: " + (err instanceof Error ? err.message : String(err))
            )
          );
        }
        break;
      case "session.updated":
        onReady?.();
        break;
      case "input_audio_buffer.speech_started":
        onSpeechStart?.();
        break;
      case "input_audio_buffer.speech_stopped":
        onSpeechEnd?.();
        break;
      case "input_audio_buffer.committed":
        onAudioBufferCommitted?.();
        break;
      case "conversation.item.input_audio_transcription.text":
        onTranscript?.(data.text || data.stash || data.transcript || "", false);
        break;
      case "conversation.item.input_audio_transcription.completed":
        onTranscript?.(data.text || data.transcript || "", true);
        break;
      case "session.finished":
        onSessionFinished?.();
        // close() is async; attach a handler so a failure cannot surface as
        // an unhandled rejection.
        close().catch((err) => {
          console.error("[ASR] Failed to close after session.finished:", err);
        });
        break;
      case "error": {
        onError?.(new Error(data.error?.message || "ASR error"));
        break;
      }
      default:
        console.warn("[ASR] Unknown server event:", data.type);
    }
  }

  /**
   * Stops and forgets the active recorder, if any. State is cleared before
   * awaiting so concurrent callers never stop the same recorder twice.
   */
  async function releaseRecorder() {
    const active = recorder;
    recorder = null;
    isRecordingFlag = false;
    if (!active) {
      return;
    }
    try {
      await active.stop();
    } catch (err) {
      console.error("[ASR] Error stopping recorder:", err);
    }
  }

  /**
   * Opens the WebSocket and waits for the session to become ready.
   *
   * Resolves when the server sends `session.updated`. Rejects on a socket
   * error, on a message that cannot be parsed or handled, on a server `error`
   * event, or when the socket closes first. Rejections after the promise has
   * resolved are no-ops.
   *
   * @returns {Promise<void>}
   */
  async function connect() {
    let wsUrl = path;
    if (getAccessToken) {
      // await is harmless for a plain string and lets async token providers work.
      const token = await getAccessToken();
      if (token) {
        const separator = path.includes("?") ? "&" : "?";
        wsUrl = `${path}${separator}token=${encodeURIComponent(token)}`;
      }
    }
    if (typeof window !== "undefined" && window.location) {
      const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
      if (!wsUrl.startsWith("ws://") && !wsUrl.startsWith("wss://")) {
        wsUrl = `${protocol}//${window.location.host}${wsUrl}`;
      }
    }
    // Handlers close over `socket` rather than `ws` so that events from an old
    // connection can never touch a newer one.
    const socket = new WebSocket(wsUrl);
    ws = socket;
    return new Promise((resolve, reject) => {
      const fail = (prefix, err) => {
        const error = new Error(prefix + (err instanceof Error ? err.message : String(err)));
        onError?.(error);
        reject(error);
      };
      socket.onopen = () => {
        log("WebSocket connected", "success");
      };
      socket.onmessage = (event) => {
        let data;
        try {
          data = JSON.parse(event.data);
        } catch (err) {
          fail("Failed to parse server message: ", err);
          return;
        }
        try {
          handleServerEvent(data);
        } catch (err) {
          fail("Failed to handle server message: ", err);
          return;
        }
        if (data.type === "session.updated") {
          resolve();
        } else if (data.type === "error") {
          // onError was already invoked by handleServerEvent.
          reject(new Error(data.error?.message || "ASR error"));
        }
      };
      socket.onerror = (error) => {
        console.error("WebSocket error:", error);
        const err = new Error("WebSocket error");
        onError?.(err);
        reject(err);
      };
      socket.onclose = () => {
        if (ws === socket) {
          ws = null;
          // Without a socket nothing consumes the audio; release the microphone.
          void releaseRecorder();
        }
        reject(new Error("WebSocket closed before session was ready"));
        onClose?.();
      };
    });
  }

  /**
   * Starts capturing microphone audio and streaming it to the server.
   * Does nothing when a recording is already active or starting.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the socket is not open, or the recorder fails to start
   *   (also reported through `onError`).
   */
  async function startRecording() {
    if (!ws || ws.readyState !== WebSocket.OPEN) {
      throw new Error("WebSocket not connected");
    }
    if (isRecordingFlag) {
      return;
    }
    // Set before the first await so overlapping calls cannot start two recorders.
    isRecordingFlag = true;
    try {
      const active = await createRealtimeRecorder(sampleRate);
      recorder = active;
      await active.start((audio) => {
        if (!ws || ws.readyState !== WebSocket.OPEN) return;
        try {
          sendAudioBufferAppend(audio);
        } catch (err) {
          console.error("[ASR] Failed to send audio:", err);
        }
      });
      if (recorder !== active) {
        // The recording was released (socket closed or stopRecording ran)
        // while the microphone was still starting; do not leave it open.
        await active.stop();
      }
    } catch (err) {
      console.error("[ASR] Failed to start recorder:", err);
      await releaseRecorder();
      onError?.(err instanceof Error ? err : new Error(String(err)));
      throw err;
    }
  }

  /**
   * Stops the microphone. When VAD is disabled and the socket is still open,
   * also commits the audio buffer.
   *
   * @returns {Promise<void>}
   */
  async function stopRecording() {
    if (!isRecordingFlag && !recorder) {
      return;
    }
    await releaseRecorder();
    if (!enableVAD && ws?.readyState === WebSocket.OPEN) {
      try {
        sendAudioBufferCommit();
      } catch (err) {
        console.error("[ASR] Failed to send commit:", err);
      }
    }
  }

  /**
   * Stops recording, sends `session.finish` (waiting one second afterwards),
   * and closes the socket. Calls made while a close is in progress return
   * immediately.
   *
   * @returns {Promise<void>}
   */
  async function close() {
    if (isClosing) {
      return;
    }
    isClosing = true;
    try {
      await stopRecording();
      if (ws?.readyState === WebSocket.OPEN) {
        try {
          sendSessionFinish();
          await new Promise((resolve) => setTimeout(resolve, 1e3));
        } catch (err) {
          console.error("[ASR] Failed to send session.finish:", err);
        }
      }
      // ws may have been cleared by onclose during the wait above.
      if (ws && ws.readyState !== WebSocket.CLOSING && ws.readyState !== WebSocket.CLOSED) {
        ws.close();
      }
      ws = null;
    } finally {
      isClosing = false;
    }
  }

  /** @returns {boolean} Whether a recording is active or starting. */
  function isRecording() {
    return isRecordingFlag;
  }

  /** @returns {boolean} Whether the socket exists and is open. */
  function isConnected() {
    return ws !== null && ws.readyState === WebSocket.OPEN;
  }

  return {
    connect,
    startRecording,
    stopRecording,
    close,
    isRecording,
    isConnected
  };
}
|
|
337
|
+
/**
 * Curried factory: binds shared auth settings first, then builds a realtime
 * ASR client from per-call settings. Per-call keys override auth keys.
 */
var asr_client_default = (authConfig) => {
  return (config) => {
    const merged = { ...authConfig, ...config };
    return createASRClient(merged);
  };
};
|
|
338
|
+
|
|
339
|
+
// src/http-asr-client.ts
|
|
340
|
+
var import_http_client = require("@amaster.ai/http-client");
|
|
341
|
+
var ASR_HTTP_PATH = "/api/proxy/builtin/platform/qwen-asr/compatible-mode/v1/chat/completions";
|
|
342
|
+
var RECORDER_WORKLET = `
|
|
343
|
+
class RecorderProcessor extends AudioWorkletProcessor {
|
|
344
|
+
process(inputs) {
|
|
345
|
+
const input = inputs[0];
|
|
346
|
+
if (input && input[0]) {
|
|
347
|
+
this.port.postMessage(input[0].slice(0));
|
|
348
|
+
}
|
|
349
|
+
return true;
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
registerProcessor('recorder-processor', RecorderProcessor);
|
|
353
|
+
`;
|
|
354
|
+
/**
 * Creates a browser recorder that buffers microphone audio through an
 * AudioWorklet and, on stop, delivers the whole recording as base64 WAV.
 *
 * @param {object} [props]
 * @param {Function} [props.onStart] - Called once capture is wired up.
 * @param {Function} [props.onStop] - Called with the base64 WAV data; its
 *   result is awaited, so `stop()` settles after an async handler finishes.
 * @param {Function} [props.onError] - Called with an Error when `start()`
 *   fails; `start()` itself does not reject in that case.
 * @returns {Promise<{start: () => Promise<void>, stop: () => Promise<void>}>}
 */
async function createWebRecorder(props) {
  let stream;
  let ctx;
  let node;
  let source;
  const chunks = [];

  // Best-effort teardown. `ctx` is intentionally kept so stop() can still
  // read its sampleRate afterwards.
  const cleanup = () => {
    try {
      // Stop the tracks first: releasing the microphone matters most.
      stream?.getTracks().forEach((t) => t.stop());
      source?.disconnect();
      node?.disconnect();
    } catch (e) {
      console.warn("[ASR] Recorder cleanup failed:", e);
    }
    stream = void 0;
    source = void 0;
    node = void 0;
    // close() rejects on an already-closed context and returns a promise, so
    // guard on state and handle the rejection instead of letting it float.
    if (ctx && ctx.state !== "closed") {
      ctx.close().catch((e) => {
        console.warn("[ASR] Failed to close AudioContext:", e);
      });
    }
  };

  return {
    async start() {
      try {
        // Drop audio left over from a previous start/stop cycle.
        chunks.length = 0;
        stream = await navigator.mediaDevices.getUserMedia({
          audio: {
            channelCount: 1,
            echoCancellation: true,
            noiseSuppression: true,
            autoGainControl: true
          }
        });
        ctx = new AudioContext();
        const url = URL.createObjectURL(
          new Blob([RECORDER_WORKLET], {
            type: "application/javascript"
          })
        );
        try {
          await ctx.audioWorklet.addModule(url);
        } finally {
          // Revoke even when the module fails to load.
          URL.revokeObjectURL(url);
        }
        source = ctx.createMediaStreamSource(stream);
        node = new AudioWorkletNode(ctx, "recorder-processor");
        node.port.onmessage = (e) => {
          const input = e.data;
          const pcm = new Int16Array(input.length);
          for (let i = 0; i < input.length; i++) {
            const s = Math.max(-1, Math.min(1, input[i] || 0));
            pcm[i] = s < 0 ? s * 32768 : s * 32767;
          }
          chunks.push(pcm);
        };
        source.connect(node);
        props?.onStart?.();
      } catch (error) {
        // Release resources before notifying, so a throwing onError handler
        // cannot leave the microphone open.
        cleanup();
        props?.onError?.(
          error instanceof Error ? error : new Error(String(error))
        );
      }
    },
    async stop() {
      cleanup();
      const total = chunks.reduce((sum, c) => sum + c.length, 0);
      const pcm = new Int16Array(total);
      let offset = 0;
      for (const c of chunks) {
        pcm.set(c, offset);
        offset += c.length;
      }
      chunks.length = 0;
      const sampleRate = ctx?.sampleRate ?? 16e3;
      const base64 = await blobToBase64(pcmToWav(pcm, sampleRate));
      await props?.onStop?.(base64);
    }
  };
}
|
|
424
|
+
/**
 * Wraps 16-bit PCM samples in a 44-byte RIFF/WAVE header.
 *
 * @param {Int16Array} pcm - Samples; interleaved when `numChannels` > 1.
 * @param {number} sampleRate - Sample rate written to the header, in Hz.
 * @param {number} [numChannels=1] - Channel count written to the header.
 * @returns {Blob} Blob of type `audio/wav`.
 */
function pcmToWav(pcm, sampleRate, numChannels = 1) {
  const BYTES_PER_SAMPLE = 2;
  const HEADER_SIZE = 44;
  const dataSize = pcm.length * BYTES_PER_SAMPLE;
  const blockAlign = numChannels * BYTES_PER_SAMPLE;
  const buffer = new ArrayBuffer(HEADER_SIZE + dataSize);
  const view = new DataView(buffer);
  const writeTag = (offset, tag) => {
    for (let i = 0; i < tag.length; i++) {
      view.setUint8(offset + i, tag.charCodeAt(i));
    }
  };
  writeTag(0, "RIFF");
  view.setUint32(4, 36 + dataSize, true); // RIFF chunk size: file size minus 8
  writeTag(8, "WAVE");
  writeTag(12, "fmt ");
  view.setUint32(16, 16, true); // fmt chunk size
  view.setUint16(20, 1, true); // audio format 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true); // byte rate
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, 16, true); // bits per sample
  writeTag(36, "data");
  view.setUint32(40, dataSize, true);
  // DataView keeps the samples little-endian regardless of platform byte order.
  for (let i = 0; i < pcm.length; i++) {
    view.setInt16(HEADER_SIZE + i * BYTES_PER_SAMPLE, pcm[i], true);
  }
  return new Blob([buffer], { type: "audio/wav" });
}
|
|
448
|
+
/**
 * Reads a Blob and returns its contents as base64 text, i.e. the part of the
 * data URL after the first comma.
 *
 * @param {Blob} blob
 * @returns {Promise<string>} Base64 payload, or "" when the data URL has none.
 *   Rejects with an Error if the read fails.
 */
function blobToBase64(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // Use `load`, not `loadend`: loadend also fires after a failed read, when
    // `result` is null and splitting it would throw inside the handler.
    reader.onload = () => {
      const result = reader.result;
      resolve(typeof result === "string" ? result.split(",")[1] || "" : "");
    };
    reader.onerror = () => {
      reject(reader.error ?? new Error("Failed to read blob"));
    };
    reader.readAsDataURL(blob);
  });
}
|
|
459
|
+
/**
 * Posts one chat-completions request carrying a single `input_audio` item and
 * returns the text of the first choice.
 *
 * @param {{request: Function}} http - Client whose `request` performs the call.
 * @param {string} path - Request URL.
 * @param {object} inputAudio - Value sent as `input_audio` (`{ data }` or `{ url }`).
 * @returns {Promise<string>} Recognized text, or "" when the response has none.
 */
async function requestAsrTranscription(http, path, inputAudio) {
  const response = await http.request({
    url: path,
    method: "POST",
    headers: { "Content-Type": "application/json" },
    data: JSON.stringify({
      model: "qwen3-asr-flash",
      messages: [
        {
          role: "user",
          content: [{ type: "input_audio", input_audio: inputAudio }]
        }
      ]
    })
  });
  return response?.data?.choices?.[0]?.message?.content || "";
}

/**
 * HTTP speech-recognition client: records audio (through
 * `config.createRecorder` or the built-in web recorder) and sends it, or an
 * audio URL, to the ASR endpoint.
 *
 * Callbacks read from `config`: `onRecordingStart`, `onRecordingStop`,
 * `onStatusChange` ("recording" | "recognizing" | "idle"), `onResult`, `onError`.
 */
var AsrHttpClient = class {
  /**
   * @param {object} config - Options and callbacks; `config.http` overrides the default HTTP client.
   * @param {string} path - Endpoint URL used for every recognition request.
   */
  constructor(config, path) {
    this.recorder = null;
    this.path = "";
    this.recognizing = false;
    // True while startRecording() is creating/starting a recorder.
    this.starting = false;
    this.http = config.http ?? (0, import_http_client.createHttpClient)();
    this.config = config;
    this.path = path;
  }

  /**
   * Creates and starts a recorder. Does nothing when one is already active or
   * being started.
   *
   * @returns {Promise<void>} Rejects if creating or starting the recorder throws.
   */
  async startRecording() {
    if (this.recorder || this.starting) {
      return;
    }
    this.starting = true;
    let recorder = null;
    // Only clear the slot if it still holds this recorder, so a late callback
    // from an old recording cannot discard a newer one.
    const release = () => {
      if (this.recorder === recorder) {
        this.recorder = null;
      }
    };
    const options = {
      onStart: () => {
        this.config.onRecordingStart?.();
        this.config.onStatusChange?.("recording");
      },
      onStop: async (base64) => {
        this.config.onStatusChange?.("recognizing");
        const text = await this.recognizeFile(base64);
        this.config.onResult?.(text);
        this.config.onRecordingStop?.();
        this.config.onStatusChange?.("idle");
        release();
      },
      onError: (err) => {
        this.config.onError?.(err);
        this.config.onStatusChange?.("idle");
        release();
      }
    };
    try {
      recorder = await (this.config.createRecorder?.(options) ?? createWebRecorder(options));
      this.recorder = recorder;
      await recorder.start();
    } catch (err) {
      // A recorder that failed to start must not block later attempts.
      release();
      throw err;
    } finally {
      this.starting = false;
    }
  }

  /**
   * Stops the active recorder. Without an active recorder it reports an empty
   * result and the idle status instead.
   *
   * @returns {Promise<void>}
   */
  async stopRecording() {
    const recorder = this.recorder;
    if (!recorder) {
      this.config.onResult?.("");
      this.config.onRecordingStop?.();
      this.config.onStatusChange?.("idle");
      return;
    }
    try {
      await recorder.stop();
    } finally {
      if (this.recorder === recorder) {
        this.recorder = null;
      }
    }
  }

  /**
   * Recognizes base64-encoded WAV audio.
   *
   * @param {string} base64 - WAV data without the data-URL prefix.
   * @returns {Promise<string>} Recognized text; "" when a recognition is
   *   already in flight or the request fails (failures are logged and passed
   *   to `config.onError`).
   */
  async recognizeFile(base64) {
    if (this.recognizing) {
      return "";
    }
    this.recognizing = true;
    try {
      return await requestAsrTranscription(this.http, this.path, {
        data: `data:audio/wav;base64,${base64}`
      });
    } catch (e) {
      console.error("ASR recognition error:", e);
      this.config.onError?.(e instanceof Error ? e : new Error(String(e)));
      return "";
    } finally {
      this.recognizing = false;
    }
  }

  /**
   * Records for `ms` milliseconds, then stops; the text is delivered through
   * `config.onResult`.
   *
   * @param {number} ms - Recording duration in milliseconds.
   * @returns {Promise<void>}
   */
  async recordAndRecognize(ms) {
    await this.startRecording();
    await new Promise((r) => setTimeout(r, ms));
    await this.stopRecording();
  }

  /**
   * Recognizes audio referenced by URL.
   *
   * @param {string} url - Audio location sent as `input_audio.url`.
   * @returns {Promise<string>} Recognized text; "" when the request fails
   *   (failures are logged and passed to `config.onError`).
   */
  async recognizeUrl(url) {
    try {
      return await requestAsrTranscription(this.http, this.path, { url });
    } catch (e) {
      console.error("ASR recognition error:", e);
      this.config.onError?.(e instanceof Error ? e : new Error(String(e)));
      return "";
    }
  }
};
|
|
565
|
+
/**
 * Builds an AsrHttpClient for the HTTP ASR endpoint. When
 * `config.getAccessToken` yields a truthy token, it is URL-encoded and
 * appended to the endpoint as a `token` query parameter.
 *
 * @param {object} config - Client options, forwarded unchanged to AsrHttpClient.
 * @returns {AsrHttpClient}
 */
function createASRHttpClient(config) {
  const token = config.getAccessToken ? config.getAccessToken() : void 0;
  if (!token) {
    return new AsrHttpClient(config, ASR_HTTP_PATH);
  }
  const joiner = ASR_HTTP_PATH.includes("?") ? "&" : "?";
  const endpoint = ASR_HTTP_PATH + joiner + "token=" + encodeURIComponent(token);
  return new AsrHttpClient(config, endpoint);
}
|
|
576
|
+
/**
 * Curried factory: binds shared auth settings first, then builds an HTTP ASR
 * client from per-call settings. Per-call keys override auth keys.
 */
var http_asr_client_default = (authConfig) => {
  return (config) => {
    const merged = { ...authConfig, ...config };
    return createASRHttpClient(merged);
  };
};
|
|
230
577
|
// Annotate the CommonJS export names for ESM import in node:
|
|
231
578
|
0 && (module.exports = {
|
|
232
579
|
createASRClient,
|
|
233
|
-
|
|
234
|
-
listen
|
|
580
|
+
createASRHttpClient
|
|
235
581
|
});
|
|
236
582
|
//# sourceMappingURL=index.cjs.map
|