@amaster.ai/asr-client 1.1.0-beta.7 → 1.1.0-beta.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -20,18 +20,21 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
- createASRClient: () => createASRClient,
24
- createAsrClient: () => createASRClient
23
+ createASRClient: () => asr_client_default,
24
+ createASRHttpClient: () => http_asr_client_default
25
25
  });
26
26
  module.exports = __toCommonJS(index_exports);
27
27
 
28
28
  // src/asr-client.ts
29
29
  var ASR_PATH = "/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime";
30
30
  async function createRealtimeRecorder() {
31
- let stream;
32
- let ctx;
33
- let source;
34
- let processor;
31
+ let stream = null;
32
+ let ctx = null;
33
+ let source = null;
34
+ let processor = null;
35
+ if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
36
+ throw new Error("\u672A\u627E\u5230\u9EA6\u514B\u98CE\u6216\u65E0\u6743\u9650");
37
+ }
35
38
  return {
36
39
  async start(onAudio) {
37
40
  stream = await navigator.mediaDevices.getUserMedia({
@@ -63,60 +66,191 @@ async function createRealtimeRecorder() {
63
66
  stream?.getTracks().forEach((t) => t.stop());
64
67
  source?.disconnect();
65
68
  processor?.disconnect();
66
- await ctx?.close();
69
+ if (ctx) {
70
+ await ctx.close();
71
+ }
72
+ stream = null;
73
+ ctx = null;
74
+ source = null;
75
+ processor = null;
67
76
  }
68
77
  };
69
78
  }
70
79
  var log = (message, type = "") => {
71
80
  console.log(`[${type}]`, message);
72
81
  };
82
+ var eventIdCounter = 0;
83
+ function generateEventId() {
84
+ return `event_${Date.now()}_${++eventIdCounter}`;
85
+ }
73
86
  function createASRClient(config) {
74
87
  const {
75
88
  onReady,
76
89
  onSpeechStart,
77
90
  onSpeechEnd,
78
91
  onTranscript,
92
+ onAudioBufferCommitted,
93
+ onSessionFinished,
79
94
  onError,
80
95
  onClose,
81
- path = ASR_PATH
96
+ getAccessToken,
97
+ audioFormat = "pcm",
98
+ sampleRate = 16e3,
99
+ language = "zh",
100
+ enableVAD = true,
101
+ vadThreshold = 0.2,
102
+ vadSilenceDurationMs = 400
82
103
  } = config;
83
104
  let ws = null;
84
105
  let recorder = null;
106
+ let isRecordingFlag = false;
107
+ let isClosing = false;
108
+ const path = ASR_PATH;
109
+ function sendEvent(event) {
110
+ if (!ws || ws.readyState !== WebSocket.OPEN) {
111
+ throw new Error("WebSocket not connected");
112
+ }
113
+ ws.send(JSON.stringify(event));
114
+ }
115
+ function buildSessionConfig() {
116
+ const sessionConfig = {
117
+ input_audio_format: audioFormat,
118
+ sample_rate: sampleRate,
119
+ input_audio_transcription: {
120
+ language
121
+ }
122
+ };
123
+ if (enableVAD) {
124
+ sessionConfig.turn_detection = {
125
+ type: "server_vad",
126
+ threshold: vadThreshold,
127
+ silence_duration_ms: vadSilenceDurationMs
128
+ };
129
+ } else {
130
+ sessionConfig.turn_detection = null;
131
+ }
132
+ return sessionConfig;
133
+ }
134
+ function sendSessionUpdate() {
135
+ const event = {
136
+ event_id: generateEventId(),
137
+ type: "session.update",
138
+ session: buildSessionConfig()
139
+ };
140
+ sendEvent(event);
141
+ }
142
+ function sendAudioBufferAppend(audio) {
143
+ const event = {
144
+ event_id: generateEventId(),
145
+ type: "input_audio_buffer.append",
146
+ audio
147
+ };
148
+ sendEvent(event);
149
+ }
150
+ function sendAudioBufferCommit() {
151
+ const event = {
152
+ event_id: generateEventId(),
153
+ type: "input_audio_buffer.commit"
154
+ };
155
+ sendEvent(event);
156
+ }
157
+ function sendSessionFinish() {
158
+ const event = {
159
+ event_id: generateEventId(),
160
+ type: "session.finish"
161
+ };
162
+ sendEvent(event);
163
+ }
164
+ function handleServerEvent(data) {
165
+ switch (data.type) {
166
+ case "session.created":
167
+ try {
168
+ sendSessionUpdate();
169
+ } catch (err2) {
170
+ onError?.(
171
+ new Error(
172
+ "Failed to send session.update: " + (err2 instanceof Error ? err2.message : String(err2))
173
+ )
174
+ );
175
+ }
176
+ break;
177
+ case "session.updated":
178
+ onReady?.();
179
+ break;
180
+ case "input_audio_buffer.speech_started":
181
+ onSpeechStart?.();
182
+ break;
183
+ case "input_audio_buffer.speech_stopped":
184
+ onSpeechEnd?.();
185
+ break;
186
+ case "input_audio_buffer.committed":
187
+ onAudioBufferCommitted?.();
188
+ break;
189
+ case "conversation.item.input_audio_transcription.text":
190
+ onTranscript?.(data.text || data.stash || data.transcript || "", false);
191
+ break;
192
+ case "conversation.item.input_audio_transcription.completed":
193
+ onTranscript?.(data.text || data.transcript || "", true);
194
+ break;
195
+ case "session.finished":
196
+ onSessionFinished?.();
197
+ close();
198
+ break;
199
+ case "error":
200
+ const err = new Error(data.error?.message || "ASR error");
201
+ onError?.(err);
202
+ break;
203
+ default:
204
+ console.warn("[ASR] Unknown server event:", data.type);
205
+ }
206
+ }
85
207
  async function connect() {
86
- ws = new WebSocket(path);
208
+ let wsUrl = path;
209
+ if (getAccessToken) {
210
+ const token = getAccessToken();
211
+ if (token) {
212
+ const separator = path.includes("?") ? "&" : "?";
213
+ wsUrl = `${path}${separator}token=${encodeURIComponent(token)}`;
214
+ }
215
+ }
216
+ if (typeof window !== "undefined" && window.location) {
217
+ const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
218
+ if (!wsUrl.startsWith("ws://") && !wsUrl.startsWith("wss://")) {
219
+ wsUrl = `${protocol}//${window.location.host}${wsUrl}`;
220
+ }
221
+ }
222
+ ws = new WebSocket(wsUrl);
87
223
  return new Promise((resolve, reject) => {
224
+ if (!ws) {
225
+ reject(new Error("Failed to create WebSocket"));
226
+ return;
227
+ }
228
+ ws.onopen = () => {
229
+ log("WebSocket connected", "success");
230
+ };
88
231
  ws.onmessage = (event) => {
89
- const data = JSON.parse(event.data);
90
- if (data.type === "session.created") {
91
- onReady?.();
92
- resolve();
93
- }
94
- if (data.type === "input_audio_buffer.speech_started") {
95
- onSpeechStart?.();
96
- }
97
- if (data.type === "input_audio_buffer.speech_stopped") {
98
- onSpeechEnd?.();
99
- }
100
- if (data.type === "conversation.item.input_audio_transcription.text") {
101
- onTranscript?.(data.text || data.stash || data.transcript || "", false);
102
- }
103
- if (data.type === "conversation.item.input_audio_transcription.completed") {
104
- onTranscript?.(data.text || data.transcript || "", true);
105
- }
106
- if (data.type === "error") {
107
- const err = new Error(data.error?.message || "ASR error");
108
- onError?.(err);
109
- reject(err);
232
+ try {
233
+ const data = JSON.parse(event.data);
234
+ handleServerEvent(data);
235
+ if (data.type === "session.updated") {
236
+ resolve();
237
+ }
238
+ } catch (err) {
239
+ const error = new Error(
240
+ "Failed to parse server message: " + (err instanceof Error ? err.message : String(err))
241
+ );
242
+ onError?.(error);
243
+ reject(error);
110
244
  }
111
245
  };
112
- ws.onerror = () => {
246
+ ws.onerror = (error) => {
247
+ console.error("WebSocket error:", error);
113
248
  const err = new Error("WebSocket error");
114
249
  onError?.(err);
115
250
  reject(err);
116
251
  };
117
252
  ws.onclose = () => {
118
- recorder?.stop();
119
- recorder = null;
253
+ isRecordingFlag = false;
120
254
  ws = null;
121
255
  onClose?.();
122
256
  };
@@ -126,42 +260,323 @@ function createASRClient(config) {
126
260
  if (!ws || ws.readyState !== WebSocket.OPEN) {
127
261
  throw new Error("WebSocket not connected");
128
262
  }
129
- recorder = await createRealtimeRecorder();
130
- await recorder.start((audio) => {
131
- if (!ws || ws.readyState !== WebSocket.OPEN) return;
132
- ws.send(
133
- JSON.stringify({
134
- type: "input_audio_buffer.append",
135
- audio
136
- })
137
- );
138
- });
263
+ if (isRecordingFlag) {
264
+ return;
265
+ }
266
+ try {
267
+ recorder = await createRealtimeRecorder();
268
+ isRecordingFlag = true;
269
+ await recorder.start((audio) => {
270
+ if (!ws || ws.readyState !== WebSocket.OPEN) return;
271
+ try {
272
+ sendAudioBufferAppend(audio);
273
+ } catch (err) {
274
+ console.error("[ASR] Failed to send audio:", err);
275
+ }
276
+ });
277
+ } catch (err) {
278
+ console.error("[ASR] Failed to start recorder:", err);
279
+ onError?.(err instanceof Error ? err : new Error(String(err)));
280
+ throw err;
281
+ }
139
282
  }
140
283
  async function stopRecording() {
284
+ if (!isRecordingFlag) {
285
+ return;
286
+ }
141
287
  try {
142
288
  await recorder?.stop();
143
289
  } catch (err) {
290
+ console.error("[ASR] Error stopping recorder:", err);
144
291
  }
145
292
  recorder = null;
146
- if (ws && ws.readyState === WebSocket.OPEN) {
147
- ws.send(JSON.stringify({ type: "input_audio_buffer.commit" }));
293
+ isRecordingFlag = false;
294
+ if (!enableVAD && ws?.readyState === WebSocket.OPEN) {
295
+ try {
296
+ sendAudioBufferCommit();
297
+ } catch (err) {
298
+ console.error("[ASR] Failed to send commit:", err);
299
+ }
148
300
  }
149
301
  }
150
- function close() {
151
- stopRecording();
152
- ws?.close();
302
+ async function close() {
303
+ if (isClosing) {
304
+ return;
305
+ }
306
+ isClosing = true;
307
+ await stopRecording();
308
+ if (ws?.readyState === WebSocket.OPEN) {
309
+ try {
310
+ sendSessionFinish();
311
+ await new Promise((resolve) => setTimeout(resolve, 1e3));
312
+ } catch (err) {
313
+ console.error("[ASR] Failed to send session.finish:", err);
314
+ }
315
+ }
316
+ if (ws && ws?.readyState !== WebSocket.CLOSING && ws?.readyState !== WebSocket.CLOSED) {
317
+ ws?.close();
318
+ }
153
319
  ws = null;
320
+ isClosing = false;
321
+ }
322
+ function isRecording() {
323
+ return isRecordingFlag;
324
+ }
325
+ function isConnected() {
326
+ return ws !== null && ws.readyState === WebSocket.OPEN;
154
327
  }
155
328
  return {
156
329
  connect,
157
330
  startRecording,
158
331
  stopRecording,
159
- close
332
+ close,
333
+ isRecording,
334
+ isConnected
160
335
  };
161
336
  }
337
+ var asr_client_default = (authConfig) => (config) => createASRClient({ ...authConfig, ...config });
338
+
339
+ // src/http-asr-client.ts
340
+ var import_http_client = require("@amaster.ai/http-client");
341
+ var ASR_HTTP_PATH = "/api/proxy/builtin/platform/qwen-asr/compatible-mode/v1/chat/completions";
342
+ var RECORDER_WORKLET = `
343
+ class RecorderProcessor extends AudioWorkletProcessor {
344
+ process(inputs) {
345
+ const input = inputs[0];
346
+ if (input && input[0]) {
347
+ this.port.postMessage(input[0].slice(0));
348
+ }
349
+ return true;
350
+ }
351
+ }
352
+ registerProcessor('recorder-processor', RecorderProcessor);
353
+ `;
354
+ async function createWebRecorder(props) {
355
+ let stream;
356
+ let ctx;
357
+ let node;
358
+ let source;
359
+ const chunks = [];
360
+ const cleanup = () => {
361
+ try {
362
+ source?.disconnect();
363
+ node?.disconnect();
364
+ stream?.getTracks().forEach((t) => t.stop());
365
+ ctx?.close();
366
+ } catch (e) {
367
+ }
368
+ };
369
+ return {
370
+ async start() {
371
+ try {
372
+ stream = await navigator.mediaDevices.getUserMedia({
373
+ audio: {
374
+ channelCount: 1,
375
+ echoCancellation: true,
376
+ noiseSuppression: true,
377
+ autoGainControl: true
378
+ }
379
+ });
380
+ ctx = new AudioContext();
381
+ const blob = new Blob([RECORDER_WORKLET], {
382
+ type: "application/javascript"
383
+ });
384
+ const url = URL.createObjectURL(blob);
385
+ await ctx.audioWorklet.addModule(url);
386
+ URL.revokeObjectURL(url);
387
+ source = ctx.createMediaStreamSource(stream);
388
+ node = new AudioWorkletNode(ctx, "recorder-processor");
389
+ node.port.onmessage = (e) => {
390
+ const input = e.data;
391
+ const pcm = new Int16Array(input.length);
392
+ for (let i = 0; i < input.length; i++) {
393
+ const s = Math.max(-1, Math.min(1, input[i] || 0));
394
+ pcm[i] = s < 0 ? s * 32768 : s * 32767;
395
+ }
396
+ chunks.push(pcm);
397
+ };
398
+ source.connect(node);
399
+ props?.onStart?.();
400
+ } catch (error) {
401
+ props?.onError?.(
402
+ error instanceof Error ? error : new Error(String(error))
403
+ );
404
+ cleanup();
405
+ }
406
+ },
407
+ async stop() {
408
+ cleanup();
409
+ const total = chunks.reduce((s, c) => s + c.length, 0);
410
+ const pcm = new Int16Array(total);
411
+ let offset = 0;
412
+ for (const c of chunks) {
413
+ pcm.set(c, offset);
414
+ offset += c.length;
415
+ }
416
+ const result = { pcm, sampleRate: ctx?.sampleRate ?? 16e3 };
417
+ const base64 = await blobToBase64(
418
+ pcmToWav(result.pcm, result.sampleRate)
419
+ );
420
+ props?.onStop?.(base64);
421
+ }
422
+ };
423
+ }
424
+ function pcmToWav(pcm, sampleRate) {
425
+ const buffer = new ArrayBuffer(44 + pcm.length * 2);
426
+ const view = new DataView(buffer);
427
+ const write = (o, s) => {
428
+ for (let i = 0; i < s.length; i++) view.setUint8(o + i, s.charCodeAt(i));
429
+ };
430
+ write(0, "RIFF");
431
+ view.setUint32(4, 36 + pcm.length * 2, true);
432
+ write(8, "WAVE");
433
+ write(12, "fmt ");
434
+ view.setUint32(16, 16, true);
435
+ view.setUint16(20, 1, true);
436
+ view.setUint16(22, 1, true);
437
+ view.setUint32(24, sampleRate, true);
438
+ view.setUint32(28, sampleRate * 2, true);
439
+ view.setUint16(32, 2, true);
440
+ view.setUint16(34, 16, true);
441
+ write(36, "data");
442
+ view.setUint32(40, pcm.length * 2, true);
443
+ for (let i = 0; i < pcm.length; i++) {
444
+ view.setInt16(44 + i * 2, pcm[i] || 0, true);
445
+ }
446
+ return new Blob([buffer], { type: "audio/wav" });
447
+ }
448
+ function blobToBase64(blob) {
449
+ return new Promise((resolve, reject) => {
450
+ const reader = new FileReader();
451
+ reader.onloadend = () => {
452
+ const result = reader.result;
453
+ resolve(result.split(",")[1] || "");
454
+ };
455
+ reader.onerror = reject;
456
+ reader.readAsDataURL(blob);
457
+ });
458
+ }
459
+ var AsrHttpClient = class {
460
+ constructor(config, path) {
461
+ this.recorder = null;
462
+ this.path = "";
463
+ this.recognizing = false;
464
+ this.http = config.http ?? (0, import_http_client.createHttpClient)();
465
+ this.config = config;
466
+ this.path = path;
467
+ }
468
+ async startRecording() {
469
+ if (this.recorder) {
470
+ return;
471
+ }
472
+ const options = {
473
+ onStart: () => {
474
+ this.config.onRecordingStart?.();
475
+ this.config.onStatusChange?.("recording");
476
+ },
477
+ onStop: async (base64) => {
478
+ this.config.onStatusChange?.("recognizing");
479
+ const text = await this.recognizeFile(base64);
480
+ this.config.onResult?.(text);
481
+ this.config.onRecordingStop?.();
482
+ this.config.onStatusChange?.("idle");
483
+ this.recorder = null;
484
+ },
485
+ onError: (err) => {
486
+ this.config.onError?.(err);
487
+ this.config.onStatusChange?.("idle");
488
+ this.recorder = null;
489
+ }
490
+ };
491
+ this.recorder = await (this.config.createRecorder?.(options) ?? createWebRecorder(options));
492
+ await this.recorder.start();
493
+ }
494
+ async stopRecording() {
495
+ if (this.recorder) {
496
+ await this.recorder.stop();
497
+ this.recorder = null;
498
+ } else {
499
+ this.config.onResult?.("");
500
+ this.config.onRecordingStop?.();
501
+ this.config.onStatusChange?.("idle");
502
+ }
503
+ }
504
+ async recognizeFile(base64) {
505
+ if (this.recognizing) {
506
+ return "";
507
+ }
508
+ this.recognizing = true;
509
+ try {
510
+ const response = await this.http.request({
511
+ url: this.path,
512
+ method: "POST",
513
+ headers: { "Content-Type": "application/json" },
514
+ data: JSON.stringify({
515
+ model: "qwen3-asr-flash",
516
+ messages: [
517
+ {
518
+ role: "user",
519
+ content: [
520
+ {
521
+ type: "input_audio",
522
+ input_audio: { data: `data:audio/wav;base64,${base64}` }
523
+ }
524
+ ]
525
+ }
526
+ ]
527
+ })
528
+ });
529
+ return response?.data?.choices?.[0]?.message?.content || "";
530
+ } catch (e) {
531
+ console.error("ASR recognition error:", e);
532
+ return "";
533
+ } finally {
534
+ this.recognizing = false;
535
+ }
536
+ }
537
+ async recordAndRecognize(ms) {
538
+ await this.startRecording();
539
+ await new Promise((r) => setTimeout(r, ms));
540
+ await this.stopRecording();
541
+ }
542
+ async recognizeUrl(url) {
543
+ try {
544
+ const res = await this.http.request({
545
+ url: this.path,
546
+ method: "POST",
547
+ headers: { "Content-Type": "application/json" },
548
+ data: JSON.stringify({
549
+ model: "qwen3-asr-flash",
550
+ messages: [
551
+ {
552
+ role: "user",
553
+ content: [{ type: "input_audio", input_audio: { url } }]
554
+ }
555
+ ]
556
+ })
557
+ });
558
+ return res?.data?.choices?.[0]?.message?.content || "";
559
+ } catch (e) {
560
+ console.error("ASR recognition error:", e);
561
+ return "";
562
+ }
563
+ }
564
+ };
565
+ function createASRHttpClient(config) {
566
+ let path = ASR_HTTP_PATH;
567
+ if (config.getAccessToken) {
568
+ const token = config.getAccessToken();
569
+ if (token) {
570
+ const separator = path.includes("?") ? "&" : "?";
571
+ path = `${path}${separator}token=${encodeURIComponent(token)}`;
572
+ }
573
+ }
574
+ return new AsrHttpClient(config, path);
575
+ }
576
+ var http_asr_client_default = (authConfig) => (config) => createASRHttpClient({ ...authConfig, ...config });
162
577
  // Annotate the CommonJS export names for ESM import in node:
163
578
  0 && (module.exports = {
164
579
  createASRClient,
165
- createAsrClient
580
+ createASRHttpClient
166
581
  });
167
582
  //# sourceMappingURL=index.cjs.map