@amaster.ai/asr-client 1.0.0-beta.7 → 1.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +125 -84
- package/dist/index.cjs +69 -138
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -10
- package/dist/index.d.ts +6 -10
- package/dist/index.js +68 -136
- package/dist/index.js.map +1 -1
- package/package.json +45 -45
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Amaster Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,131 +1,172 @@
|
|
|
1
|
-
#
|
|
1
|
+
# ASR Realtime WebSocket Client SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
基于 Web Audio + WebSocket 的 **实时语音识别(ASR)客户端 SDK**,用于将浏览器麦克风音频实时发送到 ASR 服务(如 Qwen ASR Realtime),并接收实时/最终转写结果。
|
|
4
|
+
|
|
5
|
+
---
|
|
4
6
|
|
|
5
7
|
## 特性
|
|
6
8
|
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
- ✅ 基于实际调试验证的实现
|
|
9
|
+
- 🎙 浏览器麦克风实时采集(16kHz / 单声道)
|
|
10
|
+
- 🔁 实时音频流式发送(Base64 PCM16)
|
|
11
|
+
- 🧠 支持语音开始 / 结束事件
|
|
12
|
+
- ✍️ 支持中间结果与最终转写结果
|
|
13
|
+
- 🔌 简单的 WebSocket 生命周期管理
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
---
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
## 安装与环境要求
|
|
18
|
+
|
|
19
|
+
### 浏览器要求
|
|
20
|
+
|
|
21
|
+
- 支持 `getUserMedia`
|
|
22
|
+
- 支持 `AudioContext`
|
|
23
|
+
- 支持 `WebSocket`
|
|
24
|
+
|
|
25
|
+
推荐使用 **Chrome / Edge 最新版本**。
|
|
26
|
+
|
|
27
|
+
---
|
|
19
28
|
|
|
20
29
|
## 快速开始
|
|
21
30
|
|
|
22
|
-
|
|
23
|
-
import { createASRClient } from '@amaster.ai/asr-client';
|
|
31
|
+
### 1️⃣ 创建 ASR Client
|
|
24
32
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
gatewayUrl: 'ws://www.appok.ai/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime',
|
|
28
|
-
language: 'zh',
|
|
29
|
-
});
|
|
33
|
+
```ts
|
|
34
|
+
import { createASRClient } from "@amaster.ai/asr-client";
|
|
30
35
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
}
|
|
36
|
+
const client = createASRClient({
|
|
37
|
+
onReady() {
|
|
38
|
+
console.log("ASR 连接成功");
|
|
39
|
+
},
|
|
35
40
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
}
|
|
41
|
+
onSpeechStart() {
|
|
42
|
+
console.log("检测到说话开始");
|
|
43
|
+
},
|
|
39
44
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}
|
|
45
|
+
onSpeechEnd() {
|
|
46
|
+
console.log("检测到说话结束");
|
|
47
|
+
},
|
|
48
|
+
|
|
49
|
+
onTranscript(text, isFinal) {
|
|
50
|
+
console.log(isFinal ? "最终结果:" : "实时结果:", text);
|
|
51
|
+
},
|
|
43
52
|
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
onError(err) {
|
|
54
|
+
console.error("ASR 错误", err);
|
|
55
|
+
},
|
|
56
|
+
|
|
57
|
+
onClose() {
|
|
58
|
+
console.log("连接已关闭");
|
|
59
|
+
},
|
|
46
60
|
});
|
|
61
|
+
```
|
|
47
62
|
|
|
48
|
-
|
|
49
|
-
await asr.connect();
|
|
50
|
-
await asr.startRecording(); // 自动请求麦克风权限
|
|
63
|
+
---
|
|
51
64
|
|
|
52
|
-
|
|
53
|
-
asr.stopRecording();
|
|
65
|
+
### 2️⃣ 建立 WebSocket 连接
|
|
54
66
|
|
|
55
|
-
|
|
56
|
-
|
|
67
|
+
```ts
|
|
68
|
+
await client.connect();
|
|
57
69
|
```
|
|
58
70
|
|
|
59
|
-
|
|
71
|
+
服务端返回 `session.created` 后,`connect()` 会 resolve,并触发 `onReady` 回调。
|
|
60
72
|
|
|
61
|
-
|
|
73
|
+
---
|
|
62
74
|
|
|
63
|
-
|
|
75
|
+
### 3️⃣ 开始录音并实时识别
|
|
64
76
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
- `audioFormat`: 音频格式,默认 `'pcm16'`
|
|
69
|
-
- `sampleRate`: 采样率,默认 `16000`
|
|
77
|
+
```ts
|
|
78
|
+
await client.startRecording();
|
|
79
|
+
```
|
|
70
80
|
|
|
71
|
-
|
|
81
|
+
* 自动请求麦克风权限
|
|
82
|
+
* 自动开始推送音频流
|
|
83
|
+
* 服务端会持续返回实时转写结果
|
|
72
84
|
|
|
73
|
-
|
|
85
|
+
---
|
|
74
86
|
|
|
75
|
-
|
|
87
|
+
### 4️⃣ 停止录音
|
|
76
88
|
|
|
77
|
-
|
|
89
|
+
```ts
|
|
90
|
+
client.stopRecording();
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
* 停止麦克风采集
|
|
94
|
+
* 向服务端发送 `input_audio_buffer.commit`
|
|
95
|
+
* 触发最终转写结果
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
### 5️⃣ 关闭连接
|
|
100
|
+
|
|
101
|
+
```ts
|
|
102
|
+
client.close();
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
78
106
|
|
|
79
|
-
|
|
107
|
+
## API 说明
|
|
80
108
|
|
|
81
|
-
|
|
109
|
+
### `createASRClient(config): ASRClient`
|
|
110
|
+
|
|
111
|
+
#### `ASRClientConfig`
|
|
112
|
+
|
|
113
|
+
| 参数 | 类型 | 说明 |
|
|
114
|
+
| --------------- | ------------------------------------------ | -------------- |
|
|
115
|
+
| `onReady` | `() => void` | 会话创建完成 |
|
|
116
|
+
| `onSpeechStart` | `() => void` | 检测到语音开始 |
|
|
117
|
+
| `onSpeechEnd` | `() => void` | 检测到语音结束 |
|
|
118
|
+
| `onTranscript` | `(text: string, isFinal: boolean) => void` | 转写回调 |
|
|
119
|
+
| `onError` | `(error: Error) => void` | 错误回调 |
|
|
120
|
+
| `onClose` | `() => void` | 连接关闭回调 |
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
### `ASRClient`
|
|
125
|
+
|
|
126
|
+
```ts
|
|
127
|
+
interface ASRClient {
|
|
128
|
+
connect(): Promise<void>;
|
|
129
|
+
startRecording(): Promise<void>;
|
|
130
|
+
stopRecording(): void;
|
|
131
|
+
close(): void;
|
|
132
|
+
}
|
|
133
|
+
```
|
|
82
134
|
|
|
83
|
-
|
|
135
|
+
---
|
|
84
136
|
|
|
85
|
-
|
|
137
|
+
## 音频参数说明
|
|
86
138
|
|
|
87
|
-
|
|
139
|
+
* **采样率**:16000 Hz
|
|
140
|
+
* **声道**:单声道
|
|
141
|
+
* **格式**:PCM16 → Base64
|
|
142
|
+
* **缓冲大小**:4096 frames
|
|
88
143
|
|
|
89
|
-
|
|
90
|
-
- `connected`: WebSocket 连接建立
|
|
91
|
-
- `session-created`: 会话创建成功
|
|
92
|
-
- `recording-started`: 录音已开始
|
|
93
|
-
- `speech-started`: VAD 检测到语音开始
|
|
94
|
-
- `speech-stopped`: VAD 检测到语音停止
|
|
95
|
-
- `transcript-partial`: 中间识别结果(实时更新)
|
|
96
|
-
- `transcript-final`: 最终识别结果(确认)
|
|
97
|
-
- `error`: 发生错误
|
|
98
|
-
- `closed`: 连接关闭
|
|
144
|
+
---
|
|
99
145
|
|
|
100
|
-
|
|
146
|
+
## 常见问题
|
|
101
147
|
|
|
102
|
-
|
|
148
|
+
### Q: 为什么必须是 16kHz?
|
|
103
149
|
|
|
104
|
-
|
|
150
|
+
ASR 服务通常要求 16kHz PCM 输入,否则会影响识别效果或直接报错。
|
|
105
151
|
|
|
106
|
-
|
|
152
|
+
---
|
|
107
153
|
|
|
108
|
-
|
|
109
|
-
- 格式:PCM 16-bit Mono
|
|
110
|
-
- 编码:Base64
|
|
111
|
-
- 发送频率:约每秒 4 次(每次 4096 采样点)
|
|
154
|
+
### Q: 支持移动端吗?
|
|
112
155
|
|
|
113
|
-
|
|
156
|
+
支持,但需注意:
|
|
114
157
|
|
|
115
|
-
|
|
116
|
-
|
|
158
|
+
* iOS Safari 需用户手势触发录音
|
|
159
|
+
* 页面切换到后台后,系统可能会自动暂停音频采集
|
|
117
160
|
|
|
118
|
-
|
|
161
|
+
---
|
|
119
162
|
|
|
120
|
-
|
|
121
|
-
- `conversation.item.input_audio_transcription.text` - 中间结果
|
|
122
|
-
- `conversation.item.input_audio_transcription.completed` - 最终结果
|
|
163
|
+
## 注意事项
|
|
123
164
|
|
|
124
|
-
|
|
165
|
+
* 麦克风采集(`getUserMedia`)仅在 HTTPS 页面或 `localhost` 等安全上下文下可用
|
|
166
|
+
* 页面关闭前建议调用 `client.close()`
|
|
167
|
+
* 不建议在多个 ASR Client 实例中共享麦克风
|
|
125
168
|
|
|
126
|
-
|
|
127
|
-
- `input_audio_buffer.speech_started` - 开始说话
|
|
128
|
-
- `input_audio_buffer.speech_stopped` - 停止说话
|
|
169
|
+
---
|
|
129
170
|
|
|
130
171
|
## License
|
|
131
172
|
|
package/dist/index.cjs
CHANGED
|
@@ -21,100 +21,70 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
23
|
createASRClient: () => createASRClient,
|
|
24
|
-
createAsrClient: () => createASRClient
|
|
25
|
-
listen: () => listen
|
|
24
|
+
createAsrClient: () => createASRClient
|
|
26
25
|
});
|
|
27
26
|
module.exports = __toCommonJS(index_exports);
|
|
28
27
|
|
|
29
28
|
// src/asr-client.ts
|
|
30
29
|
var ASR_PATH = "/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime";
|
|
31
|
-
async function
|
|
32
|
-
|
|
33
|
-
let
|
|
34
|
-
let
|
|
35
|
-
let processor
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
30
|
+
async function createRealtimeRecorder() {
|
|
31
|
+
let stream;
|
|
32
|
+
let ctx;
|
|
33
|
+
let source;
|
|
34
|
+
let processor;
|
|
35
|
+
return {
|
|
36
|
+
async start(onAudio) {
|
|
37
|
+
stream = await navigator.mediaDevices.getUserMedia({
|
|
38
|
+
audio: { sampleRate: 16e3, channelCount: 1, echoCancellation: true }
|
|
39
|
+
});
|
|
40
|
+
log("\u2705 \u9EA6\u514B\u98CE\u5DF2\u542F\u52A8", "success");
|
|
41
|
+
log("\u{1F4AC} \u8BF7\u5BF9\u7740\u9EA6\u514B\u98CE\u8BF4\u8BDD\uFF0C\u5B9E\u65F6\u8BC6\u522B\u4E2D...", "success");
|
|
42
|
+
ctx = new AudioContext({ sampleRate: 16e3 });
|
|
43
|
+
source = ctx.createMediaStreamSource(stream);
|
|
44
|
+
processor = ctx.createScriptProcessor(4096, 1, 1);
|
|
45
|
+
processor.onaudioprocess = (e) => {
|
|
46
|
+
const inputData = e.inputBuffer.getChannelData(0);
|
|
47
|
+
const pcm = new Int16Array(inputData.length);
|
|
48
|
+
for (let i = 0; i < inputData.length; i++) {
|
|
49
|
+
const s = Math.max(-1, Math.min(1, inputData[i] || 0));
|
|
50
|
+
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
51
|
+
}
|
|
52
|
+
const bytes = new Uint8Array(pcm.buffer);
|
|
53
|
+
let binary = "";
|
|
54
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
55
|
+
binary += String.fromCharCode(bytes[i] || 0);
|
|
56
|
+
}
|
|
57
|
+
onAudio(btoa(binary));
|
|
58
|
+
};
|
|
59
|
+
source.connect(processor);
|
|
60
|
+
processor.connect(ctx.destination);
|
|
61
|
+
},
|
|
62
|
+
async stop() {
|
|
63
|
+
stream?.getTracks().forEach((t) => t.stop());
|
|
64
|
+
source?.disconnect();
|
|
65
|
+
processor?.disconnect();
|
|
66
|
+
await ctx?.close();
|
|
52
67
|
}
|
|
53
68
|
};
|
|
54
|
-
return new Promise((resolve, reject) => {
|
|
55
|
-
ws.onmessage = async (event) => {
|
|
56
|
-
const data = JSON.parse(event.data);
|
|
57
|
-
if (data.type === "session.created") {
|
|
58
|
-
try {
|
|
59
|
-
mediaStream = await navigator.mediaDevices.getUserMedia({
|
|
60
|
-
audio: { sampleRate: 16e3, channelCount: 1, echoCancellation: true }
|
|
61
|
-
});
|
|
62
|
-
audioContext = new AudioContext({ sampleRate: 16e3 });
|
|
63
|
-
const source = audioContext.createMediaStreamSource(mediaStream);
|
|
64
|
-
processor = audioContext.createScriptProcessor(4096, 1, 1);
|
|
65
|
-
processor.onaudioprocess = (e) => {
|
|
66
|
-
if (ws.readyState !== WebSocket.OPEN) return;
|
|
67
|
-
const input = e.inputBuffer.getChannelData(0);
|
|
68
|
-
const pcm = new Int16Array(input.length);
|
|
69
|
-
for (let i = 0; i < input.length; i++) {
|
|
70
|
-
const s = Math.max(-1, Math.min(1, input[i]));
|
|
71
|
-
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
72
|
-
}
|
|
73
|
-
const bytes = new Uint8Array(pcm.buffer);
|
|
74
|
-
let binary = "";
|
|
75
|
-
for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
|
|
76
|
-
ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: btoa(binary) }));
|
|
77
|
-
};
|
|
78
|
-
source.connect(processor);
|
|
79
|
-
processor.connect(audioContext.destination);
|
|
80
|
-
resolve(stop);
|
|
81
|
-
} catch (err) {
|
|
82
|
-
reject(err);
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
if (data.type === "conversation.item.input_audio_transcription.text") {
|
|
86
|
-
onTranscript(data.text || "", false);
|
|
87
|
-
}
|
|
88
|
-
if (data.type === "conversation.item.input_audio_transcription.completed") {
|
|
89
|
-
onTranscript(data.text || data.transcript || "", true);
|
|
90
|
-
}
|
|
91
|
-
if (data.type === "error") {
|
|
92
|
-
stop();
|
|
93
|
-
reject(new Error(data.error?.message || "ASR error"));
|
|
94
|
-
}
|
|
95
|
-
};
|
|
96
|
-
ws.onerror = () => reject(new Error("ASR connection failed"));
|
|
97
|
-
});
|
|
98
69
|
}
|
|
70
|
+
var log = (message, type = "") => {
|
|
71
|
+
console.log(`[${type}]`, message);
|
|
72
|
+
};
|
|
99
73
|
function createASRClient(config) {
|
|
100
74
|
const {
|
|
101
|
-
// audioFormat = 'pcm16',
|
|
102
|
-
sampleRate = 16e3,
|
|
103
75
|
onReady,
|
|
104
76
|
onSpeechStart,
|
|
105
77
|
onSpeechEnd,
|
|
106
78
|
onTranscript,
|
|
107
|
-
onError
|
|
79
|
+
onError,
|
|
80
|
+
onClose,
|
|
81
|
+
path = ASR_PATH
|
|
108
82
|
} = config;
|
|
109
83
|
let ws = null;
|
|
110
|
-
let
|
|
111
|
-
let audioContext = null;
|
|
112
|
-
let processor = null;
|
|
84
|
+
let recorder = null;
|
|
113
85
|
async function connect() {
|
|
86
|
+
ws = new WebSocket(path);
|
|
114
87
|
return new Promise((resolve, reject) => {
|
|
115
|
-
ws = new WebSocket(ASR_PATH);
|
|
116
|
-
ws.onopen = () => {
|
|
117
|
-
};
|
|
118
88
|
ws.onmessage = (event) => {
|
|
119
89
|
const data = JSON.parse(event.data);
|
|
120
90
|
if (data.type === "session.created") {
|
|
@@ -128,97 +98,59 @@ function createASRClient(config) {
|
|
|
128
98
|
onSpeechEnd?.();
|
|
129
99
|
}
|
|
130
100
|
if (data.type === "conversation.item.input_audio_transcription.text") {
|
|
131
|
-
onTranscript?.(data.text || "", false);
|
|
101
|
+
onTranscript?.(data.text || data.stash || data.transcript || "", false);
|
|
132
102
|
}
|
|
133
103
|
if (data.type === "conversation.item.input_audio_transcription.completed") {
|
|
134
104
|
onTranscript?.(data.text || data.transcript || "", true);
|
|
135
105
|
}
|
|
136
106
|
if (data.type === "error") {
|
|
137
|
-
const err = new Error(data.error?.message || "
|
|
107
|
+
const err = new Error(data.error?.message || "ASR error");
|
|
138
108
|
onError?.(err);
|
|
139
109
|
reject(err);
|
|
140
110
|
}
|
|
141
111
|
};
|
|
142
112
|
ws.onerror = () => {
|
|
143
|
-
const err = new Error("WebSocket
|
|
113
|
+
const err = new Error("WebSocket error");
|
|
144
114
|
onError?.(err);
|
|
145
115
|
reject(err);
|
|
146
116
|
};
|
|
147
117
|
ws.onclose = () => {
|
|
118
|
+
recorder?.stop();
|
|
119
|
+
recorder = null;
|
|
148
120
|
ws = null;
|
|
121
|
+
onClose?.();
|
|
149
122
|
};
|
|
150
123
|
});
|
|
151
124
|
}
|
|
152
125
|
async function startRecording() {
|
|
153
|
-
if (typeof window === "undefined") {
|
|
154
|
-
throw new Error("Recording only supported in browser");
|
|
155
|
-
}
|
|
156
126
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
157
127
|
throw new Error("WebSocket not connected");
|
|
158
128
|
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
echoCancellation: true,
|
|
165
|
-
noiseSuppression: true
|
|
166
|
-
}
|
|
167
|
-
});
|
|
168
|
-
audioContext = new AudioContext({ sampleRate });
|
|
169
|
-
const source = audioContext.createMediaStreamSource(mediaStream);
|
|
170
|
-
processor = audioContext.createScriptProcessor(4096, 1, 1);
|
|
171
|
-
processor.onaudioprocess = (e) => {
|
|
172
|
-
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
173
|
-
const inputData = e.inputBuffer.getChannelData(0);
|
|
174
|
-
const inputLen = inputData.length;
|
|
175
|
-
const pcm = new Int16Array(inputLen);
|
|
176
|
-
for (let i = 0; i < inputLen; i++) {
|
|
177
|
-
const s = Math.max(-1, Math.min(1, inputData[i]));
|
|
178
|
-
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
179
|
-
}
|
|
180
|
-
const bytes = new Uint8Array(pcm.buffer);
|
|
181
|
-
const len = bytes.length;
|
|
182
|
-
let binary = "";
|
|
183
|
-
for (let i = 0; i < len; i++) {
|
|
184
|
-
binary += String.fromCharCode(bytes[i]);
|
|
185
|
-
}
|
|
186
|
-
const base64 = btoa(binary);
|
|
187
|
-
ws.send(JSON.stringify({
|
|
129
|
+
recorder = await createRealtimeRecorder();
|
|
130
|
+
await recorder.start((audio) => {
|
|
131
|
+
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
132
|
+
ws.send(
|
|
133
|
+
JSON.stringify({
|
|
188
134
|
type: "input_audio_buffer.append",
|
|
189
|
-
audio
|
|
190
|
-
})
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
processor.connect(audioContext.destination);
|
|
194
|
-
} catch (err) {
|
|
195
|
-
onError?.(err);
|
|
196
|
-
throw err;
|
|
197
|
-
}
|
|
135
|
+
audio
|
|
136
|
+
})
|
|
137
|
+
);
|
|
138
|
+
});
|
|
198
139
|
}
|
|
199
|
-
function stopRecording() {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
}
|
|
204
|
-
if (processor) {
|
|
205
|
-
processor.disconnect();
|
|
206
|
-
processor = null;
|
|
207
|
-
}
|
|
208
|
-
if (audioContext) {
|
|
209
|
-
audioContext.close();
|
|
210
|
-
audioContext = null;
|
|
140
|
+
async function stopRecording() {
|
|
141
|
+
try {
|
|
142
|
+
await recorder?.stop();
|
|
143
|
+
} catch (err) {
|
|
211
144
|
}
|
|
145
|
+
recorder = null;
|
|
212
146
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
|
213
147
|
ws.send(JSON.stringify({ type: "input_audio_buffer.commit" }));
|
|
214
148
|
}
|
|
215
149
|
}
|
|
216
150
|
function close() {
|
|
217
151
|
stopRecording();
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
ws = null;
|
|
221
|
-
}
|
|
152
|
+
ws?.close();
|
|
153
|
+
ws = null;
|
|
222
154
|
}
|
|
223
155
|
return {
|
|
224
156
|
connect,
|
|
@@ -230,7 +162,6 @@ function createASRClient(config) {
|
|
|
230
162
|
// Annotate the CommonJS export names for ESM import in node:
|
|
231
163
|
0 && (module.exports = {
|
|
232
164
|
createASRClient,
|
|
233
|
-
createAsrClient
|
|
234
|
-
listen
|
|
165
|
+
createAsrClient
|
|
235
166
|
});
|
|
236
167
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/asr-client.ts"],"sourcesContent":["export { listen } from './asr-client';\nexport type { ASRClient, ASRClientConfig } from './asr-client';\nexport { createASRClient, createASRClient as createAsrClient } from './asr-client';\n","/**\r\n * ASR Realtime WebSocket Client\r\n */\r\n\r\nconst ASR_PATH = '/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime';\r\n\r\n/**\r\n * Simple ASR: start listening and get transcript\r\n * @returns stop function\r\n * @example\r\n * const stop = await listen((text, isFinal) => console.log(text))\r\n * // later: stop()\r\n */\r\nexport async function listen(\r\n onTranscript: (text: string, isFinal: boolean) => void\r\n): Promise<() => void> {\r\n const ws = new WebSocket(ASR_PATH);\r\n let mediaStream: MediaStream | null = null;\r\n let audioContext: AudioContext | null = null;\r\n let processor: ScriptProcessorNode | null = null;\r\n\r\n const stop = () => {\r\n if (mediaStream) { mediaStream.getTracks().forEach(t => t.stop()); mediaStream = null; }\r\n if (processor) { processor.disconnect(); processor = null; }\r\n if (audioContext) { audioContext.close(); audioContext = null; }\r\n if (ws.readyState === WebSocket.OPEN) {\r\n ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));\r\n ws.close();\r\n }\r\n };\r\n\r\n return new Promise((resolve, reject) => {\r\n ws.onmessage = async (event) => {\r\n const data = JSON.parse(event.data);\r\n\r\n if (data.type === 'session.created') {\r\n // Start recording\r\n try {\r\n mediaStream = await navigator.mediaDevices.getUserMedia({\r\n audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true }\r\n });\r\n audioContext = new AudioContext({ sampleRate: 16000 });\r\n const source = audioContext.createMediaStreamSource(mediaStream);\r\n processor = audioContext.createScriptProcessor(4096, 1, 1);\r\n\r\n processor.onaudioprocess = (e) => {\r\n if (ws.readyState !== WebSocket.OPEN) return;\r\n const input = 
e.inputBuffer.getChannelData(0);\r\n const pcm = new Int16Array(input.length);\r\n for (let i = 0; i < input.length; i++) {\r\n const s = Math.max(-1, Math.min(1, input[i]!));\r\n pcm[i] = s < 0 ? s * 32768 : s * 32767;\r\n }\r\n const bytes = new Uint8Array(pcm.buffer);\r\n let binary = '';\r\n for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]!);\r\n ws.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: btoa(binary) }));\r\n };\r\n\r\n source.connect(processor);\r\n processor.connect(audioContext.destination);\r\n resolve(stop);\r\n } catch (err) {\r\n reject(err);\r\n }\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.text') {\r\n onTranscript(data.text || '', false);\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.completed') {\r\n onTranscript(data.text || data.transcript || '', true);\r\n }\r\n\r\n if (data.type === 'error') {\r\n stop();\r\n reject(new Error(data.error?.message || 'ASR error'));\r\n }\r\n };\r\n\r\n ws.onerror = () => reject(new Error('ASR connection failed'));\r\n });\r\n}\r\n\r\nexport interface ASRClientConfig {\r\n /** Audio format, default 'pcm16' */\r\n audioFormat?: 'pcm16' | 'g711a' | 'g711u';\r\n /** Sample rate, default 16000 */\r\n sampleRate?: number;\r\n /** Called when connection is ready */\r\n onReady?: () => void;\r\n /** Called when speech is detected */\r\n onSpeechStart?: () => void;\r\n /** Called when speech stops */\r\n onSpeechEnd?: () => void;\r\n /** Called on transcript result */\r\n onTranscript?: (text: string, isFinal: boolean) => void;\r\n /** Called on error */\r\n onError?: (error: Error) => void;\r\n}\r\n\r\nexport interface ASRClient {\r\n /** Connect to ASR service */\r\n connect(): Promise<void>;\r\n /** Start recording from microphone */\r\n startRecording(): Promise<void>;\r\n /** Stop recording */\r\n stopRecording(): void;\r\n /** Close connection */\r\n close(): void;\r\n}\r\n\r\nexport function 
createASRClient(config: ASRClientConfig): ASRClient {\r\n const {\r\n // audioFormat = 'pcm16',\r\n sampleRate = 16000,\r\n onReady,\r\n onSpeechStart,\r\n onSpeechEnd,\r\n onTranscript,\r\n onError,\r\n } = config;\r\n\r\n let ws: WebSocket | null = null;\r\n let mediaStream: MediaStream | null = null;\r\n let audioContext: AudioContext | null = null;\r\n let processor: ScriptProcessorNode | null = null;\r\n\r\n async function connect(): Promise<void> {\r\n return new Promise((resolve, reject) => {\r\n ws = new WebSocket(ASR_PATH);\r\n\r\n ws.onopen = () => {};\r\n\r\n ws.onmessage = (event) => {\r\n const data = JSON.parse(event.data);\r\n\r\n if (data.type === 'session.created') {\r\n onReady?.();\r\n resolve();\r\n }\r\n\r\n if (data.type === 'input_audio_buffer.speech_started') {\r\n onSpeechStart?.();\r\n }\r\n\r\n if (data.type === 'input_audio_buffer.speech_stopped') {\r\n onSpeechEnd?.();\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.text') {\r\n onTranscript?.(data.text || '', false);\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.completed') {\r\n onTranscript?.(data.text || data.transcript || '', true);\r\n }\r\n\r\n if (data.type === 'error') {\r\n const err = new Error(data.error?.message || 'Unknown error');\r\n onError?.(err);\r\n reject(err);\r\n }\r\n };\r\n\r\n ws.onerror = () => {\r\n const err = new Error('WebSocket connection error');\r\n onError?.(err);\r\n reject(err);\r\n };\r\n\r\n ws.onclose = () => {\r\n ws = null;\r\n };\r\n });\r\n }\r\n\r\n async function startRecording(): Promise<void> {\r\n if (typeof window === 'undefined') {\r\n throw new Error('Recording only supported in browser');\r\n }\r\n\r\n if (!ws || ws.readyState !== WebSocket.OPEN) {\r\n throw new Error('WebSocket not connected');\r\n }\r\n\r\n try {\r\n mediaStream = await navigator.mediaDevices.getUserMedia({\r\n audio: {\r\n sampleRate,\r\n channelCount: 1,\r\n echoCancellation: true,\r\n noiseSuppression: 
true,\r\n },\r\n });\r\n\r\n audioContext = new AudioContext({ sampleRate });\r\n const source = audioContext.createMediaStreamSource(mediaStream);\r\n processor = audioContext.createScriptProcessor(4096, 1, 1);\r\n\r\n processor.onaudioprocess = (e) => {\r\n if (!ws || ws.readyState !== WebSocket.OPEN) return;\r\n\r\n const inputData = e.inputBuffer.getChannelData(0);\r\n const inputLen = inputData.length;\r\n\r\n const pcm = new Int16Array(inputLen);\r\n for (let i = 0; i < inputLen; i++) {\r\n const s = Math.max(-1, Math.min(1, inputData[i]!));\r\n pcm[i] = s < 0 ? s * 32768 : s * 32767;\r\n }\r\n\r\n const bytes = new Uint8Array(pcm.buffer);\r\n const len = bytes.length;\r\n let binary = '';\r\n for (let i = 0; i < len; i++) {\r\n binary += String.fromCharCode(bytes[i]!);\r\n }\r\n const base64 = btoa(binary);\r\n\r\n ws.send(JSON.stringify({\r\n type: 'input_audio_buffer.append',\r\n audio: base64,\r\n }));\r\n };\r\n\r\n source.connect(processor);\r\n processor.connect(audioContext.destination);\r\n } catch (err) {\r\n onError?.(err as Error);\r\n throw err;\r\n }\r\n }\r\n\r\n function stopRecording() {\r\n if (mediaStream) {\r\n mediaStream.getTracks().forEach(track => track.stop());\r\n mediaStream = null;\r\n }\r\n\r\n if (processor) {\r\n processor.disconnect();\r\n processor = null;\r\n }\r\n\r\n if (audioContext) {\r\n audioContext.close();\r\n audioContext = null;\r\n }\r\n\r\n if (ws && ws.readyState === WebSocket.OPEN) {\r\n ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));\r\n }\r\n }\r\n\r\n function close() {\r\n stopRecording();\r\n if (ws) {\r\n ws.close();\r\n ws = null;\r\n }\r\n }\r\n\r\n return {\r\n connect,\r\n startRecording,\r\n stopRecording,\r\n close,\r\n 
};\r\n}\r\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,IAAM,WAAW;AASjB,eAAsB,OACpB,cACqB;AACrB,QAAM,KAAK,IAAI,UAAU,QAAQ;AACjC,MAAI,cAAkC;AACtC,MAAI,eAAoC;AACxC,MAAI,YAAwC;AAE5C,QAAM,OAAO,MAAM;AACjB,QAAI,aAAa;AAAE,kBAAY,UAAU,EAAE,QAAQ,OAAK,EAAE,KAAK,CAAC;AAAG,oBAAc;AAAA,IAAM;AACvF,QAAI,WAAW;AAAE,gBAAU,WAAW;AAAG,kBAAY;AAAA,IAAM;AAC3D,QAAI,cAAc;AAAE,mBAAa,MAAM;AAAG,qBAAe;AAAA,IAAM;AAC/D,QAAI,GAAG,eAAe,UAAU,MAAM;AACpC,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAC7D,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AAEA,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,OAAG,YAAY,OAAO,UAAU;AAC9B,YAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,UAAI,KAAK,SAAS,mBAAmB;AAEnC,YAAI;AACF,wBAAc,MAAM,UAAU,aAAa,aAAa;AAAA,YACtD,OAAO,EAAE,YAAY,MAAO,cAAc,GAAG,kBAAkB,KAAK;AAAA,UACtE,CAAC;AACD,yBAAe,IAAI,aAAa,EAAE,YAAY,KAAM,CAAC;AACrD,gBAAM,SAAS,aAAa,wBAAwB,WAAW;AAC/D,sBAAY,aAAa,sBAAsB,MAAM,GAAG,CAAC;AAEzD,oBAAU,iBAAiB,CAAC,MAAM;AAChC,gBAAI,GAAG,eAAe,UAAU,KAAM;AACtC,kBAAM,QAAQ,EAAE,YAAY,eAAe,CAAC;AAC5C,kBAAM,MAAM,IAAI,WAAW,MAAM,MAAM;AACvC,qBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,oBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,MAAM,CAAC,CAAE,CAAC;AAC7C,kBAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,YACnC;AACA,kBAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,gBAAI,SAAS;AACb,qBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,IAAK,WAAU,OAAO,aAAa,MAAM,CAAC,CAAE;AAC9E,eAAG,KAAK,KAAK,UAAU,EAAE,MAAM,6BAA6B,OAAO,KAAK,MAAM,EAAE,CAAC,CAAC;AAAA,UACpF;AAEA,iBAAO,QAAQ,SAAS;AACxB,oBAAU,QAAQ,aAAa,WAAW;AAC1C,kBAAQ,IAAI;AAAA,QACd,SAAS,KAAK;AACZ,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,UAAI,KAAK,SAAS,oDAAoD;AACpE,qBAAa,KAAK,QAAQ,IAAI,KAAK;AAAA,MACrC;AAEA,UAAI,KAAK,SAAS,yDAAyD;AACzE,qBAAa,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,MACvD;AAEA,UAAI,KAAK,SAAS,SAAS;AACzB,aAAK;AACL,eAAO,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW,CAAC;AAAA,MACtD;AAAA,IACF;AAEA,OAAG,UAAU,MAAM,OAAO,IAAI,MAAM,uBAAuB,CAAC;AAAA,EAC9D,CAAC;AACH;AA8BO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA;AAAA,IAEJ,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAkC;AACtC,MAAI,e
AAoC;AACxC,MAAI,YAAwC;AAE5C,iBAAe,UAAyB;AACtC,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,UAAU,QAAQ;AAE3B,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,0BAAgB;AAAA,QAClB;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,wBAAc;AAAA,QAChB;AAEA,YAAI,KAAK,SAAS,oDAAoD;AACpE,yBAAe,KAAK,QAAQ,IAAI,KAAK;AAAA,QACvC;AAEA,YAAI,KAAK,SAAS,yDAAyD;AACzE,yBAAe,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,QACzD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,iBAAgC;AAC7C,QAAI,OAAO,WAAW,aAAa;AACjC,YAAM,IAAI,MAAM,qCAAqC;AAAA,IACvD;AAEA,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,QAAI;AACF,oBAAc,MAAM,UAAU,aAAa,aAAa;AAAA,QACtD,OAAO;AAAA,UACL;AAAA,UACA,cAAc;AAAA,UACd,kBAAkB;AAAA,UAClB,kBAAkB;AAAA,QACpB;AAAA,MACF,CAAC;AAED,qBAAe,IAAI,aAAa,EAAE,WAAW,CAAC;AAC9C,YAAM,SAAS,aAAa,wBAAwB,WAAW;AAC/D,kBAAY,aAAa,sBAAsB,MAAM,GAAG,CAAC;AAEzD,gBAAU,iBAAiB,CAAC,MAAM;AAChC,YAAI,CAAC,MAAM,GAAG,eAAe,UAAU,KAAM;AAE7C,cAAM,YAAY,EAAE,YAAY,eAAe,CAAC;AAChD,cAAM,WAAW,UAAU;AAE3B,cAAM,MAAM,IAAI,WAAW,QAAQ;AACnC,iBAAS,IAAI,GAAG,IAAI,UAAU,KAAK;AACjC,gBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,UAAU,CAAC,CAAE,CAAC;AACjD,cAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,QACnC;AAEA,cAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,cAAM,MAAM,MAAM;AAClB,YAAI,SAAS;AACb,iBAAS,IAAI,GAAG,IAAI,KAAK,KAAK;AAC5B,oBAAU,OAAO,aAAa,MAAM,CAAC,CAAE;AAAA,QACzC;AACA,cAAM,SAAS,KAAK,MAAM;AAE1B,WAAG,KAAK,KAAK,UAAU;AAAA,UACrB,MAAM;AAAA,UACN,OAAO;AAAA,QACT,CAAC,CAAC;AAAA,MACJ;AAEA,aAAO,QAAQ,SAAS;AACxB,gBAAU,QAAQ,aAAa,WAAW;AAAA,IAC5C,SAAS,KAAK;AACZ,gBAAU,GAAY;AACtB,YAAM;AAAA,IACR;AAAA,EACF;AAEA,WAAS,gBAAgB;AACvB,QAAI,aAAa;AACf,kBAAY,UAAU,EAAE,QAAQ,WAAS,MAAM,KAAK,CAAC;AACrD,oBAAc;A
AAA,IAChB;AAEA,QAAI,WAAW;AACb,gBAAU,WAAW;AACrB,kBAAY;AAAA,IACd;AAEA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAEA,QAAI,MAAM,GAAG,eAAe,UAAU,MAAM;AAC1C,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAAA,IAC/D;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,kBAAc;AACd,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/asr-client.ts"],"sourcesContent":["export * from './asr-client';\nexport { createASRClient as createAsrClient } from './asr-client';\n","/**\n * ASR Realtime WebSocket Client\n */\n\nconst ASR_PATH = \"/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime\";\n\ninterface RealtimeRecorder {\n start(onAudio: (base64: string) => void): Promise<void>;\n stop(): Promise<void>;\n}\n\nasync function createRealtimeRecorder(): Promise<RealtimeRecorder> {\n let stream: MediaStream;\n let ctx: AudioContext;\n let source: MediaStreamAudioSourceNode;\n let processor: ScriptProcessorNode;\n\n return {\n async start(onAudio) {\n stream = await navigator.mediaDevices.getUserMedia({\n audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true },\n });\n\n log(\"✅ 麦克风已启动\", \"success\");\n log(\"💬 请对着麦克风说话,实时识别中...\", \"success\");\n\n ctx = new AudioContext({ sampleRate: 16000 });\n source = ctx.createMediaStreamSource(stream);\n processor = ctx.createScriptProcessor(4096, 1, 1);\n\n processor.onaudioprocess = (e) => {\n const inputData = e.inputBuffer.getChannelData(0);\n const pcm = new Int16Array(inputData.length);\n for (let i = 0; i < inputData.length; i++) {\n const s = Math.max(-1, Math.min(1, inputData[i] || 0));\n pcm[i] = s < 0 ? 
s * 32768 : s * 32767;\n }\n\n const bytes = new Uint8Array(pcm.buffer);\n let binary = \"\";\n for (let i = 0; i < bytes.length; i++) {\n binary += String.fromCharCode(bytes[i] || 0);\n }\n\n onAudio(btoa(binary));\n };\n\n source.connect(processor);\n processor.connect(ctx.destination);\n },\n\n async stop() {\n stream?.getTracks().forEach((t) => t.stop());\n source?.disconnect();\n processor?.disconnect();\n await ctx?.close();\n },\n };\n}\n\nconst log = (message: string, type = \"\") => {\n console.log(`[${type}]`, message);\n};\n\nexport interface ASRClientConfig {\n /** Audio format, default 'pcm16' */\n audioFormat?: \"pcm16\" | \"g711a\" | \"g711u\";\n /** Sample rate, default 16000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when speech is detected */\n onSpeechStart?: () => void;\n /** Called when speech stops */\n onSpeechEnd?: () => void;\n /** Called on transcript result */\n onTranscript?: (text: string, isFinal: boolean) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n /** Called on close */\n onClose?: () => void;\n /** WebSocket path, default ASR_PATH */\n path?: string;\n}\n\nexport interface ASRClient {\n /** Connect to ASR service */\n connect(): Promise<void>;\n /** Start recording from microphone */\n startRecording(): Promise<void>;\n /** Stop recording */\n stopRecording(): void;\n /** Close connection */\n close(): void;\n}\n\nexport function createASRClient(config: ASRClientConfig): ASRClient {\n const {\n onReady,\n onSpeechStart,\n onSpeechEnd,\n onTranscript,\n onError,\n onClose,\n path = ASR_PATH,\n } = config;\n\n let ws: WebSocket | null = null;\n let recorder: RealtimeRecorder | null = null;\n\n async function connect(): Promise<void> {\n ws = new WebSocket(path);\n\n return new Promise((resolve, reject) => {\n ws!.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n onReady?.();\n 
resolve();\n }\n\n if (data.type === \"input_audio_buffer.speech_started\") {\n onSpeechStart?.();\n }\n\n if (data.type === \"input_audio_buffer.speech_stopped\") {\n onSpeechEnd?.();\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.text\") {\n onTranscript?.(data.text || data.stash || data.transcript || \"\", false);\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.completed\") {\n onTranscript?.(data.text || data.transcript || \"\", true);\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"ASR error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws!.onerror = () => {\n const err = new Error(\"WebSocket error\");\n onError?.(err);\n reject(err);\n };\n\n ws!.onclose = () => {\n recorder?.stop();\n recorder = null;\n ws = null;\n onClose?.();\n };\n });\n }\n\n async function startRecording(): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n recorder = await createRealtimeRecorder();\n await recorder.start((audio) => {\n if (!ws || ws.readyState !== WebSocket.OPEN) return;\n ws.send(\n JSON.stringify({\n type: \"input_audio_buffer.append\",\n audio,\n })\n );\n });\n }\n\n async function stopRecording() {\n try {\n await recorder?.stop();\n } catch (err) {}\n recorder = null;\n\n if (ws && ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: \"input_audio_buffer.commit\" }));\n }\n }\n\n function close() {\n stopRecording();\n ws?.close();\n ws = null;\n }\n\n return {\n connect,\n startRecording,\n stopRecording,\n close,\n 
};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,IAAM,WAAW;AAOjB,eAAe,yBAAoD;AACjE,MAAI;AACJ,MAAI;AACJ,MAAI;AACJ,MAAI;AAEJ,SAAO;AAAA,IACL,MAAM,MAAM,SAAS;AACnB,eAAS,MAAM,UAAU,aAAa,aAAa;AAAA,QACjD,OAAO,EAAE,YAAY,MAAO,cAAc,GAAG,kBAAkB,KAAK;AAAA,MACtE,CAAC;AAED,UAAI,+CAAY,SAAS;AACzB,UAAI,qGAAwB,SAAS;AAErC,YAAM,IAAI,aAAa,EAAE,YAAY,KAAM,CAAC;AAC5C,eAAS,IAAI,wBAAwB,MAAM;AAC3C,kBAAY,IAAI,sBAAsB,MAAM,GAAG,CAAC;AAEhD,gBAAU,iBAAiB,CAAC,MAAM;AAChC,cAAM,YAAY,EAAE,YAAY,eAAe,CAAC;AAChD,cAAM,MAAM,IAAI,WAAW,UAAU,MAAM;AAC3C,iBAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,gBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;AACrD,cAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,QACnC;AAEA,cAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,YAAI,SAAS;AACb,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,oBAAU,OAAO,aAAa,MAAM,CAAC,KAAK,CAAC;AAAA,QAC7C;AAEA,gBAAQ,KAAK,MAAM,CAAC;AAAA,MACtB;AAEA,aAAO,QAAQ,SAAS;AACxB,gBAAU,QAAQ,IAAI,WAAW;AAAA,IACnC;AAAA,IAEA,MAAM,OAAO;AACX,cAAQ,UAAU,EAAE,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC;AAC3C,cAAQ,WAAW;AACnB,iBAAW,WAAW;AACtB,YAAM,KAAK,MAAM;AAAA,IACnB;AAAA,EACF;AACF;AAEA,IAAM,MAAM,CAAC,SAAiB,OAAO,OAAO;AAC1C,UAAQ,IAAI,IAAI,IAAI,KAAK,OAAO;AAClC;AAkCO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,OAAO;AAAA,EACT,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,WAAoC;AAExC,iBAAe,UAAyB;AACtC,SAAK,IAAI,UAAU,IAAI;AAEvB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,SAAI,YAAY,CAAC,UAAU;AACzB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,0BAAgB;AAAA,QAClB;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,wBAAc;AAAA,QAChB;AAEA,YAAI,KAAK,SAAS,oDAAoD;AACpE,yBAAe,KAAK,QAAQ,KAAK,SAAS,KAAK,cAAc,IAAI,KAAK;AAAA,QACxE;AAEA,YAAI,KAAK,SAAS,yDAAyD;AACzE,yBAAe,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,QACzD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW;AACxD,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAI,UAAU,MAAM;AAClB,cAAM,MAAM,IAAI,MAAM,iBAAiB;AACvC,kBAAU,GAA
G;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAI,UAAU,MAAM;AAClB,kBAAU,KAAK;AACf,mBAAW;AACX,aAAK;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,iBAAgC;AAC7C,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,eAAW,MAAM,uBAAuB;AACxC,UAAM,SAAS,MAAM,CAAC,UAAU;AAC9B,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,KAAM;AAC7C,SAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,gBAAgB;AAC7B,QAAI;AACF,YAAM,UAAU,KAAK;AAAA,IACvB,SAAS,KAAK;AAAA,IAAC;AACf,eAAW;AAEX,QAAI,MAAM,GAAG,eAAe,UAAU,MAAM;AAC1C,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAAA,IAC/D;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,kBAAc;AACd,QAAI,MAAM;AACV,SAAK;AAAA,EACP;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|
package/dist/index.d.cts
CHANGED
|
@@ -1,17 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* ASR Realtime WebSocket Client
|
|
3
3
|
*/
|
|
4
|
-
/**
|
|
5
|
-
* Simple ASR: start listening and get transcript
|
|
6
|
-
* @returns stop function
|
|
7
|
-
* @example
|
|
8
|
-
* const stop = await listen((text, isFinal) => console.log(text))
|
|
9
|
-
* // later: stop()
|
|
10
|
-
*/
|
|
11
|
-
declare function listen(onTranscript: (text: string, isFinal: boolean) => void): Promise<() => void>;
|
|
12
4
|
interface ASRClientConfig {
|
|
13
5
|
/** Audio format, default 'pcm16' */
|
|
14
|
-
audioFormat?:
|
|
6
|
+
audioFormat?: "pcm16" | "g711a" | "g711u";
|
|
15
7
|
/** Sample rate, default 16000 */
|
|
16
8
|
sampleRate?: number;
|
|
17
9
|
/** Called when connection is ready */
|
|
@@ -24,6 +16,10 @@ interface ASRClientConfig {
|
|
|
24
16
|
onTranscript?: (text: string, isFinal: boolean) => void;
|
|
25
17
|
/** Called on error */
|
|
26
18
|
onError?: (error: Error) => void;
|
|
19
|
+
/** Called on close */
|
|
20
|
+
onClose?: () => void;
|
|
21
|
+
/** WebSocket path, default ASR_PATH */
|
|
22
|
+
path?: string;
|
|
27
23
|
}
|
|
28
24
|
interface ASRClient {
|
|
29
25
|
/** Connect to ASR service */
|
|
@@ -37,4 +33,4 @@ interface ASRClient {
|
|
|
37
33
|
}
|
|
38
34
|
declare function createASRClient(config: ASRClientConfig): ASRClient;
|
|
39
35
|
|
|
40
|
-
export { type ASRClient, type ASRClientConfig, createASRClient, createASRClient as createAsrClient
|
|
36
|
+
export { type ASRClient, type ASRClientConfig, createASRClient, createASRClient as createAsrClient };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,17 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* ASR Realtime WebSocket Client
|
|
3
3
|
*/
|
|
4
|
-
/**
|
|
5
|
-
* Simple ASR: start listening and get transcript
|
|
6
|
-
* @returns stop function
|
|
7
|
-
* @example
|
|
8
|
-
* const stop = await listen((text, isFinal) => console.log(text))
|
|
9
|
-
* // later: stop()
|
|
10
|
-
*/
|
|
11
|
-
declare function listen(onTranscript: (text: string, isFinal: boolean) => void): Promise<() => void>;
|
|
12
4
|
interface ASRClientConfig {
|
|
13
5
|
/** Audio format, default 'pcm16' */
|
|
14
|
-
audioFormat?:
|
|
6
|
+
audioFormat?: "pcm16" | "g711a" | "g711u";
|
|
15
7
|
/** Sample rate, default 16000 */
|
|
16
8
|
sampleRate?: number;
|
|
17
9
|
/** Called when connection is ready */
|
|
@@ -24,6 +16,10 @@ interface ASRClientConfig {
|
|
|
24
16
|
onTranscript?: (text: string, isFinal: boolean) => void;
|
|
25
17
|
/** Called on error */
|
|
26
18
|
onError?: (error: Error) => void;
|
|
19
|
+
/** Called on close */
|
|
20
|
+
onClose?: () => void;
|
|
21
|
+
/** WebSocket path, default ASR_PATH */
|
|
22
|
+
path?: string;
|
|
27
23
|
}
|
|
28
24
|
interface ASRClient {
|
|
29
25
|
/** Connect to ASR service */
|
|
@@ -37,4 +33,4 @@ interface ASRClient {
|
|
|
37
33
|
}
|
|
38
34
|
declare function createASRClient(config: ASRClientConfig): ASRClient;
|
|
39
35
|
|
|
40
|
-
export { type ASRClient, type ASRClientConfig, createASRClient, createASRClient as createAsrClient
|
|
36
|
+
export { type ASRClient, type ASRClientConfig, createASRClient, createASRClient as createAsrClient };
|
package/dist/index.js
CHANGED
|
@@ -1,92 +1,63 @@
|
|
|
1
1
|
// src/asr-client.ts
|
|
2
2
|
var ASR_PATH = "/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime";
|
|
3
|
-
async function
|
|
4
|
-
|
|
5
|
-
let
|
|
6
|
-
let
|
|
7
|
-
let processor
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
3
|
+
async function createRealtimeRecorder() {
|
|
4
|
+
let stream;
|
|
5
|
+
let ctx;
|
|
6
|
+
let source;
|
|
7
|
+
let processor;
|
|
8
|
+
return {
|
|
9
|
+
async start(onAudio) {
|
|
10
|
+
stream = await navigator.mediaDevices.getUserMedia({
|
|
11
|
+
audio: { sampleRate: 16e3, channelCount: 1, echoCancellation: true }
|
|
12
|
+
});
|
|
13
|
+
log("\u2705 \u9EA6\u514B\u98CE\u5DF2\u542F\u52A8", "success");
|
|
14
|
+
log("\u{1F4AC} \u8BF7\u5BF9\u7740\u9EA6\u514B\u98CE\u8BF4\u8BDD\uFF0C\u5B9E\u65F6\u8BC6\u522B\u4E2D...", "success");
|
|
15
|
+
ctx = new AudioContext({ sampleRate: 16e3 });
|
|
16
|
+
source = ctx.createMediaStreamSource(stream);
|
|
17
|
+
processor = ctx.createScriptProcessor(4096, 1, 1);
|
|
18
|
+
processor.onaudioprocess = (e) => {
|
|
19
|
+
const inputData = e.inputBuffer.getChannelData(0);
|
|
20
|
+
const pcm = new Int16Array(inputData.length);
|
|
21
|
+
for (let i = 0; i < inputData.length; i++) {
|
|
22
|
+
const s = Math.max(-1, Math.min(1, inputData[i] || 0));
|
|
23
|
+
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
24
|
+
}
|
|
25
|
+
const bytes = new Uint8Array(pcm.buffer);
|
|
26
|
+
let binary = "";
|
|
27
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
28
|
+
binary += String.fromCharCode(bytes[i] || 0);
|
|
29
|
+
}
|
|
30
|
+
onAudio(btoa(binary));
|
|
31
|
+
};
|
|
32
|
+
source.connect(processor);
|
|
33
|
+
processor.connect(ctx.destination);
|
|
34
|
+
},
|
|
35
|
+
async stop() {
|
|
36
|
+
stream?.getTracks().forEach((t) => t.stop());
|
|
37
|
+
source?.disconnect();
|
|
38
|
+
processor?.disconnect();
|
|
39
|
+
await ctx?.close();
|
|
24
40
|
}
|
|
25
41
|
};
|
|
26
|
-
return new Promise((resolve, reject) => {
|
|
27
|
-
ws.onmessage = async (event) => {
|
|
28
|
-
const data = JSON.parse(event.data);
|
|
29
|
-
if (data.type === "session.created") {
|
|
30
|
-
try {
|
|
31
|
-
mediaStream = await navigator.mediaDevices.getUserMedia({
|
|
32
|
-
audio: { sampleRate: 16e3, channelCount: 1, echoCancellation: true }
|
|
33
|
-
});
|
|
34
|
-
audioContext = new AudioContext({ sampleRate: 16e3 });
|
|
35
|
-
const source = audioContext.createMediaStreamSource(mediaStream);
|
|
36
|
-
processor = audioContext.createScriptProcessor(4096, 1, 1);
|
|
37
|
-
processor.onaudioprocess = (e) => {
|
|
38
|
-
if (ws.readyState !== WebSocket.OPEN) return;
|
|
39
|
-
const input = e.inputBuffer.getChannelData(0);
|
|
40
|
-
const pcm = new Int16Array(input.length);
|
|
41
|
-
for (let i = 0; i < input.length; i++) {
|
|
42
|
-
const s = Math.max(-1, Math.min(1, input[i]));
|
|
43
|
-
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
44
|
-
}
|
|
45
|
-
const bytes = new Uint8Array(pcm.buffer);
|
|
46
|
-
let binary = "";
|
|
47
|
-
for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
|
|
48
|
-
ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: btoa(binary) }));
|
|
49
|
-
};
|
|
50
|
-
source.connect(processor);
|
|
51
|
-
processor.connect(audioContext.destination);
|
|
52
|
-
resolve(stop);
|
|
53
|
-
} catch (err) {
|
|
54
|
-
reject(err);
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
if (data.type === "conversation.item.input_audio_transcription.text") {
|
|
58
|
-
onTranscript(data.text || "", false);
|
|
59
|
-
}
|
|
60
|
-
if (data.type === "conversation.item.input_audio_transcription.completed") {
|
|
61
|
-
onTranscript(data.text || data.transcript || "", true);
|
|
62
|
-
}
|
|
63
|
-
if (data.type === "error") {
|
|
64
|
-
stop();
|
|
65
|
-
reject(new Error(data.error?.message || "ASR error"));
|
|
66
|
-
}
|
|
67
|
-
};
|
|
68
|
-
ws.onerror = () => reject(new Error("ASR connection failed"));
|
|
69
|
-
});
|
|
70
42
|
}
|
|
43
|
+
var log = (message, type = "") => {
|
|
44
|
+
console.log(`[${type}]`, message);
|
|
45
|
+
};
|
|
71
46
|
function createASRClient(config) {
|
|
72
47
|
const {
|
|
73
|
-
// audioFormat = 'pcm16',
|
|
74
|
-
sampleRate = 16e3,
|
|
75
48
|
onReady,
|
|
76
49
|
onSpeechStart,
|
|
77
50
|
onSpeechEnd,
|
|
78
51
|
onTranscript,
|
|
79
|
-
onError
|
|
52
|
+
onError,
|
|
53
|
+
onClose,
|
|
54
|
+
path = ASR_PATH
|
|
80
55
|
} = config;
|
|
81
56
|
let ws = null;
|
|
82
|
-
let
|
|
83
|
-
let audioContext = null;
|
|
84
|
-
let processor = null;
|
|
57
|
+
let recorder = null;
|
|
85
58
|
async function connect() {
|
|
59
|
+
ws = new WebSocket(path);
|
|
86
60
|
return new Promise((resolve, reject) => {
|
|
87
|
-
ws = new WebSocket(ASR_PATH);
|
|
88
|
-
ws.onopen = () => {
|
|
89
|
-
};
|
|
90
61
|
ws.onmessage = (event) => {
|
|
91
62
|
const data = JSON.parse(event.data);
|
|
92
63
|
if (data.type === "session.created") {
|
|
@@ -100,97 +71,59 @@ function createASRClient(config) {
|
|
|
100
71
|
onSpeechEnd?.();
|
|
101
72
|
}
|
|
102
73
|
if (data.type === "conversation.item.input_audio_transcription.text") {
|
|
103
|
-
onTranscript?.(data.text || "", false);
|
|
74
|
+
onTranscript?.(data.text || data.stash || data.transcript || "", false);
|
|
104
75
|
}
|
|
105
76
|
if (data.type === "conversation.item.input_audio_transcription.completed") {
|
|
106
77
|
onTranscript?.(data.text || data.transcript || "", true);
|
|
107
78
|
}
|
|
108
79
|
if (data.type === "error") {
|
|
109
|
-
const err = new Error(data.error?.message || "
|
|
80
|
+
const err = new Error(data.error?.message || "ASR error");
|
|
110
81
|
onError?.(err);
|
|
111
82
|
reject(err);
|
|
112
83
|
}
|
|
113
84
|
};
|
|
114
85
|
ws.onerror = () => {
|
|
115
|
-
const err = new Error("WebSocket
|
|
86
|
+
const err = new Error("WebSocket error");
|
|
116
87
|
onError?.(err);
|
|
117
88
|
reject(err);
|
|
118
89
|
};
|
|
119
90
|
ws.onclose = () => {
|
|
91
|
+
recorder?.stop();
|
|
92
|
+
recorder = null;
|
|
120
93
|
ws = null;
|
|
94
|
+
onClose?.();
|
|
121
95
|
};
|
|
122
96
|
});
|
|
123
97
|
}
|
|
124
98
|
async function startRecording() {
|
|
125
|
-
if (typeof window === "undefined") {
|
|
126
|
-
throw new Error("Recording only supported in browser");
|
|
127
|
-
}
|
|
128
99
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
129
100
|
throw new Error("WebSocket not connected");
|
|
130
101
|
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
echoCancellation: true,
|
|
137
|
-
noiseSuppression: true
|
|
138
|
-
}
|
|
139
|
-
});
|
|
140
|
-
audioContext = new AudioContext({ sampleRate });
|
|
141
|
-
const source = audioContext.createMediaStreamSource(mediaStream);
|
|
142
|
-
processor = audioContext.createScriptProcessor(4096, 1, 1);
|
|
143
|
-
processor.onaudioprocess = (e) => {
|
|
144
|
-
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
145
|
-
const inputData = e.inputBuffer.getChannelData(0);
|
|
146
|
-
const inputLen = inputData.length;
|
|
147
|
-
const pcm = new Int16Array(inputLen);
|
|
148
|
-
for (let i = 0; i < inputLen; i++) {
|
|
149
|
-
const s = Math.max(-1, Math.min(1, inputData[i]));
|
|
150
|
-
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
151
|
-
}
|
|
152
|
-
const bytes = new Uint8Array(pcm.buffer);
|
|
153
|
-
const len = bytes.length;
|
|
154
|
-
let binary = "";
|
|
155
|
-
for (let i = 0; i < len; i++) {
|
|
156
|
-
binary += String.fromCharCode(bytes[i]);
|
|
157
|
-
}
|
|
158
|
-
const base64 = btoa(binary);
|
|
159
|
-
ws.send(JSON.stringify({
|
|
102
|
+
recorder = await createRealtimeRecorder();
|
|
103
|
+
await recorder.start((audio) => {
|
|
104
|
+
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
105
|
+
ws.send(
|
|
106
|
+
JSON.stringify({
|
|
160
107
|
type: "input_audio_buffer.append",
|
|
161
|
-
audio
|
|
162
|
-
})
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
processor.connect(audioContext.destination);
|
|
166
|
-
} catch (err) {
|
|
167
|
-
onError?.(err);
|
|
168
|
-
throw err;
|
|
169
|
-
}
|
|
108
|
+
audio
|
|
109
|
+
})
|
|
110
|
+
);
|
|
111
|
+
});
|
|
170
112
|
}
|
|
171
|
-
function stopRecording() {
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
}
|
|
176
|
-
if (processor) {
|
|
177
|
-
processor.disconnect();
|
|
178
|
-
processor = null;
|
|
179
|
-
}
|
|
180
|
-
if (audioContext) {
|
|
181
|
-
audioContext.close();
|
|
182
|
-
audioContext = null;
|
|
113
|
+
async function stopRecording() {
|
|
114
|
+
try {
|
|
115
|
+
await recorder?.stop();
|
|
116
|
+
} catch (err) {
|
|
183
117
|
}
|
|
118
|
+
recorder = null;
|
|
184
119
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
|
185
120
|
ws.send(JSON.stringify({ type: "input_audio_buffer.commit" }));
|
|
186
121
|
}
|
|
187
122
|
}
|
|
188
123
|
function close() {
|
|
189
124
|
stopRecording();
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
ws = null;
|
|
193
|
-
}
|
|
125
|
+
ws?.close();
|
|
126
|
+
ws = null;
|
|
194
127
|
}
|
|
195
128
|
return {
|
|
196
129
|
connect,
|
|
@@ -201,7 +134,6 @@ function createASRClient(config) {
|
|
|
201
134
|
}
|
|
202
135
|
export {
|
|
203
136
|
createASRClient,
|
|
204
|
-
createASRClient as createAsrClient
|
|
205
|
-
listen
|
|
137
|
+
createASRClient as createAsrClient
|
|
206
138
|
};
|
|
207
139
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/asr-client.ts"],"sourcesContent":["/**\r\n * ASR Realtime WebSocket Client\r\n */\r\n\r\nconst ASR_PATH = '/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime';\r\n\r\n/**\r\n * Simple ASR: start listening and get transcript\r\n * @returns stop function\r\n * @example\r\n * const stop = await listen((text, isFinal) => console.log(text))\r\n * // later: stop()\r\n */\r\nexport async function listen(\r\n onTranscript: (text: string, isFinal: boolean) => void\r\n): Promise<() => void> {\r\n const ws = new WebSocket(ASR_PATH);\r\n let mediaStream: MediaStream | null = null;\r\n let audioContext: AudioContext | null = null;\r\n let processor: ScriptProcessorNode | null = null;\r\n\r\n const stop = () => {\r\n if (mediaStream) { mediaStream.getTracks().forEach(t => t.stop()); mediaStream = null; }\r\n if (processor) { processor.disconnect(); processor = null; }\r\n if (audioContext) { audioContext.close(); audioContext = null; }\r\n if (ws.readyState === WebSocket.OPEN) {\r\n ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));\r\n ws.close();\r\n }\r\n };\r\n\r\n return new Promise((resolve, reject) => {\r\n ws.onmessage = async (event) => {\r\n const data = JSON.parse(event.data);\r\n\r\n if (data.type === 'session.created') {\r\n // Start recording\r\n try {\r\n mediaStream = await navigator.mediaDevices.getUserMedia({\r\n audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true }\r\n });\r\n audioContext = new AudioContext({ sampleRate: 16000 });\r\n const source = audioContext.createMediaStreamSource(mediaStream);\r\n processor = audioContext.createScriptProcessor(4096, 1, 1);\r\n\r\n processor.onaudioprocess = (e) => {\r\n if (ws.readyState !== WebSocket.OPEN) return;\r\n const input = e.inputBuffer.getChannelData(0);\r\n const pcm = new Int16Array(input.length);\r\n for (let i = 0; i < input.length; i++) {\r\n const s = Math.max(-1, Math.min(1, input[i]!));\r\n pcm[i] = s < 0 ? 
s * 32768 : s * 32767;\r\n }\r\n const bytes = new Uint8Array(pcm.buffer);\r\n let binary = '';\r\n for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]!);\r\n ws.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: btoa(binary) }));\r\n };\r\n\r\n source.connect(processor);\r\n processor.connect(audioContext.destination);\r\n resolve(stop);\r\n } catch (err) {\r\n reject(err);\r\n }\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.text') {\r\n onTranscript(data.text || '', false);\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.completed') {\r\n onTranscript(data.text || data.transcript || '', true);\r\n }\r\n\r\n if (data.type === 'error') {\r\n stop();\r\n reject(new Error(data.error?.message || 'ASR error'));\r\n }\r\n };\r\n\r\n ws.onerror = () => reject(new Error('ASR connection failed'));\r\n });\r\n}\r\n\r\nexport interface ASRClientConfig {\r\n /** Audio format, default 'pcm16' */\r\n audioFormat?: 'pcm16' | 'g711a' | 'g711u';\r\n /** Sample rate, default 16000 */\r\n sampleRate?: number;\r\n /** Called when connection is ready */\r\n onReady?: () => void;\r\n /** Called when speech is detected */\r\n onSpeechStart?: () => void;\r\n /** Called when speech stops */\r\n onSpeechEnd?: () => void;\r\n /** Called on transcript result */\r\n onTranscript?: (text: string, isFinal: boolean) => void;\r\n /** Called on error */\r\n onError?: (error: Error) => void;\r\n}\r\n\r\nexport interface ASRClient {\r\n /** Connect to ASR service */\r\n connect(): Promise<void>;\r\n /** Start recording from microphone */\r\n startRecording(): Promise<void>;\r\n /** Stop recording */\r\n stopRecording(): void;\r\n /** Close connection */\r\n close(): void;\r\n}\r\n\r\nexport function createASRClient(config: ASRClientConfig): ASRClient {\r\n const {\r\n // audioFormat = 'pcm16',\r\n sampleRate = 16000,\r\n onReady,\r\n onSpeechStart,\r\n onSpeechEnd,\r\n onTranscript,\r\n onError,\r\n } 
= config;\r\n\r\n let ws: WebSocket | null = null;\r\n let mediaStream: MediaStream | null = null;\r\n let audioContext: AudioContext | null = null;\r\n let processor: ScriptProcessorNode | null = null;\r\n\r\n async function connect(): Promise<void> {\r\n return new Promise((resolve, reject) => {\r\n ws = new WebSocket(ASR_PATH);\r\n\r\n ws.onopen = () => {};\r\n\r\n ws.onmessage = (event) => {\r\n const data = JSON.parse(event.data);\r\n\r\n if (data.type === 'session.created') {\r\n onReady?.();\r\n resolve();\r\n }\r\n\r\n if (data.type === 'input_audio_buffer.speech_started') {\r\n onSpeechStart?.();\r\n }\r\n\r\n if (data.type === 'input_audio_buffer.speech_stopped') {\r\n onSpeechEnd?.();\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.text') {\r\n onTranscript?.(data.text || '', false);\r\n }\r\n\r\n if (data.type === 'conversation.item.input_audio_transcription.completed') {\r\n onTranscript?.(data.text || data.transcript || '', true);\r\n }\r\n\r\n if (data.type === 'error') {\r\n const err = new Error(data.error?.message || 'Unknown error');\r\n onError?.(err);\r\n reject(err);\r\n }\r\n };\r\n\r\n ws.onerror = () => {\r\n const err = new Error('WebSocket connection error');\r\n onError?.(err);\r\n reject(err);\r\n };\r\n\r\n ws.onclose = () => {\r\n ws = null;\r\n };\r\n });\r\n }\r\n\r\n async function startRecording(): Promise<void> {\r\n if (typeof window === 'undefined') {\r\n throw new Error('Recording only supported in browser');\r\n }\r\n\r\n if (!ws || ws.readyState !== WebSocket.OPEN) {\r\n throw new Error('WebSocket not connected');\r\n }\r\n\r\n try {\r\n mediaStream = await navigator.mediaDevices.getUserMedia({\r\n audio: {\r\n sampleRate,\r\n channelCount: 1,\r\n echoCancellation: true,\r\n noiseSuppression: true,\r\n },\r\n });\r\n\r\n audioContext = new AudioContext({ sampleRate });\r\n const source = audioContext.createMediaStreamSource(mediaStream);\r\n processor = audioContext.createScriptProcessor(4096, 1, 
1);\r\n\r\n processor.onaudioprocess = (e) => {\r\n if (!ws || ws.readyState !== WebSocket.OPEN) return;\r\n\r\n const inputData = e.inputBuffer.getChannelData(0);\r\n const inputLen = inputData.length;\r\n\r\n const pcm = new Int16Array(inputLen);\r\n for (let i = 0; i < inputLen; i++) {\r\n const s = Math.max(-1, Math.min(1, inputData[i]!));\r\n pcm[i] = s < 0 ? s * 32768 : s * 32767;\r\n }\r\n\r\n const bytes = new Uint8Array(pcm.buffer);\r\n const len = bytes.length;\r\n let binary = '';\r\n for (let i = 0; i < len; i++) {\r\n binary += String.fromCharCode(bytes[i]!);\r\n }\r\n const base64 = btoa(binary);\r\n\r\n ws.send(JSON.stringify({\r\n type: 'input_audio_buffer.append',\r\n audio: base64,\r\n }));\r\n };\r\n\r\n source.connect(processor);\r\n processor.connect(audioContext.destination);\r\n } catch (err) {\r\n onError?.(err as Error);\r\n throw err;\r\n }\r\n }\r\n\r\n function stopRecording() {\r\n if (mediaStream) {\r\n mediaStream.getTracks().forEach(track => track.stop());\r\n mediaStream = null;\r\n }\r\n\r\n if (processor) {\r\n processor.disconnect();\r\n processor = null;\r\n }\r\n\r\n if (audioContext) {\r\n audioContext.close();\r\n audioContext = null;\r\n }\r\n\r\n if (ws && ws.readyState === WebSocket.OPEN) {\r\n ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));\r\n }\r\n }\r\n\r\n function close() {\r\n stopRecording();\r\n if (ws) {\r\n ws.close();\r\n ws = null;\r\n }\r\n }\r\n\r\n return {\r\n connect,\r\n startRecording,\r\n stopRecording,\r\n close,\r\n 
};\r\n}\r\n"],"mappings":";AAIA,IAAM,WAAW;AASjB,eAAsB,OACpB,cACqB;AACrB,QAAM,KAAK,IAAI,UAAU,QAAQ;AACjC,MAAI,cAAkC;AACtC,MAAI,eAAoC;AACxC,MAAI,YAAwC;AAE5C,QAAM,OAAO,MAAM;AACjB,QAAI,aAAa;AAAE,kBAAY,UAAU,EAAE,QAAQ,OAAK,EAAE,KAAK,CAAC;AAAG,oBAAc;AAAA,IAAM;AACvF,QAAI,WAAW;AAAE,gBAAU,WAAW;AAAG,kBAAY;AAAA,IAAM;AAC3D,QAAI,cAAc;AAAE,mBAAa,MAAM;AAAG,qBAAe;AAAA,IAAM;AAC/D,QAAI,GAAG,eAAe,UAAU,MAAM;AACpC,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAC7D,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AAEA,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,OAAG,YAAY,OAAO,UAAU;AAC9B,YAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,UAAI,KAAK,SAAS,mBAAmB;AAEnC,YAAI;AACF,wBAAc,MAAM,UAAU,aAAa,aAAa;AAAA,YACtD,OAAO,EAAE,YAAY,MAAO,cAAc,GAAG,kBAAkB,KAAK;AAAA,UACtE,CAAC;AACD,yBAAe,IAAI,aAAa,EAAE,YAAY,KAAM,CAAC;AACrD,gBAAM,SAAS,aAAa,wBAAwB,WAAW;AAC/D,sBAAY,aAAa,sBAAsB,MAAM,GAAG,CAAC;AAEzD,oBAAU,iBAAiB,CAAC,MAAM;AAChC,gBAAI,GAAG,eAAe,UAAU,KAAM;AACtC,kBAAM,QAAQ,EAAE,YAAY,eAAe,CAAC;AAC5C,kBAAM,MAAM,IAAI,WAAW,MAAM,MAAM;AACvC,qBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,oBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,MAAM,CAAC,CAAE,CAAC;AAC7C,kBAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,YACnC;AACA,kBAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,gBAAI,SAAS;AACb,qBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,IAAK,WAAU,OAAO,aAAa,MAAM,CAAC,CAAE;AAC9E,eAAG,KAAK,KAAK,UAAU,EAAE,MAAM,6BAA6B,OAAO,KAAK,MAAM,EAAE,CAAC,CAAC;AAAA,UACpF;AAEA,iBAAO,QAAQ,SAAS;AACxB,oBAAU,QAAQ,aAAa,WAAW;AAC1C,kBAAQ,IAAI;AAAA,QACd,SAAS,KAAK;AACZ,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,UAAI,KAAK,SAAS,oDAAoD;AACpE,qBAAa,KAAK,QAAQ,IAAI,KAAK;AAAA,MACrC;AAEA,UAAI,KAAK,SAAS,yDAAyD;AACzE,qBAAa,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,MACvD;AAEA,UAAI,KAAK,SAAS,SAAS;AACzB,aAAK;AACL,eAAO,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW,CAAC;AAAA,MACtD;AAAA,IACF;AAEA,OAAG,UAAU,MAAM,OAAO,IAAI,MAAM,uBAAuB,CAAC;AAAA,EAC9D,CAAC;AACH;AA8BO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA;AAAA,IAEJ,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,cAAkC;AACtC,MAAI,eAAoC;AACxC,MAAI,YAAwC;AAE5C,iBAAe,UAAyB;AACtC,WAAO,IAAI,
QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,IAAI,UAAU,QAAQ;AAE3B,SAAG,SAAS,MAAM;AAAA,MAAC;AAEnB,SAAG,YAAY,CAAC,UAAU;AACxB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,0BAAgB;AAAA,QAClB;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,wBAAc;AAAA,QAChB;AAEA,YAAI,KAAK,SAAS,oDAAoD;AACpE,yBAAe,KAAK,QAAQ,IAAI,KAAK;AAAA,QACvC;AAEA,YAAI,KAAK,SAAS,yDAAyD;AACzE,yBAAe,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,QACzD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,eAAe;AAC5D,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAG,UAAU,MAAM;AACjB,cAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAG,UAAU,MAAM;AACjB,aAAK;AAAA,MACP;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,iBAAgC;AAC7C,QAAI,OAAO,WAAW,aAAa;AACjC,YAAM,IAAI,MAAM,qCAAqC;AAAA,IACvD;AAEA,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,QAAI;AACF,oBAAc,MAAM,UAAU,aAAa,aAAa;AAAA,QACtD,OAAO;AAAA,UACL;AAAA,UACA,cAAc;AAAA,UACd,kBAAkB;AAAA,UAClB,kBAAkB;AAAA,QACpB;AAAA,MACF,CAAC;AAED,qBAAe,IAAI,aAAa,EAAE,WAAW,CAAC;AAC9C,YAAM,SAAS,aAAa,wBAAwB,WAAW;AAC/D,kBAAY,aAAa,sBAAsB,MAAM,GAAG,CAAC;AAEzD,gBAAU,iBAAiB,CAAC,MAAM;AAChC,YAAI,CAAC,MAAM,GAAG,eAAe,UAAU,KAAM;AAE7C,cAAM,YAAY,EAAE,YAAY,eAAe,CAAC;AAChD,cAAM,WAAW,UAAU;AAE3B,cAAM,MAAM,IAAI,WAAW,QAAQ;AACnC,iBAAS,IAAI,GAAG,IAAI,UAAU,KAAK;AACjC,gBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,UAAU,CAAC,CAAE,CAAC;AACjD,cAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,QACnC;AAEA,cAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,cAAM,MAAM,MAAM;AAClB,YAAI,SAAS;AACb,iBAAS,IAAI,GAAG,IAAI,KAAK,KAAK;AAC5B,oBAAU,OAAO,aAAa,MAAM,CAAC,CAAE;AAAA,QACzC;AACA,cAAM,SAAS,KAAK,MAAM;AAE1B,WAAG,KAAK,KAAK,UAAU;AAAA,UACrB,MAAM;AAAA,UACN,OAAO;AAAA,QACT,CAAC,CAAC;AAAA,MACJ;AAEA,aAAO,QAAQ,SAAS;AACxB,gBAAU,QAAQ,aAAa,WAAW;AAAA,IAC5C,SAAS,KAAK;AACZ,gBAAU,GAAY;AACtB,YAAM;AAAA,IACR;AAAA,EACF;AAEA,WAAS,gBAAgB;AACvB,QAAI,aAAa;AACf,kBAAY,UAAU,EAAE,QAAQ,WAAS,MAAM,KAAK,CAAC;AACrD,oBAAc;AAAA,IAChB;AAEA,QAAI,WAAW;AACb,gBAAU,WAAW;AACrB,kBAAY;AAA
A,IACd;AAEA,QAAI,cAAc;AAChB,mBAAa,MAAM;AACnB,qBAAe;AAAA,IACjB;AAEA,QAAI,MAAM,GAAG,eAAe,UAAU,MAAM;AAC1C,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAAA,IAC/D;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,kBAAc;AACd,QAAI,IAAI;AACN,SAAG,MAAM;AACT,WAAK;AAAA,IACP;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/asr-client.ts"],"sourcesContent":["/**\n * ASR Realtime WebSocket Client\n */\n\nconst ASR_PATH = \"/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime\";\n\ninterface RealtimeRecorder {\n start(onAudio: (base64: string) => void): Promise<void>;\n stop(): Promise<void>;\n}\n\nasync function createRealtimeRecorder(): Promise<RealtimeRecorder> {\n let stream: MediaStream;\n let ctx: AudioContext;\n let source: MediaStreamAudioSourceNode;\n let processor: ScriptProcessorNode;\n\n return {\n async start(onAudio) {\n stream = await navigator.mediaDevices.getUserMedia({\n audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true },\n });\n\n log(\"✅ 麦克风已启动\", \"success\");\n log(\"💬 请对着麦克风说话,实时识别中...\", \"success\");\n\n ctx = new AudioContext({ sampleRate: 16000 });\n source = ctx.createMediaStreamSource(stream);\n processor = ctx.createScriptProcessor(4096, 1, 1);\n\n processor.onaudioprocess = (e) => {\n const inputData = e.inputBuffer.getChannelData(0);\n const pcm = new Int16Array(inputData.length);\n for (let i = 0; i < inputData.length; i++) {\n const s = Math.max(-1, Math.min(1, inputData[i] || 0));\n pcm[i] = s < 0 ? 
s * 32768 : s * 32767;\n }\n\n const bytes = new Uint8Array(pcm.buffer);\n let binary = \"\";\n for (let i = 0; i < bytes.length; i++) {\n binary += String.fromCharCode(bytes[i] || 0);\n }\n\n onAudio(btoa(binary));\n };\n\n source.connect(processor);\n processor.connect(ctx.destination);\n },\n\n async stop() {\n stream?.getTracks().forEach((t) => t.stop());\n source?.disconnect();\n processor?.disconnect();\n await ctx?.close();\n },\n };\n}\n\nconst log = (message: string, type = \"\") => {\n console.log(`[${type}]`, message);\n};\n\nexport interface ASRClientConfig {\n /** Audio format, default 'pcm16' */\n audioFormat?: \"pcm16\" | \"g711a\" | \"g711u\";\n /** Sample rate, default 16000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when speech is detected */\n onSpeechStart?: () => void;\n /** Called when speech stops */\n onSpeechEnd?: () => void;\n /** Called on transcript result */\n onTranscript?: (text: string, isFinal: boolean) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n /** Called on close */\n onClose?: () => void;\n /** WebSocket path, default ASR_PATH */\n path?: string;\n}\n\nexport interface ASRClient {\n /** Connect to ASR service */\n connect(): Promise<void>;\n /** Start recording from microphone */\n startRecording(): Promise<void>;\n /** Stop recording */\n stopRecording(): void;\n /** Close connection */\n close(): void;\n}\n\nexport function createASRClient(config: ASRClientConfig): ASRClient {\n const {\n onReady,\n onSpeechStart,\n onSpeechEnd,\n onTranscript,\n onError,\n onClose,\n path = ASR_PATH,\n } = config;\n\n let ws: WebSocket | null = null;\n let recorder: RealtimeRecorder | null = null;\n\n async function connect(): Promise<void> {\n ws = new WebSocket(path);\n\n return new Promise((resolve, reject) => {\n ws!.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n onReady?.();\n 
resolve();\n }\n\n if (data.type === \"input_audio_buffer.speech_started\") {\n onSpeechStart?.();\n }\n\n if (data.type === \"input_audio_buffer.speech_stopped\") {\n onSpeechEnd?.();\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.text\") {\n onTranscript?.(data.text || data.stash || data.transcript || \"\", false);\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.completed\") {\n onTranscript?.(data.text || data.transcript || \"\", true);\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"ASR error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws!.onerror = () => {\n const err = new Error(\"WebSocket error\");\n onError?.(err);\n reject(err);\n };\n\n ws!.onclose = () => {\n recorder?.stop();\n recorder = null;\n ws = null;\n onClose?.();\n };\n });\n }\n\n async function startRecording(): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n recorder = await createRealtimeRecorder();\n await recorder.start((audio) => {\n if (!ws || ws.readyState !== WebSocket.OPEN) return;\n ws.send(\n JSON.stringify({\n type: \"input_audio_buffer.append\",\n audio,\n })\n );\n });\n }\n\n async function stopRecording() {\n try {\n await recorder?.stop();\n } catch (err) {}\n recorder = null;\n\n if (ws && ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: \"input_audio_buffer.commit\" }));\n }\n }\n\n function close() {\n stopRecording();\n ws?.close();\n ws = null;\n }\n\n return {\n connect,\n startRecording,\n stopRecording,\n close,\n 
};\n}\n"],"mappings":";AAIA,IAAM,WAAW;AAOjB,eAAe,yBAAoD;AACjE,MAAI;AACJ,MAAI;AACJ,MAAI;AACJ,MAAI;AAEJ,SAAO;AAAA,IACL,MAAM,MAAM,SAAS;AACnB,eAAS,MAAM,UAAU,aAAa,aAAa;AAAA,QACjD,OAAO,EAAE,YAAY,MAAO,cAAc,GAAG,kBAAkB,KAAK;AAAA,MACtE,CAAC;AAED,UAAI,+CAAY,SAAS;AACzB,UAAI,qGAAwB,SAAS;AAErC,YAAM,IAAI,aAAa,EAAE,YAAY,KAAM,CAAC;AAC5C,eAAS,IAAI,wBAAwB,MAAM;AAC3C,kBAAY,IAAI,sBAAsB,MAAM,GAAG,CAAC;AAEhD,gBAAU,iBAAiB,CAAC,MAAM;AAChC,cAAM,YAAY,EAAE,YAAY,eAAe,CAAC;AAChD,cAAM,MAAM,IAAI,WAAW,UAAU,MAAM;AAC3C,iBAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,gBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;AACrD,cAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,QACnC;AAEA,cAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,YAAI,SAAS;AACb,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,oBAAU,OAAO,aAAa,MAAM,CAAC,KAAK,CAAC;AAAA,QAC7C;AAEA,gBAAQ,KAAK,MAAM,CAAC;AAAA,MACtB;AAEA,aAAO,QAAQ,SAAS;AACxB,gBAAU,QAAQ,IAAI,WAAW;AAAA,IACnC;AAAA,IAEA,MAAM,OAAO;AACX,cAAQ,UAAU,EAAE,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC;AAC3C,cAAQ,WAAW;AACnB,iBAAW,WAAW;AACtB,YAAM,KAAK,MAAM;AAAA,IACnB;AAAA,EACF;AACF;AAEA,IAAM,MAAM,CAAC,SAAiB,OAAO,OAAO;AAC1C,UAAQ,IAAI,IAAI,IAAI,KAAK,OAAO;AAClC;AAkCO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,OAAO;AAAA,EACT,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,WAAoC;AAExC,iBAAe,UAAyB;AACtC,SAAK,IAAI,UAAU,IAAI;AAEvB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,SAAI,YAAY,CAAC,UAAU;AACzB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,0BAAgB;AAAA,QAClB;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,wBAAc;AAAA,QAChB;AAEA,YAAI,KAAK,SAAS,oDAAoD;AACpE,yBAAe,KAAK,QAAQ,KAAK,SAAS,KAAK,cAAc,IAAI,KAAK;AAAA,QACxE;AAEA,YAAI,KAAK,SAAS,yDAAyD;AACzE,yBAAe,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,QACzD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW;AACxD,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAI,UAAU,MAAM;AAClB,cAAM,MAAM,IAAI,MAAM,iBAAiB;AACvC,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAI,UAAU,MAAM;AACl
B,kBAAU,KAAK;AACf,mBAAW;AACX,aAAK;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,iBAAgC;AAC7C,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,eAAW,MAAM,uBAAuB;AACxC,UAAM,SAAS,MAAM,CAAC,UAAU;AAC9B,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,KAAM;AAC7C,SAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,gBAAgB;AAC7B,QAAI;AACF,YAAM,UAAU,KAAK;AAAA,IACvB,SAAS,KAAK;AAAA,IAAC;AACf,eAAW;AAEX,QAAI,MAAM,GAAG,eAAe,UAAU,MAAM;AAC1C,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAAA,IAC/D;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,kBAAc;AACd,QAAI,MAAM;AACV,SAAK;AAAA,EACP;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,45 +1,45 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@amaster.ai/asr-client",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "Qwen ASR Realtime WebSocket client with microphone recording",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "./dist/index.cjs",
|
|
7
|
-
"module": "./dist/index.js",
|
|
8
|
-
"types": "./dist/index.d.ts",
|
|
9
|
-
"exports": {
|
|
10
|
-
".": {
|
|
11
|
-
"types": "./dist/index.d.ts",
|
|
12
|
-
"import": "./dist/index.js",
|
|
13
|
-
"require": "./dist/index.cjs"
|
|
14
|
-
}
|
|
15
|
-
},
|
|
16
|
-
"files": [
|
|
17
|
-
"dist",
|
|
18
|
-
"README.md"
|
|
19
|
-
],
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
|
|
35
|
-
"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
"
|
|
43
|
-
"
|
|
44
|
-
}
|
|
45
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@amaster.ai/asr-client",
|
|
3
|
+
"version": "1.1.0-beta.1",
|
|
4
|
+
"description": "Qwen ASR Realtime WebSocket client with microphone recording",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.cjs",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"require": "./dist/index.cjs"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist",
|
|
18
|
+
"README.md"
|
|
19
|
+
],
|
|
20
|
+
"keywords": [
|
|
21
|
+
"asr",
|
|
22
|
+
"speech-to-text",
|
|
23
|
+
"qwen",
|
|
24
|
+
"realtime",
|
|
25
|
+
"websocket",
|
|
26
|
+
"audio",
|
|
27
|
+
"speech-recognition"
|
|
28
|
+
],
|
|
29
|
+
"author": "Amaster Team",
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"publishConfig": {
|
|
32
|
+
"access": "public",
|
|
33
|
+
"registry": "https://registry.npmjs.org/"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"tsup": "^8.3.5",
|
|
37
|
+
"typescript": "~5.7.2"
|
|
38
|
+
},
|
|
39
|
+
"scripts": {
|
|
40
|
+
"build": "tsup",
|
|
41
|
+
"dev": "tsup --watch",
|
|
42
|
+
"clean": "rm -rf dist *.tsbuildinfo",
|
|
43
|
+
"type-check": "tsc --noEmit"
|
|
44
|
+
}
|
|
45
|
+
}
|