@amaster.ai/asr-client 1.0.0-beta.6 → 1.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -84
- package/dist/index.cjs +70 -69
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +5 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +70 -69
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,131 +1,172 @@
|
|
|
1
|
-
#
|
|
1
|
+
# ASR Realtime WebSocket Client SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
基于 Web Audio + WebSocket 的 **实时语音识别(ASR)客户端 SDK**,用于将浏览器麦克风音频实时发送到 ASR 服务(如 Qwen ASR Realtime),并接收实时/最终转写结果。
|
|
4
|
+
|
|
5
|
+
---
|
|
4
6
|
|
|
5
7
|
## 特性
|
|
6
8
|
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
- ✅ 基于实际调试验证的实现
|
|
9
|
+
- 🎙 浏览器麦克风实时采集(16kHz / 单声道)
|
|
10
|
+
- 🔁 实时音频流式发送(Base64 PCM16)
|
|
11
|
+
- 🧠 支持语音开始 / 结束事件
|
|
12
|
+
- ✍️ 支持中间结果与最终转写结果
|
|
13
|
+
- 🔌 简单的 WebSocket 生命周期管理
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
---
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
## 安装与环境要求
|
|
18
|
+
|
|
19
|
+
### 浏览器要求
|
|
20
|
+
|
|
21
|
+
- 支持 `getUserMedia`
|
|
22
|
+
- 支持 `AudioContext`
|
|
23
|
+
- 支持 `WebSocket`
|
|
24
|
+
|
|
25
|
+
推荐使用 **Chrome / Edge 最新版本**。
|
|
26
|
+
|
|
27
|
+
---
|
|
19
28
|
|
|
20
29
|
## 快速开始
|
|
21
30
|
|
|
22
|
-
|
|
23
|
-
import { createASRClient } from '@amaster.ai/asr-client';
|
|
31
|
+
### 1️⃣ 创建 ASR Client
|
|
24
32
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
gatewayUrl: 'ws://www.appok.ai/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime',
|
|
28
|
-
language: 'zh',
|
|
29
|
-
});
|
|
33
|
+
```ts
|
|
34
|
+
import { createASRClient } from "@amaster.ai/asr-client";
|
|
30
35
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
}
|
|
36
|
+
const client = createASRClient({
|
|
37
|
+
onReady() {
|
|
38
|
+
console.log("ASR 连接成功");
|
|
39
|
+
},
|
|
35
40
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
}
|
|
41
|
+
onSpeechStart() {
|
|
42
|
+
console.log("检测到说话开始");
|
|
43
|
+
},
|
|
39
44
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}
|
|
45
|
+
onSpeechEnd() {
|
|
46
|
+
console.log("检测到说话结束");
|
|
47
|
+
},
|
|
48
|
+
|
|
49
|
+
onTranscript(text, isFinal) {
|
|
50
|
+
console.log(isFinal ? "最终结果:" : "实时结果:", text);
|
|
51
|
+
},
|
|
43
52
|
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
onError(err) {
|
|
54
|
+
console.error("ASR 错误", err);
|
|
55
|
+
},
|
|
56
|
+
|
|
57
|
+
onClose() {
|
|
58
|
+
console.log("连接已关闭");
|
|
59
|
+
},
|
|
46
60
|
});
|
|
61
|
+
```
|
|
47
62
|
|
|
48
|
-
|
|
49
|
-
await asr.connect();
|
|
50
|
-
await asr.startRecording(); // 自动请求麦克风权限
|
|
63
|
+
---
|
|
51
64
|
|
|
52
|
-
|
|
53
|
-
asr.stopRecording();
|
|
65
|
+
### 2️⃣ 建立 WebSocket 连接
|
|
54
66
|
|
|
55
|
-
|
|
56
|
-
|
|
67
|
+
```ts
|
|
68
|
+
await client.connect();
|
|
57
69
|
```
|
|
58
70
|
|
|
59
|
-
|
|
71
|
+
连接成功后会触发 `onReady` 回调。
|
|
60
72
|
|
|
61
|
-
|
|
73
|
+
---
|
|
62
74
|
|
|
63
|
-
|
|
75
|
+
### 3️⃣ 开始录音并实时识别
|
|
64
76
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
- `audioFormat`: 音频格式,默认 `'pcm16'`
|
|
69
|
-
- `sampleRate`: 采样率,默认 `16000`
|
|
77
|
+
```ts
|
|
78
|
+
await client.startRecording();
|
|
79
|
+
```
|
|
70
80
|
|
|
71
|
-
|
|
81
|
+
* 自动请求麦克风权限
|
|
82
|
+
* 自动开始推送音频流
|
|
83
|
+
* 服务端会持续返回实时转写结果
|
|
72
84
|
|
|
73
|
-
|
|
85
|
+
---
|
|
74
86
|
|
|
75
|
-
|
|
87
|
+
### 4️⃣ 停止录音
|
|
76
88
|
|
|
77
|
-
|
|
89
|
+
```ts
|
|
90
|
+
client.stopRecording();
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
* 停止麦克风采集
|
|
94
|
+
* 向服务端发送 `input_audio_buffer.commit`
|
|
95
|
+
* 触发最终转写结果
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
### 5️⃣ 关闭连接
|
|
100
|
+
|
|
101
|
+
```ts
|
|
102
|
+
client.close();
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
78
106
|
|
|
79
|
-
|
|
107
|
+
## API 说明
|
|
80
108
|
|
|
81
|
-
|
|
109
|
+
### `createASRClient(config): ASRClient`
|
|
110
|
+
|
|
111
|
+
#### `ASRClientConfig`
|
|
112
|
+
|
|
113
|
+
| 参数 | 类型 | 说明 |
|
|
114
|
+
| --------------- | ------------------------------------------ | -------------- |
|
|
115
|
+
| `onReady` | `() => void` | 会话创建完成 |
|
|
116
|
+
| `onSpeechStart` | `() => void` | 检测到语音开始 |
|
|
117
|
+
| `onSpeechEnd` | `() => void` | 检测到语音结束 |
|
|
118
|
+
| `onTranscript` | `(text: string, isFinal: boolean) => void` | 转写回调 |
|
|
119
|
+
| `onError` | `(error: Error) => void` | 错误回调 |
|
|
120
|
+
| `onClose` | `() => void` | 连接关闭回调 |
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
### `ASRClient`
|
|
125
|
+
|
|
126
|
+
```ts
|
|
127
|
+
interface ASRClient {
|
|
128
|
+
connect(): Promise<void>;
|
|
129
|
+
startRecording(): Promise<void>;
|
|
130
|
+
stopRecording(): void;
|
|
131
|
+
close(): void;
|
|
132
|
+
}
|
|
133
|
+
```
|
|
82
134
|
|
|
83
|
-
|
|
135
|
+
---
|
|
84
136
|
|
|
85
|
-
|
|
137
|
+
## 音频参数说明
|
|
86
138
|
|
|
87
|
-
|
|
139
|
+
* **采样率**:16000 Hz
|
|
140
|
+
* **声道**:单声道
|
|
141
|
+
* **格式**:PCM16 → Base64
|
|
142
|
+
* **缓冲大小**:4096 frames
|
|
88
143
|
|
|
89
|
-
|
|
90
|
-
- `connected`: WebSocket 连接建立
|
|
91
|
-
- `session-created`: 会话创建成功
|
|
92
|
-
- `recording-started`: 录音已开始
|
|
93
|
-
- `speech-started`: VAD 检测到语音开始
|
|
94
|
-
- `speech-stopped`: VAD 检测到语音停止
|
|
95
|
-
- `transcript-partial`: 中间识别结果(实时更新)
|
|
96
|
-
- `transcript-final`: 最终识别结果(确认)
|
|
97
|
-
- `error`: 发生错误
|
|
98
|
-
- `closed`: 连接关闭
|
|
144
|
+
---
|
|
99
145
|
|
|
100
|
-
|
|
146
|
+
## 常见问题
|
|
101
147
|
|
|
102
|
-
|
|
148
|
+
### Q: 为什么必须是 16kHz?
|
|
103
149
|
|
|
104
|
-
|
|
150
|
+
ASR 服务通常要求 16kHz PCM 输入,否则会影响识别效果或直接报错。
|
|
105
151
|
|
|
106
|
-
|
|
152
|
+
---
|
|
107
153
|
|
|
108
|
-
|
|
109
|
-
- 格式:PCM 16-bit Mono
|
|
110
|
-
- 编码:Base64
|
|
111
|
-
- 发送频率:约每秒 4 次(每次 4096 采样点)
|
|
154
|
+
### Q: 支持移动端吗?
|
|
112
155
|
|
|
113
|
-
|
|
156
|
+
支持,但需注意:
|
|
114
157
|
|
|
115
|
-
|
|
116
|
-
|
|
158
|
+
* iOS Safari 需用户手势触发录音
|
|
159
|
+
* 页面切换到后台后,音频采集会被自动暂停
|
|
117
160
|
|
|
118
|
-
|
|
161
|
+
---
|
|
119
162
|
|
|
120
|
-
|
|
121
|
-
- `conversation.item.input_audio_transcription.text` - 中间结果
|
|
122
|
-
- `conversation.item.input_audio_transcription.completed` - 最终结果
|
|
163
|
+
## 注意事项
|
|
123
164
|
|
|
124
|
-
|
|
165
|
+
* 麦克风采集(`getUserMedia`)只能在 HTTPS 或 localhost 等安全上下文中使用;在 HTTPS 页面中 WebSocket 需使用 `wss://` 协议
|
|
166
|
+
* 页面关闭前建议调用 `client.close()`
|
|
167
|
+
* 不建议在多个 ASR Client 实例中共享麦克风
|
|
125
168
|
|
|
126
|
-
|
|
127
|
-
- `input_audio_buffer.speech_started` - 开始说话
|
|
128
|
-
- `input_audio_buffer.speech_stopped` - 停止说话
|
|
169
|
+
---
|
|
129
170
|
|
|
130
171
|
## License
|
|
131
172
|
|
package/dist/index.cjs
CHANGED
|
@@ -27,25 +27,64 @@ module.exports = __toCommonJS(index_exports);
|
|
|
27
27
|
|
|
28
28
|
// src/asr-client.ts
|
|
29
29
|
var ASR_PATH = "/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime";
|
|
30
|
+
async function createRealtimeRecorder() {
|
|
31
|
+
let stream;
|
|
32
|
+
let ctx;
|
|
33
|
+
let source;
|
|
34
|
+
let processor;
|
|
35
|
+
return {
|
|
36
|
+
async start(onAudio) {
|
|
37
|
+
stream = await navigator.mediaDevices.getUserMedia({
|
|
38
|
+
audio: { sampleRate: 16e3, channelCount: 1, echoCancellation: true }
|
|
39
|
+
});
|
|
40
|
+
log("\u2705 \u9EA6\u514B\u98CE\u5DF2\u542F\u52A8", "success");
|
|
41
|
+
log("\u{1F4AC} \u8BF7\u5BF9\u7740\u9EA6\u514B\u98CE\u8BF4\u8BDD\uFF0C\u5B9E\u65F6\u8BC6\u522B\u4E2D...", "success");
|
|
42
|
+
ctx = new AudioContext({ sampleRate: 16e3 });
|
|
43
|
+
source = ctx.createMediaStreamSource(stream);
|
|
44
|
+
processor = ctx.createScriptProcessor(4096, 1, 1);
|
|
45
|
+
processor.onaudioprocess = (e) => {
|
|
46
|
+
const inputData = e.inputBuffer.getChannelData(0);
|
|
47
|
+
const pcm = new Int16Array(inputData.length);
|
|
48
|
+
for (let i = 0; i < inputData.length; i++) {
|
|
49
|
+
const s = Math.max(-1, Math.min(1, inputData[i] || 0));
|
|
50
|
+
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
51
|
+
}
|
|
52
|
+
const bytes = new Uint8Array(pcm.buffer);
|
|
53
|
+
let binary = "";
|
|
54
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
55
|
+
binary += String.fromCharCode(bytes[i] || 0);
|
|
56
|
+
}
|
|
57
|
+
onAudio(btoa(binary));
|
|
58
|
+
};
|
|
59
|
+
source.connect(processor);
|
|
60
|
+
processor.connect(ctx.destination);
|
|
61
|
+
},
|
|
62
|
+
async stop() {
|
|
63
|
+
stream?.getTracks().forEach((t) => t.stop());
|
|
64
|
+
source?.disconnect();
|
|
65
|
+
processor?.disconnect();
|
|
66
|
+
await ctx?.close();
|
|
67
|
+
}
|
|
68
|
+
};
|
|
69
|
+
}
|
|
70
|
+
var log = (message, type = "") => {
|
|
71
|
+
console.log(`[${type}]`, message);
|
|
72
|
+
};
|
|
30
73
|
function createASRClient(config) {
|
|
31
74
|
const {
|
|
32
|
-
// audioFormat = 'pcm16',
|
|
33
|
-
sampleRate = 16e3,
|
|
34
75
|
onReady,
|
|
35
76
|
onSpeechStart,
|
|
36
77
|
onSpeechEnd,
|
|
37
78
|
onTranscript,
|
|
38
|
-
onError
|
|
79
|
+
onError,
|
|
80
|
+
onClose,
|
|
81
|
+
path = ASR_PATH
|
|
39
82
|
} = config;
|
|
40
83
|
let ws = null;
|
|
41
|
-
let
|
|
42
|
-
let audioContext = null;
|
|
43
|
-
let processor = null;
|
|
84
|
+
let recorder = null;
|
|
44
85
|
async function connect() {
|
|
86
|
+
ws = new WebSocket(path);
|
|
45
87
|
return new Promise((resolve, reject) => {
|
|
46
|
-
ws = new WebSocket(ASR_PATH);
|
|
47
|
-
ws.onopen = () => {
|
|
48
|
-
};
|
|
49
88
|
ws.onmessage = (event) => {
|
|
50
89
|
const data = JSON.parse(event.data);
|
|
51
90
|
if (data.type === "session.created") {
|
|
@@ -59,97 +98,59 @@ function createASRClient(config) {
|
|
|
59
98
|
onSpeechEnd?.();
|
|
60
99
|
}
|
|
61
100
|
if (data.type === "conversation.item.input_audio_transcription.text") {
|
|
62
|
-
onTranscript?.(data.text || "", false);
|
|
101
|
+
onTranscript?.(data.text || data.stash || data.transcript || "", false);
|
|
63
102
|
}
|
|
64
103
|
if (data.type === "conversation.item.input_audio_transcription.completed") {
|
|
65
104
|
onTranscript?.(data.text || data.transcript || "", true);
|
|
66
105
|
}
|
|
67
106
|
if (data.type === "error") {
|
|
68
|
-
const err = new Error(data.error?.message || "
|
|
107
|
+
const err = new Error(data.error?.message || "ASR error");
|
|
69
108
|
onError?.(err);
|
|
70
109
|
reject(err);
|
|
71
110
|
}
|
|
72
111
|
};
|
|
73
112
|
ws.onerror = () => {
|
|
74
|
-
const err = new Error("WebSocket
|
|
113
|
+
const err = new Error("WebSocket error");
|
|
75
114
|
onError?.(err);
|
|
76
115
|
reject(err);
|
|
77
116
|
};
|
|
78
117
|
ws.onclose = () => {
|
|
118
|
+
recorder?.stop();
|
|
119
|
+
recorder = null;
|
|
79
120
|
ws = null;
|
|
121
|
+
onClose?.();
|
|
80
122
|
};
|
|
81
123
|
});
|
|
82
124
|
}
|
|
83
125
|
async function startRecording() {
|
|
84
|
-
if (typeof window === "undefined") {
|
|
85
|
-
throw new Error("Recording only supported in browser");
|
|
86
|
-
}
|
|
87
126
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
88
127
|
throw new Error("WebSocket not connected");
|
|
89
128
|
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
echoCancellation: true,
|
|
96
|
-
noiseSuppression: true
|
|
97
|
-
}
|
|
98
|
-
});
|
|
99
|
-
audioContext = new AudioContext({ sampleRate });
|
|
100
|
-
const source = audioContext.createMediaStreamSource(mediaStream);
|
|
101
|
-
processor = audioContext.createScriptProcessor(4096, 1, 1);
|
|
102
|
-
processor.onaudioprocess = (e) => {
|
|
103
|
-
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
104
|
-
const inputData = e.inputBuffer.getChannelData(0);
|
|
105
|
-
const inputLen = inputData.length;
|
|
106
|
-
const pcm = new Int16Array(inputLen);
|
|
107
|
-
for (let i = 0; i < inputLen; i++) {
|
|
108
|
-
const s = Math.max(-1, Math.min(1, inputData[i]));
|
|
109
|
-
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
110
|
-
}
|
|
111
|
-
const bytes = new Uint8Array(pcm.buffer);
|
|
112
|
-
const len = bytes.length;
|
|
113
|
-
let binary = "";
|
|
114
|
-
for (let i = 0; i < len; i++) {
|
|
115
|
-
binary += String.fromCharCode(bytes[i]);
|
|
116
|
-
}
|
|
117
|
-
const base64 = btoa(binary);
|
|
118
|
-
ws.send(JSON.stringify({
|
|
129
|
+
recorder = await createRealtimeRecorder();
|
|
130
|
+
await recorder.start((audio) => {
|
|
131
|
+
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
132
|
+
ws.send(
|
|
133
|
+
JSON.stringify({
|
|
119
134
|
type: "input_audio_buffer.append",
|
|
120
|
-
audio
|
|
121
|
-
})
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
processor.connect(audioContext.destination);
|
|
125
|
-
} catch (err) {
|
|
126
|
-
onError?.(err);
|
|
127
|
-
throw err;
|
|
128
|
-
}
|
|
135
|
+
audio
|
|
136
|
+
})
|
|
137
|
+
);
|
|
138
|
+
});
|
|
129
139
|
}
|
|
130
|
-
function stopRecording() {
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
}
|
|
135
|
-
if (processor) {
|
|
136
|
-
processor.disconnect();
|
|
137
|
-
processor = null;
|
|
138
|
-
}
|
|
139
|
-
if (audioContext) {
|
|
140
|
-
audioContext.close();
|
|
141
|
-
audioContext = null;
|
|
140
|
+
async function stopRecording() {
|
|
141
|
+
try {
|
|
142
|
+
await recorder?.stop();
|
|
143
|
+
} catch (err) {
|
|
142
144
|
}
|
|
145
|
+
recorder = null;
|
|
143
146
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
|
144
147
|
ws.send(JSON.stringify({ type: "input_audio_buffer.commit" }));
|
|
145
148
|
}
|
|
146
149
|
}
|
|
147
150
|
function close() {
|
|
148
151
|
stopRecording();
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
ws = null;
|
|
152
|
-
}
|
|
152
|
+
ws?.close();
|
|
153
|
+
ws = null;
|
|
153
154
|
}
|
|
154
155
|
return {
|
|
155
156
|
connect,
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/asr-client.ts"],"sourcesContent":["export
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/asr-client.ts"],"sourcesContent":["export * from './asr-client';\nexport { createASRClient as createAsrClient } from './asr-client';\n","/**\n * ASR Realtime WebSocket Client\n */\n\nconst ASR_PATH = \"/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime\";\n\ninterface RealtimeRecorder {\n start(onAudio: (base64: string) => void): Promise<void>;\n stop(): Promise<void>;\n}\n\nasync function createRealtimeRecorder(): Promise<RealtimeRecorder> {\n let stream: MediaStream;\n let ctx: AudioContext;\n let source: MediaStreamAudioSourceNode;\n let processor: ScriptProcessorNode;\n\n return {\n async start(onAudio) {\n stream = await navigator.mediaDevices.getUserMedia({\n audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true },\n });\n\n log(\"✅ 麦克风已启动\", \"success\");\n log(\"💬 请对着麦克风说话,实时识别中...\", \"success\");\n\n ctx = new AudioContext({ sampleRate: 16000 });\n source = ctx.createMediaStreamSource(stream);\n processor = ctx.createScriptProcessor(4096, 1, 1);\n\n processor.onaudioprocess = (e) => {\n const inputData = e.inputBuffer.getChannelData(0);\n const pcm = new Int16Array(inputData.length);\n for (let i = 0; i < inputData.length; i++) {\n const s = Math.max(-1, Math.min(1, inputData[i] || 0));\n pcm[i] = s < 0 ? 
s * 32768 : s * 32767;\n }\n\n const bytes = new Uint8Array(pcm.buffer);\n let binary = \"\";\n for (let i = 0; i < bytes.length; i++) {\n binary += String.fromCharCode(bytes[i] || 0);\n }\n\n onAudio(btoa(binary));\n };\n\n source.connect(processor);\n processor.connect(ctx.destination);\n },\n\n async stop() {\n stream?.getTracks().forEach((t) => t.stop());\n source?.disconnect();\n processor?.disconnect();\n await ctx?.close();\n },\n };\n}\n\nconst log = (message: string, type = \"\") => {\n console.log(`[${type}]`, message);\n};\n\nexport interface ASRClientConfig {\n /** Audio format, default 'pcm16' */\n audioFormat?: \"pcm16\" | \"g711a\" | \"g711u\";\n /** Sample rate, default 16000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when speech is detected */\n onSpeechStart?: () => void;\n /** Called when speech stops */\n onSpeechEnd?: () => void;\n /** Called on transcript result */\n onTranscript?: (text: string, isFinal: boolean) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n /** Called on close */\n onClose?: () => void;\n /** WebSocket path, default ASR_PATH */\n path?: string;\n}\n\nexport interface ASRClient {\n /** Connect to ASR service */\n connect(): Promise<void>;\n /** Start recording from microphone */\n startRecording(): Promise<void>;\n /** Stop recording */\n stopRecording(): void;\n /** Close connection */\n close(): void;\n}\n\nexport function createASRClient(config: ASRClientConfig): ASRClient {\n const {\n onReady,\n onSpeechStart,\n onSpeechEnd,\n onTranscript,\n onError,\n onClose,\n path = ASR_PATH,\n } = config;\n\n let ws: WebSocket | null = null;\n let recorder: RealtimeRecorder | null = null;\n\n async function connect(): Promise<void> {\n ws = new WebSocket(path);\n\n return new Promise((resolve, reject) => {\n ws!.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n onReady?.();\n 
resolve();\n }\n\n if (data.type === \"input_audio_buffer.speech_started\") {\n onSpeechStart?.();\n }\n\n if (data.type === \"input_audio_buffer.speech_stopped\") {\n onSpeechEnd?.();\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.text\") {\n onTranscript?.(data.text || data.stash || data.transcript || \"\", false);\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.completed\") {\n onTranscript?.(data.text || data.transcript || \"\", true);\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"ASR error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws!.onerror = () => {\n const err = new Error(\"WebSocket error\");\n onError?.(err);\n reject(err);\n };\n\n ws!.onclose = () => {\n recorder?.stop();\n recorder = null;\n ws = null;\n onClose?.();\n };\n });\n }\n\n async function startRecording(): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n recorder = await createRealtimeRecorder();\n await recorder.start((audio) => {\n if (!ws || ws.readyState !== WebSocket.OPEN) return;\n ws.send(\n JSON.stringify({\n type: \"input_audio_buffer.append\",\n audio,\n })\n );\n });\n }\n\n async function stopRecording() {\n try {\n await recorder?.stop();\n } catch (err) {}\n recorder = null;\n\n if (ws && ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: \"input_audio_buffer.commit\" }));\n }\n }\n\n function close() {\n stopRecording();\n ws?.close();\n ws = null;\n }\n\n return {\n connect,\n startRecording,\n stopRecording,\n close,\n 
};\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,IAAM,WAAW;AAOjB,eAAe,yBAAoD;AACjE,MAAI;AACJ,MAAI;AACJ,MAAI;AACJ,MAAI;AAEJ,SAAO;AAAA,IACL,MAAM,MAAM,SAAS;AACnB,eAAS,MAAM,UAAU,aAAa,aAAa;AAAA,QACjD,OAAO,EAAE,YAAY,MAAO,cAAc,GAAG,kBAAkB,KAAK;AAAA,MACtE,CAAC;AAED,UAAI,+CAAY,SAAS;AACzB,UAAI,qGAAwB,SAAS;AAErC,YAAM,IAAI,aAAa,EAAE,YAAY,KAAM,CAAC;AAC5C,eAAS,IAAI,wBAAwB,MAAM;AAC3C,kBAAY,IAAI,sBAAsB,MAAM,GAAG,CAAC;AAEhD,gBAAU,iBAAiB,CAAC,MAAM;AAChC,cAAM,YAAY,EAAE,YAAY,eAAe,CAAC;AAChD,cAAM,MAAM,IAAI,WAAW,UAAU,MAAM;AAC3C,iBAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,gBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;AACrD,cAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,QACnC;AAEA,cAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,YAAI,SAAS;AACb,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,oBAAU,OAAO,aAAa,MAAM,CAAC,KAAK,CAAC;AAAA,QAC7C;AAEA,gBAAQ,KAAK,MAAM,CAAC;AAAA,MACtB;AAEA,aAAO,QAAQ,SAAS;AACxB,gBAAU,QAAQ,IAAI,WAAW;AAAA,IACnC;AAAA,IAEA,MAAM,OAAO;AACX,cAAQ,UAAU,EAAE,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC;AAC3C,cAAQ,WAAW;AACnB,iBAAW,WAAW;AACtB,YAAM,KAAK,MAAM;AAAA,IACnB;AAAA,EACF;AACF;AAEA,IAAM,MAAM,CAAC,SAAiB,OAAO,OAAO;AAC1C,UAAQ,IAAI,IAAI,IAAI,KAAK,OAAO;AAClC;AAkCO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,OAAO;AAAA,EACT,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,WAAoC;AAExC,iBAAe,UAAyB;AACtC,SAAK,IAAI,UAAU,IAAI;AAEvB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,SAAI,YAAY,CAAC,UAAU;AACzB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,0BAAgB;AAAA,QAClB;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,wBAAc;AAAA,QAChB;AAEA,YAAI,KAAK,SAAS,oDAAoD;AACpE,yBAAe,KAAK,QAAQ,KAAK,SAAS,KAAK,cAAc,IAAI,KAAK;AAAA,QACxE;AAEA,YAAI,KAAK,SAAS,yDAAyD;AACzE,yBAAe,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,QACzD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW;AACxD,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAI,UAAU,MAAM;AAClB,cAAM,MAAM,IAAI,MAAM,iBAAiB;AACvC,kBAAU,GAA
G;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAI,UAAU,MAAM;AAClB,kBAAU,KAAK;AACf,mBAAW;AACX,aAAK;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,iBAAgC;AAC7C,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,eAAW,MAAM,uBAAuB;AACxC,UAAM,SAAS,MAAM,CAAC,UAAU;AAC9B,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,KAAM;AAC7C,SAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,gBAAgB;AAC7B,QAAI;AACF,YAAM,UAAU,KAAK;AAAA,IACvB,SAAS,KAAK;AAAA,IAAC;AACf,eAAW;AAEX,QAAI,MAAM,GAAG,eAAe,UAAU,MAAM;AAC1C,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAAA,IAC/D;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,kBAAc;AACd,QAAI,MAAM;AACV,SAAK;AAAA,EACP;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|
package/dist/index.d.cts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
interface ASRClientConfig {
|
|
5
5
|
/** Audio format, default 'pcm16' */
|
|
6
|
-
audioFormat?:
|
|
6
|
+
audioFormat?: "pcm16" | "g711a" | "g711u";
|
|
7
7
|
/** Sample rate, default 16000 */
|
|
8
8
|
sampleRate?: number;
|
|
9
9
|
/** Called when connection is ready */
|
|
@@ -16,6 +16,10 @@ interface ASRClientConfig {
|
|
|
16
16
|
onTranscript?: (text: string, isFinal: boolean) => void;
|
|
17
17
|
/** Called on error */
|
|
18
18
|
onError?: (error: Error) => void;
|
|
19
|
+
/** Called on close */
|
|
20
|
+
onClose?: () => void;
|
|
21
|
+
/** WebSocket path, default ASR_PATH */
|
|
22
|
+
path?: string;
|
|
19
23
|
}
|
|
20
24
|
interface ASRClient {
|
|
21
25
|
/** Connect to ASR service */
|
package/dist/index.d.ts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
interface ASRClientConfig {
|
|
5
5
|
/** Audio format, default 'pcm16' */
|
|
6
|
-
audioFormat?:
|
|
6
|
+
audioFormat?: "pcm16" | "g711a" | "g711u";
|
|
7
7
|
/** Sample rate, default 16000 */
|
|
8
8
|
sampleRate?: number;
|
|
9
9
|
/** Called when connection is ready */
|
|
@@ -16,6 +16,10 @@ interface ASRClientConfig {
|
|
|
16
16
|
onTranscript?: (text: string, isFinal: boolean) => void;
|
|
17
17
|
/** Called on error */
|
|
18
18
|
onError?: (error: Error) => void;
|
|
19
|
+
/** Called on close */
|
|
20
|
+
onClose?: () => void;
|
|
21
|
+
/** WebSocket path, default ASR_PATH */
|
|
22
|
+
path?: string;
|
|
19
23
|
}
|
|
20
24
|
interface ASRClient {
|
|
21
25
|
/** Connect to ASR service */
|
package/dist/index.js
CHANGED
|
@@ -1,24 +1,63 @@
|
|
|
1
1
|
// src/asr-client.ts
|
|
2
2
|
var ASR_PATH = "/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime";
|
|
3
|
+
async function createRealtimeRecorder() {
|
|
4
|
+
let stream;
|
|
5
|
+
let ctx;
|
|
6
|
+
let source;
|
|
7
|
+
let processor;
|
|
8
|
+
return {
|
|
9
|
+
async start(onAudio) {
|
|
10
|
+
stream = await navigator.mediaDevices.getUserMedia({
|
|
11
|
+
audio: { sampleRate: 16e3, channelCount: 1, echoCancellation: true }
|
|
12
|
+
});
|
|
13
|
+
log("\u2705 \u9EA6\u514B\u98CE\u5DF2\u542F\u52A8", "success");
|
|
14
|
+
log("\u{1F4AC} \u8BF7\u5BF9\u7740\u9EA6\u514B\u98CE\u8BF4\u8BDD\uFF0C\u5B9E\u65F6\u8BC6\u522B\u4E2D...", "success");
|
|
15
|
+
ctx = new AudioContext({ sampleRate: 16e3 });
|
|
16
|
+
source = ctx.createMediaStreamSource(stream);
|
|
17
|
+
processor = ctx.createScriptProcessor(4096, 1, 1);
|
|
18
|
+
processor.onaudioprocess = (e) => {
|
|
19
|
+
const inputData = e.inputBuffer.getChannelData(0);
|
|
20
|
+
const pcm = new Int16Array(inputData.length);
|
|
21
|
+
for (let i = 0; i < inputData.length; i++) {
|
|
22
|
+
const s = Math.max(-1, Math.min(1, inputData[i] || 0));
|
|
23
|
+
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
24
|
+
}
|
|
25
|
+
const bytes = new Uint8Array(pcm.buffer);
|
|
26
|
+
let binary = "";
|
|
27
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
28
|
+
binary += String.fromCharCode(bytes[i] || 0);
|
|
29
|
+
}
|
|
30
|
+
onAudio(btoa(binary));
|
|
31
|
+
};
|
|
32
|
+
source.connect(processor);
|
|
33
|
+
processor.connect(ctx.destination);
|
|
34
|
+
},
|
|
35
|
+
async stop() {
|
|
36
|
+
stream?.getTracks().forEach((t) => t.stop());
|
|
37
|
+
source?.disconnect();
|
|
38
|
+
processor?.disconnect();
|
|
39
|
+
await ctx?.close();
|
|
40
|
+
}
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
var log = (message, type = "") => {
|
|
44
|
+
console.log(`[${type}]`, message);
|
|
45
|
+
};
|
|
3
46
|
function createASRClient(config) {
|
|
4
47
|
const {
|
|
5
|
-
// audioFormat = 'pcm16',
|
|
6
|
-
sampleRate = 16e3,
|
|
7
48
|
onReady,
|
|
8
49
|
onSpeechStart,
|
|
9
50
|
onSpeechEnd,
|
|
10
51
|
onTranscript,
|
|
11
|
-
onError
|
|
52
|
+
onError,
|
|
53
|
+
onClose,
|
|
54
|
+
path = ASR_PATH
|
|
12
55
|
} = config;
|
|
13
56
|
let ws = null;
|
|
14
|
-
let
|
|
15
|
-
let audioContext = null;
|
|
16
|
-
let processor = null;
|
|
57
|
+
let recorder = null;
|
|
17
58
|
async function connect() {
|
|
59
|
+
ws = new WebSocket(path);
|
|
18
60
|
return new Promise((resolve, reject) => {
|
|
19
|
-
ws = new WebSocket(ASR_PATH);
|
|
20
|
-
ws.onopen = () => {
|
|
21
|
-
};
|
|
22
61
|
ws.onmessage = (event) => {
|
|
23
62
|
const data = JSON.parse(event.data);
|
|
24
63
|
if (data.type === "session.created") {
|
|
@@ -32,97 +71,59 @@ function createASRClient(config) {
|
|
|
32
71
|
onSpeechEnd?.();
|
|
33
72
|
}
|
|
34
73
|
if (data.type === "conversation.item.input_audio_transcription.text") {
|
|
35
|
-
onTranscript?.(data.text || "", false);
|
|
74
|
+
onTranscript?.(data.text || data.stash || data.transcript || "", false);
|
|
36
75
|
}
|
|
37
76
|
if (data.type === "conversation.item.input_audio_transcription.completed") {
|
|
38
77
|
onTranscript?.(data.text || data.transcript || "", true);
|
|
39
78
|
}
|
|
40
79
|
if (data.type === "error") {
|
|
41
|
-
const err = new Error(data.error?.message || "
|
|
80
|
+
const err = new Error(data.error?.message || "ASR error");
|
|
42
81
|
onError?.(err);
|
|
43
82
|
reject(err);
|
|
44
83
|
}
|
|
45
84
|
};
|
|
46
85
|
ws.onerror = () => {
|
|
47
|
-
const err = new Error("WebSocket
|
|
86
|
+
const err = new Error("WebSocket error");
|
|
48
87
|
onError?.(err);
|
|
49
88
|
reject(err);
|
|
50
89
|
};
|
|
51
90
|
ws.onclose = () => {
|
|
91
|
+
recorder?.stop();
|
|
92
|
+
recorder = null;
|
|
52
93
|
ws = null;
|
|
94
|
+
onClose?.();
|
|
53
95
|
};
|
|
54
96
|
});
|
|
55
97
|
}
|
|
56
98
|
async function startRecording() {
|
|
57
|
-
if (typeof window === "undefined") {
|
|
58
|
-
throw new Error("Recording only supported in browser");
|
|
59
|
-
}
|
|
60
99
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
61
100
|
throw new Error("WebSocket not connected");
|
|
62
101
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
echoCancellation: true,
|
|
69
|
-
noiseSuppression: true
|
|
70
|
-
}
|
|
71
|
-
});
|
|
72
|
-
audioContext = new AudioContext({ sampleRate });
|
|
73
|
-
const source = audioContext.createMediaStreamSource(mediaStream);
|
|
74
|
-
processor = audioContext.createScriptProcessor(4096, 1, 1);
|
|
75
|
-
processor.onaudioprocess = (e) => {
|
|
76
|
-
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
77
|
-
const inputData = e.inputBuffer.getChannelData(0);
|
|
78
|
-
const inputLen = inputData.length;
|
|
79
|
-
const pcm = new Int16Array(inputLen);
|
|
80
|
-
for (let i = 0; i < inputLen; i++) {
|
|
81
|
-
const s = Math.max(-1, Math.min(1, inputData[i]));
|
|
82
|
-
pcm[i] = s < 0 ? s * 32768 : s * 32767;
|
|
83
|
-
}
|
|
84
|
-
const bytes = new Uint8Array(pcm.buffer);
|
|
85
|
-
const len = bytes.length;
|
|
86
|
-
let binary = "";
|
|
87
|
-
for (let i = 0; i < len; i++) {
|
|
88
|
-
binary += String.fromCharCode(bytes[i]);
|
|
89
|
-
}
|
|
90
|
-
const base64 = btoa(binary);
|
|
91
|
-
ws.send(JSON.stringify({
|
|
102
|
+
recorder = await createRealtimeRecorder();
|
|
103
|
+
await recorder.start((audio) => {
|
|
104
|
+
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
105
|
+
ws.send(
|
|
106
|
+
JSON.stringify({
|
|
92
107
|
type: "input_audio_buffer.append",
|
|
93
|
-
audio
|
|
94
|
-
})
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
processor.connect(audioContext.destination);
|
|
98
|
-
} catch (err) {
|
|
99
|
-
onError?.(err);
|
|
100
|
-
throw err;
|
|
101
|
-
}
|
|
108
|
+
audio
|
|
109
|
+
})
|
|
110
|
+
);
|
|
111
|
+
});
|
|
102
112
|
}
|
|
103
|
-
function stopRecording() {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
}
|
|
108
|
-
if (processor) {
|
|
109
|
-
processor.disconnect();
|
|
110
|
-
processor = null;
|
|
111
|
-
}
|
|
112
|
-
if (audioContext) {
|
|
113
|
-
audioContext.close();
|
|
114
|
-
audioContext = null;
|
|
113
|
+
async function stopRecording() {
|
|
114
|
+
try {
|
|
115
|
+
await recorder?.stop();
|
|
116
|
+
} catch (err) {
|
|
115
117
|
}
|
|
118
|
+
recorder = null;
|
|
116
119
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
|
117
120
|
ws.send(JSON.stringify({ type: "input_audio_buffer.commit" }));
|
|
118
121
|
}
|
|
119
122
|
}
|
|
120
123
|
function close() {
|
|
121
124
|
stopRecording();
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
ws = null;
|
|
125
|
-
}
|
|
125
|
+
ws?.close();
|
|
126
|
+
ws = null;
|
|
126
127
|
}
|
|
127
128
|
return {
|
|
128
129
|
connect,
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/asr-client.ts"],"sourcesContent":["/**\n * ASR Realtime WebSocket Client\n */\n\nconst ASR_PATH =
|
|
1
|
+
{"version":3,"sources":["../src/asr-client.ts"],"sourcesContent":["/**\n * ASR Realtime WebSocket Client\n */\n\nconst ASR_PATH = \"/api/proxy/builtin/platform/qwen-asr-realtime/api-ws/v1/realtime\";\n\ninterface RealtimeRecorder {\n start(onAudio: (base64: string) => void): Promise<void>;\n stop(): Promise<void>;\n}\n\nasync function createRealtimeRecorder(): Promise<RealtimeRecorder> {\n let stream: MediaStream;\n let ctx: AudioContext;\n let source: MediaStreamAudioSourceNode;\n let processor: ScriptProcessorNode;\n\n return {\n async start(onAudio) {\n stream = await navigator.mediaDevices.getUserMedia({\n audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true },\n });\n\n log(\"✅ 麦克风已启动\", \"success\");\n log(\"💬 请对着麦克风说话,实时识别中...\", \"success\");\n\n ctx = new AudioContext({ sampleRate: 16000 });\n source = ctx.createMediaStreamSource(stream);\n processor = ctx.createScriptProcessor(4096, 1, 1);\n\n processor.onaudioprocess = (e) => {\n const inputData = e.inputBuffer.getChannelData(0);\n const pcm = new Int16Array(inputData.length);\n for (let i = 0; i < inputData.length; i++) {\n const s = Math.max(-1, Math.min(1, inputData[i] || 0));\n pcm[i] = s < 0 ? 
s * 32768 : s * 32767;\n }\n\n const bytes = new Uint8Array(pcm.buffer);\n let binary = \"\";\n for (let i = 0; i < bytes.length; i++) {\n binary += String.fromCharCode(bytes[i] || 0);\n }\n\n onAudio(btoa(binary));\n };\n\n source.connect(processor);\n processor.connect(ctx.destination);\n },\n\n async stop() {\n stream?.getTracks().forEach((t) => t.stop());\n source?.disconnect();\n processor?.disconnect();\n await ctx?.close();\n },\n };\n}\n\nconst log = (message: string, type = \"\") => {\n console.log(`[${type}]`, message);\n};\n\nexport interface ASRClientConfig {\n /** Audio format, default 'pcm16' */\n audioFormat?: \"pcm16\" | \"g711a\" | \"g711u\";\n /** Sample rate, default 16000 */\n sampleRate?: number;\n /** Called when connection is ready */\n onReady?: () => void;\n /** Called when speech is detected */\n onSpeechStart?: () => void;\n /** Called when speech stops */\n onSpeechEnd?: () => void;\n /** Called on transcript result */\n onTranscript?: (text: string, isFinal: boolean) => void;\n /** Called on error */\n onError?: (error: Error) => void;\n /** Called on close */\n onClose?: () => void;\n /** WebSocket path, default ASR_PATH */\n path?: string;\n}\n\nexport interface ASRClient {\n /** Connect to ASR service */\n connect(): Promise<void>;\n /** Start recording from microphone */\n startRecording(): Promise<void>;\n /** Stop recording */\n stopRecording(): void;\n /** Close connection */\n close(): void;\n}\n\nexport function createASRClient(config: ASRClientConfig): ASRClient {\n const {\n onReady,\n onSpeechStart,\n onSpeechEnd,\n onTranscript,\n onError,\n onClose,\n path = ASR_PATH,\n } = config;\n\n let ws: WebSocket | null = null;\n let recorder: RealtimeRecorder | null = null;\n\n async function connect(): Promise<void> {\n ws = new WebSocket(path);\n\n return new Promise((resolve, reject) => {\n ws!.onmessage = (event) => {\n const data = JSON.parse(event.data);\n\n if (data.type === \"session.created\") {\n onReady?.();\n 
resolve();\n }\n\n if (data.type === \"input_audio_buffer.speech_started\") {\n onSpeechStart?.();\n }\n\n if (data.type === \"input_audio_buffer.speech_stopped\") {\n onSpeechEnd?.();\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.text\") {\n onTranscript?.(data.text || data.stash || data.transcript || \"\", false);\n }\n\n if (data.type === \"conversation.item.input_audio_transcription.completed\") {\n onTranscript?.(data.text || data.transcript || \"\", true);\n }\n\n if (data.type === \"error\") {\n const err = new Error(data.error?.message || \"ASR error\");\n onError?.(err);\n reject(err);\n }\n };\n\n ws!.onerror = () => {\n const err = new Error(\"WebSocket error\");\n onError?.(err);\n reject(err);\n };\n\n ws!.onclose = () => {\n recorder?.stop();\n recorder = null;\n ws = null;\n onClose?.();\n };\n });\n }\n\n async function startRecording(): Promise<void> {\n if (!ws || ws.readyState !== WebSocket.OPEN) {\n throw new Error(\"WebSocket not connected\");\n }\n\n recorder = await createRealtimeRecorder();\n await recorder.start((audio) => {\n if (!ws || ws.readyState !== WebSocket.OPEN) return;\n ws.send(\n JSON.stringify({\n type: \"input_audio_buffer.append\",\n audio,\n })\n );\n });\n }\n\n async function stopRecording() {\n try {\n await recorder?.stop();\n } catch (err) {}\n recorder = null;\n\n if (ws && ws.readyState === WebSocket.OPEN) {\n ws.send(JSON.stringify({ type: \"input_audio_buffer.commit\" }));\n }\n }\n\n function close() {\n stopRecording();\n ws?.close();\n ws = null;\n }\n\n return {\n connect,\n startRecording,\n stopRecording,\n close,\n 
};\n}\n"],"mappings":";AAIA,IAAM,WAAW;AAOjB,eAAe,yBAAoD;AACjE,MAAI;AACJ,MAAI;AACJ,MAAI;AACJ,MAAI;AAEJ,SAAO;AAAA,IACL,MAAM,MAAM,SAAS;AACnB,eAAS,MAAM,UAAU,aAAa,aAAa;AAAA,QACjD,OAAO,EAAE,YAAY,MAAO,cAAc,GAAG,kBAAkB,KAAK;AAAA,MACtE,CAAC;AAED,UAAI,+CAAY,SAAS;AACzB,UAAI,qGAAwB,SAAS;AAErC,YAAM,IAAI,aAAa,EAAE,YAAY,KAAM,CAAC;AAC5C,eAAS,IAAI,wBAAwB,MAAM;AAC3C,kBAAY,IAAI,sBAAsB,MAAM,GAAG,CAAC;AAEhD,gBAAU,iBAAiB,CAAC,MAAM;AAChC,cAAM,YAAY,EAAE,YAAY,eAAe,CAAC;AAChD,cAAM,MAAM,IAAI,WAAW,UAAU,MAAM;AAC3C,iBAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,gBAAM,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;AACrD,cAAI,CAAC,IAAI,IAAI,IAAI,IAAI,QAAQ,IAAI;AAAA,QACnC;AAEA,cAAM,QAAQ,IAAI,WAAW,IAAI,MAAM;AACvC,YAAI,SAAS;AACb,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,oBAAU,OAAO,aAAa,MAAM,CAAC,KAAK,CAAC;AAAA,QAC7C;AAEA,gBAAQ,KAAK,MAAM,CAAC;AAAA,MACtB;AAEA,aAAO,QAAQ,SAAS;AACxB,gBAAU,QAAQ,IAAI,WAAW;AAAA,IACnC;AAAA,IAEA,MAAM,OAAO;AACX,cAAQ,UAAU,EAAE,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC;AAC3C,cAAQ,WAAW;AACnB,iBAAW,WAAW;AACtB,YAAM,KAAK,MAAM;AAAA,IACnB;AAAA,EACF;AACF;AAEA,IAAM,MAAM,CAAC,SAAiB,OAAO,OAAO;AAC1C,UAAQ,IAAI,IAAI,IAAI,KAAK,OAAO;AAClC;AAkCO,SAAS,gBAAgB,QAAoC;AAClE,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,OAAO;AAAA,EACT,IAAI;AAEJ,MAAI,KAAuB;AAC3B,MAAI,WAAoC;AAExC,iBAAe,UAAyB;AACtC,SAAK,IAAI,UAAU,IAAI;AAEvB,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,SAAI,YAAY,CAAC,UAAU;AACzB,cAAM,OAAO,KAAK,MAAM,MAAM,IAAI;AAElC,YAAI,KAAK,SAAS,mBAAmB;AACnC,oBAAU;AACV,kBAAQ;AAAA,QACV;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,0BAAgB;AAAA,QAClB;AAEA,YAAI,KAAK,SAAS,qCAAqC;AACrD,wBAAc;AAAA,QAChB;AAEA,YAAI,KAAK,SAAS,oDAAoD;AACpE,yBAAe,KAAK,QAAQ,KAAK,SAAS,KAAK,cAAc,IAAI,KAAK;AAAA,QACxE;AAEA,YAAI,KAAK,SAAS,yDAAyD;AACzE,yBAAe,KAAK,QAAQ,KAAK,cAAc,IAAI,IAAI;AAAA,QACzD;AAEA,YAAI,KAAK,SAAS,SAAS;AACzB,gBAAM,MAAM,IAAI,MAAM,KAAK,OAAO,WAAW,WAAW;AACxD,oBAAU,GAAG;AACb,iBAAO,GAAG;AAAA,QACZ;AAAA,MACF;AAEA,SAAI,UAAU,MAAM;AAClB,cAAM,MAAM,IAAI,MAAM,iBAAiB;AACvC,kBAAU,GAAG;AACb,eAAO,GAAG;AAAA,MACZ;AAEA,SAAI,UAAU,MAAM;AACl
B,kBAAU,KAAK;AACf,mBAAW;AACX,aAAK;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,iBAAgC;AAC7C,QAAI,CAAC,MAAM,GAAG,eAAe,UAAU,MAAM;AAC3C,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC3C;AAEA,eAAW,MAAM,uBAAuB;AACxC,UAAM,SAAS,MAAM,CAAC,UAAU;AAC9B,UAAI,CAAC,MAAM,GAAG,eAAe,UAAU,KAAM;AAC7C,SAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH;AAEA,iBAAe,gBAAgB;AAC7B,QAAI;AACF,YAAM,UAAU,KAAK;AAAA,IACvB,SAAS,KAAK;AAAA,IAAC;AACf,eAAW;AAEX,QAAI,MAAM,GAAG,eAAe,UAAU,MAAM;AAC1C,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,4BAA4B,CAAC,CAAC;AAAA,IAC/D;AAAA,EACF;AAEA,WAAS,QAAQ;AACf,kBAAc;AACd,QAAI,MAAM;AACV,SAAK;AAAA,EACP;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;","names":[]}
|