@xdfnet/ispeak 1.6.15 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +12 -19
- package/Makefile +1 -1
- package/README.md +9 -18
- package/avaudioengine_player_darwin.go +0 -6
- package/configs/hook-speak.sh +12 -192
- package/docs/architecture.md +91 -117
- package/docs/hook-text-extraction.md +68 -216
- package/main.go +54 -45
- package/package.json +1 -1
- package/scripts/ispeak +1 -1
package/AGENTS.md
CHANGED
|
@@ -28,11 +28,9 @@ make help # 显示帮助
|
|
|
28
28
|
ispeak (CLI, bash)
|
|
29
29
|
└─ nc -U ~/.config/iSpeak/ispeak.sock
|
|
30
30
|
└─ ispeakd (Go daemon)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
└─ pending -> running -> delete
|
|
35
|
-
└─ SSE PCM chunk -> AVAudioEngine
|
|
31
|
+
└─ Player (channel, buffer=1)
|
|
32
|
+
└─ loop goroutine: 单 AVAudioEngine 实例复用
|
|
33
|
+
└─ SSE PCM chunk → AVAudioEngine
|
|
36
34
|
```
|
|
37
35
|
|
|
38
36
|
- **Socket**: `~/.config/iSpeak/ispeak.sock`
|
|
@@ -42,7 +40,7 @@ ispeak (CLI, bash)
|
|
|
42
40
|
|
|
43
41
|
## 核心文件
|
|
44
42
|
|
|
45
|
-
- `main.go` —
|
|
43
|
+
- `main.go` — 守护进程、Player (channel 驱动)、TTS 流式请求、SSE 解析
|
|
46
44
|
- `avaudioengine_player_darwin.go` — macOS 原生 `AVAudioEngine` PCM 播放器
|
|
47
45
|
- `clean_text.go` — TTS 播报文本清洗
|
|
48
46
|
- `main_test.go` — 任务引擎关键行为测试
|
|
@@ -62,18 +60,13 @@ CLI 与 daemon 通过 socket 传输原始文本,支持音色前缀:
|
|
|
62
60
|
## 任务策略(节省 TTS 费用)
|
|
63
61
|
|
|
64
62
|
新消息到达时:
|
|
65
|
-
1.
|
|
66
|
-
2.
|
|
67
|
-
3.
|
|
68
|
-
|
|
69
|
-
**任务状态流转:**
|
|
70
|
-
```
|
|
71
|
-
pending → running → delete
|
|
72
|
-
```
|
|
63
|
+
1. 丢弃 channel 中排队的旧消息
|
|
64
|
+
2. 不打断当前正在合成/播放的任务
|
|
65
|
+
3. 新消息入队
|
|
73
66
|
|
|
74
67
|
## 失败策略
|
|
75
68
|
|
|
76
|
-
-
|
|
69
|
+
- 流式合成/播放失败:日志记录,继续处理下一条,不重试
|
|
77
70
|
|
|
78
71
|
## 配置
|
|
79
72
|
|
|
@@ -93,11 +86,11 @@ pending → running → delete
|
|
|
93
86
|
|
|
94
87
|
## 稳定性设计
|
|
95
88
|
|
|
96
|
-
- 单
|
|
97
|
-
-
|
|
89
|
+
- 单 Player goroutine,合成与播放同链路,降低首播延迟
|
|
90
|
+
- AVAudioEngine 实例复用,避免重复初始化开销
|
|
91
|
+
- Channel buffer=1 + drain,新消息自动丢弃旧排队消息
|
|
98
92
|
- 配置热更新(mtime 缓存 + 自动重载)
|
|
99
93
|
- TTS HTTP Client 复用,减少连接开销
|
|
100
94
|
- 主链路使用 macOS 原生 `AVAudioEngine` 播放 PCM
|
|
101
|
-
-
|
|
95
|
+
- 合成/播放失败直接跳过,不重试
|
|
102
96
|
- 日志轮转,防止文件过大
|
|
103
|
-
- 进程级 temp 目录,退出时自动清理
|
package/Makefile
CHANGED
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# iSpeak
|
|
2
2
|
|
|
3
|
-

|
|
4
4
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
5
|
[Go](https://golang.org/dl/)
|
|
6
6
|

|
|
@@ -20,8 +20,8 @@ ispeak "Pull request 已合并,3 个测试通过"
|
|
|
20
20
|
|
|
21
21
|
| 问题 | 方案 |
|
|
22
22
|
|------|------|
|
|
23
|
-
| AI 生成多条回复,TTS 账单飞涨 |
|
|
24
|
-
| 回复快慢不一,音频播报乱序 | 单
|
|
23
|
+
| AI 生成多条回复,TTS 账单飞涨 | 新消息丢弃旧排队消息,避免无效合成 |
|
|
24
|
+
| 回复快慢不一,音频播报乱序 | 单 channel goroutine,串行顺序稳定 |
|
|
25
25
|
| 修改配置要重启服务 | 热更新:编辑 `config.json` 立即生效 |
|
|
26
26
|
| 默认音色太无聊 | hook 按来源前缀选择音色 |
|
|
27
27
|
|
|
@@ -33,7 +33,7 @@ ispeak "Pull request 已合并,3 个测试通过"
|
|
|
33
33
|
npm i -g @xdfnet/ispeak
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
-
当前 npm 安装会在本机编译 `ispeakd`,需要已安装 Go。主播放链路使用 macOS 原生 `AVAudioEngine`,不依赖 `ffmpeg
|
|
36
|
+
当前 npm 安装会在本机编译 `ispeakd`,需要已安装 Go。主播放链路使用 macOS 原生 `AVAudioEngine`,不依赖 `ffmpeg`。合成失败记录日志,播放器异常自动重建。
|
|
37
37
|
|
|
38
38
|
**源码安装:**
|
|
39
39
|
|
|
@@ -61,26 +61,17 @@ ispeak "iSpeak 准备好了"
|
|
|
61
61
|
│ 通过 Unix Socket 接收文本 │
|
|
62
62
|
│ │ │
|
|
63
63
|
│ ▼ │
|
|
64
|
-
│
|
|
65
|
-
│
|
|
64
|
+
│ Player (channel) │
|
|
65
|
+
│ buffer=1 + drain(新消息丢弃旧排队消息) │
|
|
66
66
|
│ │ │
|
|
67
67
|
│ ▼ │
|
|
68
|
-
│
|
|
69
|
-
│ (SSE PCM chunk → AVAudioEngine) │
|
|
68
|
+
│ TTS SSE → AVAudioEngine(单实例复用) │
|
|
70
69
|
│ │ │
|
|
71
70
|
│ ▼ │
|
|
72
|
-
│
|
|
73
|
-
│ (失败时记录日志并删除任务) │
|
|
71
|
+
│ 失败记录日志,播放器异常自动重建 │
|
|
74
72
|
└─────────────────────────────────────────────────────┘
|
|
75
73
|
```
|
|
76
74
|
|
|
77
|
-
**任务状态流转:**
|
|
78
|
-
```
|
|
79
|
-
pending → running → delete
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
新消息到达时只清理未开始任务,不打断当前合成/播放;当前事务结束后再播最新消息。
|
|
83
|
-
|
|
84
75
|
## 语音清洗规则
|
|
85
76
|
|
|
86
77
|
清洗只影响 TTS 播报内容,不改变 Claude/Codex 屏幕显示内容。
|
|
@@ -138,7 +129,7 @@ ispeak version # 版本
|
|
|
138
129
|
|
|
139
130
|
Claude Code 和 Codex 的详细 hook 配置见 [docs/hook-text-extraction.md](docs/hook-text-extraction.md)。
|
|
140
131
|
|
|
141
|
-
`hook-speak.sh`
|
|
132
|
+
`hook-speak.sh` 会自动跳过 Codex 遗留 notify 的 `agent-turn-complete` 事件,避免同一回合重复播报。
|
|
142
133
|
|
|
143
134
|
## 开发命令
|
|
144
135
|
|
|
@@ -252,12 +252,6 @@ func (p *avAudioEngineStreamPlayer) CloseAndWait() error {
|
|
|
252
252
|
return p.closeLocked()
|
|
253
253
|
}
|
|
254
254
|
|
|
255
|
-
func (p *avAudioEngineStreamPlayer) Abort() error {
|
|
256
|
-
p.mu.Lock()
|
|
257
|
-
defer p.mu.Unlock()
|
|
258
|
-
return p.closeLocked()
|
|
259
|
-
}
|
|
260
|
-
|
|
261
255
|
func (p *avAudioEngineStreamPlayer) writeChunk(data []byte) error {
|
|
262
256
|
if len(data) == 0 {
|
|
263
257
|
return nil
|
package/configs/hook-speak.sh
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# Claude Code / Codex 共用播报 Hook:
|
|
3
|
-
#
|
|
3
|
+
# 取 last_assistant_message,加 {source:<name>} 前缀后发给 ispeakd。
|
|
4
|
+
# Claude: payload.last_assistant_message (snake_case)
|
|
5
|
+
# Codex: Stop Hook 同为 payload.last_assistant_message;遗留 notify 为 payload["last-assistant-message"] (kebab-case)
|
|
4
6
|
[[ "$ISPEAK_SKIP" == "1" ]] && exit 0
|
|
5
7
|
|
|
6
8
|
SOURCE="${1:-claude}"
|
|
7
9
|
SOCK="$HOME/.config/iSpeak/ispeak.sock"
|
|
8
10
|
LOG="$HOME/.config/iSpeak/hook.log"
|
|
9
|
-
STATE_FILE="$HOME/.config/iSpeak/hook.last"
|
|
10
11
|
|
|
11
|
-
# Codex `notify` 会把 JSON 作为最后一个参数传入;
|
|
12
|
-
# Claude/Claude 风格 Stop Hook 会把 JSON 写到 stdin。
|
|
13
12
|
input="${2:-}"
|
|
14
13
|
if [[ -z "$input" ]]; then
|
|
15
14
|
input=$(cat)
|
|
@@ -18,183 +17,29 @@ input_file=$(mktemp)
|
|
|
18
17
|
trap 'rm -f "$input_file"' EXIT
|
|
19
18
|
printf "%s" "$input" > "$input_file"
|
|
20
19
|
|
|
21
|
-
result=$(
|
|
20
|
+
result=$(HOOK_INPUT_FILE="$input_file" node <<'NODE' 2>>"$LOG"
|
|
22
21
|
const fs = require("fs");
|
|
23
|
-
const crypto = require("crypto");
|
|
24
22
|
|
|
25
23
|
(() => {
|
|
26
24
|
const input = readFile(process.env.HOOK_INPUT_FILE || "");
|
|
27
25
|
const payload = parseJSON(input) || {};
|
|
28
|
-
const source = process.env.SOURCE || "";
|
|
29
|
-
const stateFile = process.env.HOOK_STATE_FILE || "";
|
|
30
|
-
const result = source.startsWith("codex")
|
|
31
|
-
? lastCodexAssistant(payload)
|
|
32
|
-
: lastClaudeAssistant(payload);
|
|
33
26
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
}
|
|
27
|
+
// Codex 遗留 notify(agent-turn-complete)与现代 Stop Hook 在同一回合重复触发,跳过遗留事件
|
|
28
|
+
if (payload.type === "agent-turn-complete") return;
|
|
37
29
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
}
|
|
42
|
-
saveTurn(stateFile, source, result.turnId, result.text);
|
|
43
|
-
} else if (stateFile) {
|
|
44
|
-
saveTurn(stateFile, source, "", result.text);
|
|
45
|
-
}
|
|
30
|
+
const text = payload.last_assistant_message
|
|
31
|
+
|| payload["last-assistant-message"]
|
|
32
|
+
|| "";
|
|
46
33
|
|
|
47
|
-
process.stdout.write(
|
|
34
|
+
if (text) process.stdout.write(text);
|
|
48
35
|
})();
|
|
49
36
|
|
|
50
|
-
function lastClaudeAssistant(payload) {
|
|
51
|
-
const direct = firstString(payload.last_assistant_message, payload.message);
|
|
52
|
-
if (direct) return { text: direct, turnId: extractTurnId(payload) };
|
|
53
|
-
|
|
54
|
-
const transcript = firstString(payload.transcript_path, payload.transcriptPath);
|
|
55
|
-
return transcript ? lastClaudeTranscript(transcript, payload) : { text: "", turnId: extractTurnId(payload) };
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
function lastCodexAssistant(payload) {
|
|
59
|
-
const direct = firstString(
|
|
60
|
-
payload["last-assistant-message"],
|
|
61
|
-
payload.last_assistant_message,
|
|
62
|
-
payload.lastAssistantMessage,
|
|
63
|
-
payload.message,
|
|
64
|
-
payload.lastMessage
|
|
65
|
-
);
|
|
66
|
-
if (direct) return { text: direct, turnId: extractTurnId(payload) };
|
|
67
|
-
|
|
68
|
-
const transcript = firstString(
|
|
69
|
-
payload.transcript_path,
|
|
70
|
-
payload.transcriptPath,
|
|
71
|
-
payload["transcript-path"]
|
|
72
|
-
);
|
|
73
|
-
return transcript ? lastAssistantFromTranscript(transcript, "codex") : { text: "", turnId: extractTurnId(payload) };
|
|
74
|
-
}
|
|
75
|
-
|
|
76
37
|
function readFile(file) {
|
|
77
|
-
try {
|
|
78
|
-
return fs.readFileSync(file, "utf8");
|
|
79
|
-
} catch {
|
|
80
|
-
return "";
|
|
81
|
-
}
|
|
38
|
+
try { return fs.readFileSync(file, "utf8"); } catch { return ""; }
|
|
82
39
|
}
|
|
83
|
-
|
|
84
40
|
function parseJSON(text) {
|
|
85
|
-
try {
|
|
86
|
-
return JSON.parse(text);
|
|
87
|
-
} catch {
|
|
88
|
-
return null;
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
function firstString(...values) {
|
|
93
|
-
for (const value of values) {
|
|
94
|
-
if (typeof value === "string" && value !== "") return value;
|
|
95
|
-
}
|
|
96
|
-
return "";
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
function collectText(content) {
|
|
100
|
-
if (typeof content === "string") return content;
|
|
101
|
-
if (Array.isArray(content)) {
|
|
102
|
-
return content
|
|
103
|
-
.map(item => collectText(item))
|
|
104
|
-
.filter(Boolean)
|
|
105
|
-
.join(" ");
|
|
106
|
-
}
|
|
107
|
-
if (!content || typeof content !== "object") return "";
|
|
108
|
-
if (typeof content.text === "string") return content.text;
|
|
109
|
-
if (content.content) return collectText(content.content);
|
|
110
|
-
return "";
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function lastClaudeTranscript(file, payload) {
|
|
114
|
-
const deadline = Date.now() + 5000;
|
|
115
|
-
let result = { text: "", turnId: extractTurnId(payload) };
|
|
116
|
-
|
|
117
|
-
while (Date.now() <= deadline) {
|
|
118
|
-
result = lastAssistantFromTranscript(file, "claude");
|
|
119
|
-
if (result.text) return result;
|
|
120
|
-
sleepMs(120);
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
return result;
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
function lastAssistantFromTranscript(file, source) {
|
|
127
|
-
let data = "";
|
|
128
|
-
try {
|
|
129
|
-
data = fs.readFileSync(file, "utf8");
|
|
130
|
-
} catch {
|
|
131
|
-
return "";
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
let last = "";
|
|
135
|
-
let turnId = "";
|
|
136
|
-
for (const line of data.split(/\r?\n/)) {
|
|
137
|
-
if (!line.trim()) continue;
|
|
138
|
-
const event = parseJSON(line);
|
|
139
|
-
if (!event) continue;
|
|
140
|
-
|
|
141
|
-
if (source === "claude") {
|
|
142
|
-
if (event.role === "assistant") {
|
|
143
|
-
last = collectText(event.content) || last;
|
|
144
|
-
}
|
|
145
|
-
if (event.message && event.message.role === "assistant") {
|
|
146
|
-
last = collectText(event.message.content) || last;
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
if (source === "codex" &&
|
|
151
|
-
event.type === "response_item" &&
|
|
152
|
-
event.payload &&
|
|
153
|
-
event.payload.type === "message" &&
|
|
154
|
-
event.payload.role === "assistant"
|
|
155
|
-
) {
|
|
156
|
-
last = collectText(event.payload.content) || last;
|
|
157
|
-
turnId = turnId || extractTurnId(event) || extractTurnId(event.payload);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
return { text: last, turnId };
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
function extractTurnId(payload) {
|
|
164
|
-
return firstString(
|
|
165
|
-
payload.turn_id,
|
|
166
|
-
payload.turnId,
|
|
167
|
-
payload["turn-id"]
|
|
168
|
-
);
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
function isDuplicateTurn(stateFile, source, turnId) {
|
|
172
|
-
const current = `${source}:${turnId}`;
|
|
173
|
-
try {
|
|
174
|
-
return fs.readFileSync(stateFile, "utf8").trim() === current;
|
|
175
|
-
} catch {
|
|
176
|
-
return false;
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
function saveTurn(stateFile, source, turnId, text) {
|
|
181
|
-
const current = `${source}:${turnId || textHash(text)}`;
|
|
182
|
-
try {
|
|
183
|
-
fs.mkdirSync(require("path").dirname(stateFile), { recursive: true });
|
|
184
|
-
fs.writeFileSync(stateFile, current, "utf8");
|
|
185
|
-
} catch {
|
|
186
|
-
// 去重失败不影响播报。
|
|
187
|
-
}
|
|
41
|
+
try { return JSON.parse(text); } catch { return null; }
|
|
188
42
|
}
|
|
189
|
-
|
|
190
|
-
function textHash(text) {
|
|
191
|
-
return crypto.createHash("sha1").update(text, "utf8").digest("hex");
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
function sleepMs(ms) {
|
|
195
|
-
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
|
|
196
|
-
}
|
|
197
|
-
|
|
198
43
|
NODE
|
|
199
44
|
)
|
|
200
45
|
|
|
@@ -203,31 +48,6 @@ if [[ "$ISPEAK_HOOK_PRINT_TEXT" == "1" ]]; then
|
|
|
203
48
|
exit 0
|
|
204
49
|
fi
|
|
205
50
|
|
|
206
|
-
echo "=== $(date) ===" >> "$LOG"
|
|
207
|
-
echo "SOURCE: $SOURCE" >> "$LOG"
|
|
208
|
-
echo "TEXT_LEN: ${#result}" >> "$LOG"
|
|
209
|
-
echo "PREVIEW: ${result:0:150}" >> "$LOG"
|
|
210
|
-
|
|
211
|
-
# Claude Code Stop Hook 调试
|
|
212
|
-
if [[ "$SOURCE" == "claude" && -n "$input" ]]; then
|
|
213
|
-
# 用 grep 提取 transcript_path
|
|
214
|
-
tp=$(echo "$input" | grep -o '"transcript_path":"[^"]*"' | head -1 | sed 's/"transcript_path":"//;s/"$//')
|
|
215
|
-
if [[ -n "$tp" ]]; then
|
|
216
|
-
echo "CLAUDE_TRANSCRIPT_PATH: $tp" >> "$LOG"
|
|
217
|
-
if [[ -f "$tp" ]]; then
|
|
218
|
-
echo "CLAUDE_TRANSCRIPT_EXISTS: yes" >> "$LOG"
|
|
219
|
-
else
|
|
220
|
-
echo "CLAUDE_TRANSCRIPT_EXISTS: no" >> "$LOG"
|
|
221
|
-
fi
|
|
222
|
-
else
|
|
223
|
-
echo "CLAUDE_TRANSCRIPT_PATH: none" >> "$LOG"
|
|
224
|
-
echo "CLAUDE_RAW: ${input:0:300}" >> "$LOG"
|
|
225
|
-
fi
|
|
226
|
-
fi
|
|
227
|
-
|
|
228
51
|
if [[ -n "$result" && -S "$SOCK" ]]; then
|
|
229
52
|
printf "{source:%s}%s" "$SOURCE" "$result" | nc -U -w5 "$SOCK" 2>> "$LOG"
|
|
230
|
-
echo "SPOKE: OK" >> "$LOG"
|
|
231
|
-
else
|
|
232
|
-
echo "SPOKE: SKIP" >> "$LOG"
|
|
233
53
|
fi
|
package/docs/architecture.md
CHANGED
|
@@ -2,179 +2,145 @@
|
|
|
2
2
|
|
|
3
3
|
## 概述
|
|
4
4
|
|
|
5
|
-
iSpeak 是一个运行在 macOS 上的本地 TTS 播报守护进程,通过 Unix Socket 接收文本,调用火山引擎 TTS 流式 API
|
|
5
|
+
iSpeak 是一个运行在 macOS 上的本地 TTS 播报守护进程,通过 Unix Socket 接收文本,调用火山引擎 TTS 流式 API,边合成边通过原生 AVAudioEngine 播放 PCM 音频。
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
- transaction worker:领取待执行任务,SSE 每到一段音频就写入播放器 stdin
|
|
9
|
-
- 播放器优先使用 `ffplay -i pipe:0`,没有 `ffplay` 时回退到完整音频 `afplay`
|
|
7
|
+
核心链路:**Socket → Player (channel) → TTS SSE → AVAudioEngine**
|
|
10
8
|
|
|
11
9
|
## 系统架构
|
|
12
10
|
|
|
13
11
|
```
|
|
14
12
|
┌─────────────────────────────────────────────────────────────┐
|
|
15
13
|
│ 客户端 │
|
|
16
|
-
│
|
|
14
|
+
│ nc -U ─────────> ~/.config/iSpeak/ispeak.sock │
|
|
15
|
+
│ ispeak "文本" (Unix Socket) │
|
|
17
16
|
└─────────────────────────────────────────────────────────────┘
|
|
18
17
|
│
|
|
19
18
|
▼
|
|
20
19
|
┌─────────────────────────────────────────────────────────────┐
|
|
21
|
-
│ ispeakd (Go Daemon)
|
|
20
|
+
│ ispeakd (Go Daemon) │
|
|
22
21
|
│ │
|
|
23
|
-
│ Socket Acceptor
|
|
24
|
-
│ -
|
|
25
|
-
│ - 每个连接读取文本并提交任务 │
|
|
22
|
+
│ Socket Acceptor (handleConnection) │
|
|
23
|
+
│ - 读文本 → 解析 {source:xxx} → 选音色 → cleanText → 提交 │
|
|
26
24
|
│ │
|
|
27
|
-
│
|
|
25
|
+
│ Player (channel 驱动) │
|
|
28
26
|
│ ┌───────────────────────────────────────────────────────┐ │
|
|
29
|
-
│ │
|
|
30
|
-
│ │
|
|
31
|
-
│ │
|
|
27
|
+
│ │ chan job (buffer=1) │ │
|
|
28
|
+
│ │ Submit: drain 旧消息 → 入队最新 │ │
|
|
29
|
+
│ │ loop: for j := range ch → play(j, player) │ │
|
|
32
30
|
│ └───────────────────────────────────────────────────────┘ │
|
|
33
31
|
│ │ │
|
|
34
32
|
│ ▼ │
|
|
35
|
-
│
|
|
36
|
-
│
|
|
37
|
-
│
|
|
38
|
-
│
|
|
39
|
-
│ - 播放完成后删除任务;失败直接删除任务 │
|
|
40
|
-
│ │
|
|
33
|
+
│ AVAudioEngine (cgo, 单实例复用) │
|
|
34
|
+
│ - PCM 48kHz 单声道 int16 → float32 │
|
|
35
|
+
│ - 流式 scheduleBuffer + pending 计数 + cond 同步 │
|
|
36
|
+
│ - 关闭时补齐残留字节 │
|
|
41
37
|
└─────────────────────────────────────────────────────────────┘
|
|
42
38
|
```
|
|
43
39
|
|
|
44
40
|
## 核心数据结构
|
|
45
41
|
|
|
46
|
-
###
|
|
42
|
+
### job
|
|
47
43
|
|
|
48
44
|
```go
|
|
49
|
-
type
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Cfg Config // 任务配置快照(提交时)
|
|
45
|
+
type job struct {
|
|
46
|
+
text string // cleanText 清洗后的文本
|
|
47
|
+
voice VoiceInfo // 音色快照
|
|
48
|
+
source string // 来源: "claude" / "codex" / "default"
|
|
49
|
+
cfg Config // 配置快照
|
|
55
50
|
}
|
|
56
51
|
```
|
|
57
52
|
|
|
58
|
-
###
|
|
59
|
-
|
|
60
|
-
```go
|
|
61
|
-
const (
|
|
62
|
-
TaskStatusPending TaskStatus = iota // 待执行
|
|
63
|
-
TaskStatusRunning // 合成播放事务执行中
|
|
64
|
-
)
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
说明:
|
|
68
|
-
- 终态不持久化。任务成功/失败后都会从仓库删除。
|
|
69
|
-
- 不保留 `failed/canceled/completed` 常驻状态,历史通过日志追踪。
|
|
70
|
-
|
|
71
|
-
### TaskEngine
|
|
53
|
+
### Player
|
|
72
54
|
|
|
73
55
|
```go
|
|
74
|
-
type
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
nextID uint64
|
|
78
|
-
tasks map[uint64]*Task
|
|
79
|
-
latestID uint64
|
|
80
|
-
pending []uint64
|
|
81
|
-
wake chan struct{}
|
|
82
|
-
|
|
83
|
-
synthesizeStreamFn func(ctx context.Context, cfg Config, text string, voice *VoiceInfo, onAudio func([]byte) error) error
|
|
84
|
-
newStreamPlayerFn func() (StreamPlayer, error)
|
|
56
|
+
type Player struct {
|
|
57
|
+
ch chan job // buffer=1,串行播报
|
|
85
58
|
}
|
|
86
59
|
```
|
|
87
60
|
|
|
88
|
-
|
|
61
|
+
单 goroutine 消费 channel,一个 AVAudioEngine 实例复用。新消息到达时 drain 旧消息,不打断正在播放的。
|
|
62
|
+
|
|
63
|
+
### StreamPlayer
|
|
89
64
|
|
|
90
65
|
```go
|
|
91
66
|
type StreamPlayer interface {
|
|
92
67
|
Write(audio []byte) error
|
|
93
68
|
CloseAndWait() error
|
|
94
|
-
Abort() error
|
|
95
69
|
}
|
|
96
70
|
```
|
|
97
71
|
|
|
98
|
-
##
|
|
99
|
-
|
|
100
|
-
### 状态流转
|
|
101
|
-
|
|
102
|
-
```
|
|
103
|
-
pending -> running -> delete
|
|
104
|
-
```
|
|
72
|
+
## 消息流程
|
|
105
73
|
|
|
106
|
-
###
|
|
74
|
+
### 1. Socket 接收
|
|
107
75
|
|
|
108
|
-
`
|
|
109
|
-
1.
|
|
110
|
-
2.
|
|
111
|
-
3.
|
|
112
|
-
4.
|
|
76
|
+
`handleConnection()`:
|
|
77
|
+
1. `bufio.Scanner` 读取完整文本(最大 1MB)
|
|
78
|
+
2. `extractVoicePrefix` 解析 `{source:claude}` 前缀,匹配 SourceVoices
|
|
79
|
+
3. 未匹配到 → fallback 到 DefaultVoice
|
|
80
|
+
4. `cleanText()` 过滤文本噪音(markdown/code/URL/path/UUID 等)
|
|
81
|
+
5. `player.Submit(文本, 音色, 来源, 配置)`
|
|
113
82
|
|
|
114
|
-
|
|
115
|
-
- 未开始的旧任务直接删除
|
|
116
|
-
- 已领取但过期的旧任务在事务执行前跳过
|
|
117
|
-
- 正在合成/播放的任务自然结束
|
|
83
|
+
### 2. 调度与去重
|
|
118
84
|
|
|
119
|
-
|
|
85
|
+
`Submit()`:
|
|
86
|
+
- 非阻塞 drain channel 中旧消息:`select { case <-ch: default: }`
|
|
87
|
+
- 新消息入队
|
|
120
88
|
|
|
121
|
-
|
|
122
|
-
2. 启动 `StreamPlayer`
|
|
123
|
-
3. 调用 TTS 流式接口,SSE 每解析出一个音频 chunk 就写入播放器
|
|
124
|
-
4. TTS 结束后关闭播放器 stdin 并等待播放结束
|
|
125
|
-
5. 成功:删除任务
|
|
126
|
-
6. 失败:删除任务,不重试
|
|
89
|
+
策略:**新消息丢弃旧排队消息,不打断正在播放的**
|
|
127
90
|
|
|
128
|
-
|
|
91
|
+
### 3. 流式合成与播放
|
|
129
92
|
|
|
130
|
-
|
|
93
|
+
`play()`:
|
|
94
|
+
1. HTTP POST 火山引擎 `/api/v3/tts/unidirectional/sse`
|
|
95
|
+
2. SSE 流式解析 → base64 解码 → PCM int16 数据
|
|
96
|
+
3. 每块 PCM 立即写入 AVAudioEngine 播放
|
|
97
|
+
4. **合成失败**:只记日志,播放器正常继续
|
|
98
|
+
5. **播放器写入失败**:返回 error,loop 层重建 AVAudioEngine
|
|
131
99
|
|
|
132
|
-
|
|
133
|
-
- 读取 socket 文本
|
|
134
|
-
- 解析 `{source:xxx}` 音色前缀
|
|
135
|
-
- `cleanText()` 生成语音友好的文本
|
|
136
|
-
- 将“过滤后文本”提交给 `TaskEngine.Submit`
|
|
100
|
+
## SSE 解析
|
|
137
101
|
|
|
138
|
-
`
|
|
102
|
+
`parseSSEStream()`:
|
|
103
|
+
- 逐行读取,累积 `data:` 行
|
|
104
|
+
- 空行触发 flush → `processEvent()` 解析 JSON
|
|
105
|
+
- 兼容非标准直出(无 `data:` 前缀的裸 JSON)
|
|
106
|
+
- `extractAudioBase64` 递归提取:顶层 `data/audio/audio_data` → 嵌套 `data/result/payload`
|
|
107
|
+
- 错误码检查:`code` 不为 0 且不为 20000000 时返回 error
|
|
108
|
+
- 整条流无音频块 → 返回 `"no audio data"`
|
|
139
109
|
|
|
140
|
-
|
|
141
|
-
- Markdown 表格整块:表头、分隔线、表格内容
|
|
142
|
-
- 代码块、artifact、HTML 页面源码
|
|
143
|
-
- Markdown 链接 URL,仅保留链接标题
|
|
144
|
-
- 绝对路径简化为“路径”
|
|
145
|
-
- 长 commit hash、UUID、长 ID
|
|
146
|
-
- 明显文件列表、模型分片列表、下载清单
|
|
147
|
-
- 下载进度、速度、进度条、ANSI 控制符等终端噪声
|
|
110
|
+
## 配置热加载
|
|
148
111
|
|
|
149
|
-
|
|
112
|
+
`loadConfig()`:
|
|
113
|
+
- mtime 缓存:路径相同 + 修改时间未变 → 直接返回缓存
|
|
114
|
+
- 校验失败 → 用上一次有效配置兜底
|
|
115
|
+
- 文件不存在 → fallback 环境变量 `IAGENT_TTS_API_KEY` / `IAGENT_TTS_ENDPOINT`
|
|
150
116
|
|
|
151
|
-
|
|
117
|
+
## 稳定性设计
|
|
152
118
|
|
|
153
|
-
-
|
|
154
|
-
-
|
|
155
|
-
-
|
|
156
|
-
-
|
|
157
|
-
-
|
|
158
|
-
-
|
|
119
|
+
- **panic recover**: loop goroutine 崩溃后 `go p.loop()` 自动重启
|
|
120
|
+
- **播放器重建**: 写入失败时关闭旧实例并创建新的 AVAudioEngine
|
|
121
|
+
- **新消息优先**: channel buffer=1 + drain,旧排队消息自动丢弃
|
|
122
|
+
- **配置热加载**: 每次连接重新读取,mtime 缓存避免频繁 I/O
|
|
123
|
+
- **HTTP 复用**: 全局 `ttsHTTPClient`,30s 超时,连接池复用
|
|
124
|
+
- **日志轮转**: lumberjack,10MB/份,保留 3 份,压缩归档
|
|
125
|
+
- **优雅退出**: SIGINT/SIGTERM 触发 listener.Close()
|
|
159
126
|
|
|
160
|
-
##
|
|
127
|
+
## 清洗规则
|
|
161
128
|
|
|
162
|
-
|
|
163
|
-
- 单 transaction worker,保证播报顺序稳定
|
|
164
|
-
- `wake` 为缓冲 1 的唤醒信号,防止重复唤醒堆积
|
|
165
|
-
- FIFO 保证未开始任务公平顺序
|
|
129
|
+
`cleanText()` 过滤顺序(先跨行块再行内符号):
|
|
166
130
|
|
|
167
|
-
|
|
131
|
+
1. 跳过代码块 (` ```...``` `)
|
|
132
|
+
2. 跳过 artifact (`<artifact>...</artifact>`)
|
|
133
|
+
3. 跳过 Markdown 表格(分隔线 + 表头 + 内容行)
|
|
134
|
+
4. 跳过 HTML 源码行、进度噪声行
|
|
135
|
+
5. 行内清洗:ANSI 转义 → 链接 URL → 绝对路径 → UUID → commit hash → markdown 符号 → HTML 标签
|
|
168
136
|
|
|
169
|
-
|
|
170
|
-
- 流式合成/播放失败:直接删除任务,不重试,避免重复播报
|
|
171
|
-
- 只保留最新消息优先播报,降低 TTS 成本
|
|
137
|
+
保留适合听的内容:结论、状态、下一步动作、关键错误原因。
|
|
172
138
|
|
|
173
139
|
## 文件布局
|
|
174
140
|
|
|
175
141
|
```
|
|
176
142
|
~/.config/iSpeak/
|
|
177
|
-
├── config.json # API Key
|
|
143
|
+
├── config.json # API Key、音色映射
|
|
178
144
|
├── ispeak.sock # Unix Socket
|
|
179
145
|
├── ispeak.log # 日志(lumberjack 轮转)
|
|
180
146
|
└── hook-speak.sh # Claude/Codex Hook
|
|
@@ -183,10 +149,18 @@ pending -> running -> delete
|
|
|
183
149
|
└── com.ispeak.plist # launchd 服务配置
|
|
184
150
|
```
|
|
185
151
|
|
|
186
|
-
##
|
|
152
|
+
## 来源 & 音色映射
|
|
153
|
+
|
|
154
|
+
Hook 传入 `{source:claude}` 前缀,ispeakd 解析后匹配 `config.json` 中的 `sourceVoices`:
|
|
155
|
+
|
|
156
|
+
```json
|
|
157
|
+
{
|
|
158
|
+
"defaultVoice": { "voice_type": "zh_female_mizai_uranus_bigtts", "resourceId": "seed-tts-2.0" },
|
|
159
|
+
"sourceVoices": {
|
|
160
|
+
"claude": { "voice_type": "zh_female_tianmeitaozi_uranus_bigtts", "resourceId": "seed-tts-2.0" },
|
|
161
|
+
"codex": { "voice_type": "zh_female_shuangkuaisisi_uranus_bigtts", "resourceId": "seed-tts-2.0" }
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
```
|
|
187
165
|
|
|
188
|
-
|
|
189
|
-
- 配置热更新(每次连接重新加载配置)
|
|
190
|
-
- 播放器子进程命令协议,保证“播完再删任务”
|
|
191
|
-
- 日志轮转(10MB/份,保留 3 份)
|
|
192
|
-
- 进程级 temp 目录,退出时自动清理
|
|
166
|
+
日志区分来源:`TTS [claude]: 文本` / `TTS [codex]: 文本` / `TTS [default]: 文本`
|
|
@@ -1,274 +1,126 @@
|
|
|
1
1
|
# Hook 文本提取链路
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
`hook-speak.sh` 只做一件事:从 Hook JSON 里取 assistant 回复文本,发给 iSpeak socket。当前 51 行。
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## 提取逻辑
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
2. **Codex Stop Hook**:从 stdin 读取 JSON,取 `last_assistant_message`(snake_case)。
|
|
11
|
-
3. **Claude Code Stop Hook**:从 stdin 读取 JSON,只读 `transcript_path`(官方无 direct 字段)。
|
|
12
|
-
|
|
13
|
-
不扫描 `~/.codex/sessions`。没有 direct 字段也没有 `transcript_path` 时,本次不播报。
|
|
14
|
-
|
|
15
|
-
## Codex CLI:notify
|
|
16
|
-
|
|
17
|
-
当前本机版本:
|
|
7
|
+
```js
|
|
8
|
+
// Codex 遗留 notify(agent-turn-complete)与现代 Stop Hook 重复触发,跳过
|
|
9
|
+
if (payload.type === "agent-turn-complete") return;
|
|
18
10
|
|
|
19
|
-
|
|
20
|
-
|
|
11
|
+
const text = payload.last_assistant_message // Claude Stop / Codex Stop (snake_case)
|
|
12
|
+
|| payload["last-assistant-message"] // Codex notify (kebab-case)
|
|
13
|
+
|| "";
|
|
21
14
|
```
|
|
22
15
|
|
|
23
|
-
|
|
16
|
+
不再需要 transcript 轮询、去重、状态文件、`payload.message` fallback。
|
|
24
17
|
|
|
25
|
-
|
|
18
|
+
## 输入来源
|
|
26
19
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
20
|
+
| 来源 | 传参方式 | 字段名 | 处理 |
|
|
21
|
+
|------|---------|--------|------|
|
|
22
|
+
| Claude Code Stop Hook | stdin | `last_assistant_message` | 提取并播报 |
|
|
23
|
+
| Codex Stop Hook | stdin | `last_assistant_message` | 提取并播报 |
|
|
24
|
+
| Codex 遗留 notify | `$2` (argv) | `last-assistant-message` | 跳过(`agent-turn-complete`) |
|
|
30
25
|
|
|
31
|
-
|
|
26
|
+
脚本统一处理 stdin 和 argv:
|
|
32
27
|
|
|
33
28
|
```bash
|
|
34
|
-
$
|
|
35
|
-
|
|
36
|
-
|
|
29
|
+
input="${2:-}" # 遗留 notify 走 $2
|
|
30
|
+
if [[ -z "$input" ]]; then
|
|
31
|
+
input=$(cat) # Stop Hook 走 stdin
|
|
32
|
+
fi
|
|
37
33
|
```
|
|
38
34
|
|
|
39
|
-
|
|
35
|
+
## Codex Stop Hook(现代)
|
|
36
|
+
|
|
37
|
+
stdin JSON:
|
|
40
38
|
|
|
41
39
|
```json
|
|
42
40
|
{
|
|
43
|
-
"
|
|
44
|
-
"
|
|
45
|
-
"
|
|
46
|
-
"cwd": "...",
|
|
47
|
-
"input-messages": ["..."],
|
|
48
|
-
"last-assistant-message": "最后一条 assistant 回复"
|
|
41
|
+
"turn_id": "...",
|
|
42
|
+
"transcript_path": "...",
|
|
43
|
+
"last_assistant_message": "最后一条 assistant 回复"
|
|
49
44
|
}
|
|
50
45
|
```
|
|
51
46
|
|
|
52
|
-
|
|
47
|
+
源码:`codex-rs/hooks/src/events/stop.rs` — `StopCommandInput` struct 包含 `last_assistant_message`。
|
|
53
48
|
|
|
54
|
-
|
|
55
|
-
input="${2:-}"
|
|
56
|
-
```
|
|
49
|
+
## Codex 遗留 notify(跳过)
|
|
57
50
|
|
|
58
|
-
|
|
51
|
+
Codex 有两套通知机制同时触发:
|
|
59
52
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
53
|
+
| 机制 | 事件 | 触发时机 |
|
|
54
|
+
|------|------|---------|
|
|
55
|
+
| 现代 Stop Hook | `stop` | agent 回合结束 |
|
|
56
|
+
| 遗留 notify | `agent-turn-complete` | agent 回合结束 |
|
|
63
57
|
|
|
64
|
-
|
|
58
|
+
两套机制都携带最后一条 assistant 回复(Stop Hook 字段为 `last_assistant_message`,遗留 notify 字段为 `last-assistant-message`),同时启用会导致同一回合重复播报。现代 Stop Hook 已覆盖需求,遗留 notify 通过 `payload.type === "agent-turn-complete"` 跳过。
|
|
65
59
|
|
|
66
|
-
|
|
60
|
+
源码:`codex-rs/hooks/src/legacy_notify.rs` — 向后兼容,JSON 通过 `command.arg()` 传入,字段序列化为 kebab-case。
|
|
67
61
|
|
|
68
|
-
|
|
62
|
+
## 触发时间点
|
|
69
63
|
|
|
70
|
-
|
|
64
|
+
Hook 在 AI **回复完成**时触发,每个回合一次。Claude Code 和 Codex 均使用 `Stop` 事件:
|
|
71
65
|
|
|
72
|
-
```json
|
|
73
|
-
{
|
|
74
|
-
"hooks": {
|
|
75
|
-
"Stop": [
|
|
76
|
-
{
|
|
77
|
-
"hooks": [
|
|
78
|
-
{
|
|
79
|
-
"type": "command",
|
|
80
|
-
"command": "bash $HOME/.config/iSpeak/hook-speak.sh codex",
|
|
81
|
-
"timeout": 30
|
|
82
|
-
}
|
|
83
|
-
]
|
|
84
|
-
}
|
|
85
|
-
]
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
66
|
```
|
|
89
|
-
|
|
90
|
-
脚本实际收到:
|
|
91
|
-
|
|
92
|
-
```bash
|
|
93
|
-
$1 = "codex"
|
|
94
|
-
$2 = empty
|
|
95
|
-
stdin = '{"hook_event_name":"Stop",...,"last_assistant_message":"..."}'
|
|
67
|
+
用户发送消息 → AI 生成回复 → 回复结束 → Hook 触发 → 提取文本 → 发送 socket → TTS 播报
|
|
96
68
|
```
|
|
97
69
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
```rust
|
|
101
|
-
struct StopCommandInput {
|
|
102
|
-
session_id: String,
|
|
103
|
-
turn_id: String,
|
|
104
|
-
transcript_path: NullableString,
|
|
105
|
-
cwd: String,
|
|
106
|
-
hook_event_name: String,
|
|
107
|
-
model: String,
|
|
108
|
-
permission_mode: String,
|
|
109
|
-
stop_hook_active: bool,
|
|
110
|
-
last_assistant_message: NullableString, // ← Codex 有此字段
|
|
111
|
-
}
|
|
112
|
-
```
|
|
70
|
+
从 Hook 触发到 TTS 首字延迟通常 < 500ms(取决于文本长度和网络)。
|
|
113
71
|
|
|
114
|
-
|
|
72
|
+
## 来源 & 音色
|
|
115
73
|
|
|
116
|
-
|
|
117
|
-
{
|
|
118
|
-
"session_id": "...",
|
|
119
|
-
"turn_id": "...",
|
|
120
|
-
"transcript_path": "...",
|
|
121
|
-
"cwd": "...",
|
|
122
|
-
"hook_event_name": "Stop",
|
|
123
|
-
"model": "...",
|
|
124
|
-
"permission_mode": "bypassPermissions",
|
|
125
|
-
"stop_hook_active": false,
|
|
126
|
-
"last_assistant_message": "最后一条 assistant 回复"
|
|
127
|
-
}
|
|
128
|
-
```
|
|
74
|
+
Hook 调用时传入来源名称(`$1`),对应 `config.json` 中的音色映射:
|
|
129
75
|
|
|
130
|
-
|
|
76
|
+
```bash
|
|
77
|
+
# ~/.claude/settings.json — Claude Code
|
|
78
|
+
"command": "bash ~/.config/iSpeak/hook-speak.sh claude"
|
|
131
79
|
|
|
132
|
-
|
|
133
|
-
|
|
80
|
+
# ~/.codex/hooks.json — Codex
|
|
81
|
+
"command": "bash $HOME/.config/iSpeak/hook-speak.sh codex"
|
|
82
|
+
```
|
|
134
83
|
|
|
135
|
-
|
|
84
|
+
文本加上 `{source:claude}` 或 `{source:codex}` 前缀发往 socket,`ispeakd` 解析后选择对应音色。无前缀则用 `defaultVoice`。
|
|
136
85
|
|
|
137
|
-
|
|
86
|
+
音色映射示例(`~/.config/iSpeak/config.json`):
|
|
138
87
|
|
|
139
88
|
```json
|
|
140
89
|
{
|
|
141
|
-
"
|
|
142
|
-
"
|
|
143
|
-
"
|
|
144
|
-
"
|
|
145
|
-
"content": [
|
|
146
|
-
{
|
|
147
|
-
"type": "output_text",
|
|
148
|
-
"text": "最后一条 assistant 回复"
|
|
149
|
-
}
|
|
150
|
-
]
|
|
90
|
+
"defaultVoice": { "voice_type": "zh_female_mizai_uranus_bigtts" },
|
|
91
|
+
"sourceVoices": {
|
|
92
|
+
"claude": { "voice_type": "zh_female_tianmeitaozi_uranus_bigtts" },
|
|
93
|
+
"codex": { "voice_type": "zh_female_shuangkuaisisi_uranus_bigtts" }
|
|
151
94
|
}
|
|
152
95
|
}
|
|
153
96
|
```
|
|
154
97
|
|
|
155
|
-
|
|
98
|
+
日志中也会区分来源:
|
|
156
99
|
|
|
157
|
-
```js
|
|
158
|
-
event.type === "response_item" &&
|
|
159
|
-
event.payload?.type === "message" &&
|
|
160
|
-
event.payload?.role === "assistant"
|
|
161
100
|
```
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
```js
|
|
166
|
-
event.payload.content[].text
|
|
101
|
+
TTS [claude]: 飞哥好。 → tianmeitaozi 音色
|
|
102
|
+
TTS [codex]: 飞哥,你好。 → shuangkuaisisi 音色
|
|
103
|
+
TTS [default]: 直接文本 → mizai 音色
|
|
167
104
|
```
|
|
168
105
|
|
|
169
|
-
## Claude Code
|
|
170
|
-
|
|
171
|
-
> **来源**:[Claude Code Hooks Reference](https://code.claude.com/docs/en/hooks.md),更新时间:2026-05-11
|
|
106
|
+
## Claude Code Stop Hook
|
|
172
107
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
根据官方文档,Stop Hook 的 Common Input Fields 为:
|
|
108
|
+
stdin JSON(实测,2026-05):
|
|
176
109
|
|
|
177
110
|
```json
|
|
178
111
|
{
|
|
179
|
-
"session_id": "
|
|
180
|
-
"transcript_path": "/
|
|
181
|
-
"cwd": "
|
|
182
|
-
"permission_mode": "default",
|
|
112
|
+
"session_id": "...",
|
|
113
|
+
"transcript_path": "/Users/admin/.claude/projects/.../xxx.jsonl",
|
|
114
|
+
"cwd": "...",
|
|
183
115
|
"hook_event_name": "Stop",
|
|
184
|
-
"
|
|
185
|
-
"level": "medium"
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
子 agent 上下文中额外字段:
|
|
191
|
-
|
|
192
|
-
```json
|
|
193
|
-
{
|
|
194
|
-
"agent_id": "subagent_xyz",
|
|
195
|
-
"agent_type": "Explore"
|
|
116
|
+
"last_assistant_message": "最后一条 assistant 回复"
|
|
196
117
|
}
|
|
197
118
|
```
|
|
198
119
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
Claude transcript 常见 assistant 形态:
|
|
202
|
-
|
|
203
|
-
```json
|
|
204
|
-
{"role":"assistant","content":[{"type":"text","text":"..."}]}
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
或:
|
|
208
|
-
|
|
209
|
-
```json
|
|
210
|
-
{"message":{"role":"assistant","content":[{"type":"text","text":"..."}]}}
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
## 当前脚本策略
|
|
214
|
-
|
|
215
|
-
`configs/hook-speak.sh` 当前入口:
|
|
216
|
-
|
|
217
|
-
```bash
|
|
218
|
-
input="${2:-}"
|
|
219
|
-
if [[ -z "$input" ]]; then
|
|
220
|
-
input=$(cat)
|
|
221
|
-
fi
|
|
222
|
-
```
|
|
223
|
-
|
|
224
|
-
含义:
|
|
225
|
-
|
|
226
|
-
- Codex `notify`:读 `$2`
|
|
227
|
-
- Claude / Codex Stop Hook:读 stdin
|
|
228
|
-
- 如果 Codex 的 `notify` 和 `Stop` 同时启用,脚本会按 `turn_id` 去重,避免同一回合播两次
|
|
229
|
-
|
|
230
|
-
Codex 文本字段优先级(源码确认):
|
|
231
|
-
|
|
232
|
-
```js
|
|
233
|
-
payload["last-assistant-message"] // notify: kebab-case
|
|
234
|
-
payload.last_assistant_message // Stop Hook: snake_case
|
|
235
|
-
payload.lastAssistantMessage
|
|
236
|
-
payload.message
|
|
237
|
-
payload.lastMessage
|
|
238
|
-
payload.transcript_path
|
|
239
|
-
payload.transcriptPath
|
|
240
|
-
payload["transcript-path"]
|
|
241
|
-
```
|
|
242
|
-
|
|
243
|
-
Claude Code 文本字段优先级(官方文档):
|
|
244
|
-
|
|
245
|
-
```js
|
|
246
|
-
payload.transcript_path // 官方支持的唯一方式
|
|
247
|
-
```
|
|
248
|
-
|
|
249
|
-
> **注**:Claude Code Stop Hook 官方 payload 中**没有 `last_assistant_message` 字段**,这是与 Codex 的本质区别。
|
|
250
|
-
|
|
251
|
-
## 为什么不能只读 stdin
|
|
252
|
-
|
|
253
|
-
因为 Codex `notify` 不走 stdin。只读 stdin 会导致:
|
|
254
|
-
|
|
255
|
-
```text
|
|
256
|
-
TEXT_LEN: 0
|
|
257
|
-
SPOKE: SKIP
|
|
258
|
-
```
|
|
259
|
-
|
|
260
|
-
正确做法是先读 `$2`,再读 stdin;不扫历史 session。
|
|
261
|
-
|
|
262
|
-
## Claude Code TEXT_LEN: 0 的根因
|
|
263
|
-
|
|
264
|
-
当 Claude Code Stop Hook 触发但 `TEXT_LEN: 0` 时:
|
|
265
|
-
|
|
266
|
-
1. **官方字段不存在**:Claude Code Stop Hook 官方 payload 中**没有 `last_assistant_message` 字段**,只有 `transcript_path`
|
|
267
|
-
2. **transcript 文件可能晚一点才写完**:Hook 触发时文件虽已存在,但最后一条 assistant 文本还没落盘
|
|
268
|
-
3. **结果**:如果只读一次,`hook-speak.sh` 可能拿到空串,本次不播报
|
|
269
|
-
|
|
270
|
-
当前脚本对 Claude transcript 做了很短的轮询,等最后一条 assistant 文本真正出现再播,避免这个时序窗。
|
|
120
|
+
Claude Code 官方文档只列出 `transcript_path`,但实际 payload **包含 `last_assistant_message`**(实测确认)。直接用 direct 字段,无需读 transcript。
|
|
271
121
|
|
|
272
|
-
|
|
122
|
+
## 历史演进
|
|
273
123
|
|
|
274
|
-
|
|
124
|
+
- v1(250 行):transcript 轮询 + turn_id 去重 + state file + text hash。复杂度高,`session_id` 做去重 key 导致同一 session 只播第一条。
|
|
125
|
+
- v2(53 行):省略去重和 transcript 轮询,但 `payload.message` 回退太宽泛,且 Codex 重复触发未处理。
|
|
126
|
+
- v3(51 行):统一提取,移除 `payload.message`,过滤 `agent-turn-complete` 解决 Codex 双重通知导致的重复播报。
|
package/main.go
CHANGED
|
@@ -40,7 +40,6 @@ var errAlreadyRunning = errors.New("iSpeak already running")
|
|
|
40
40
|
type StreamPlayer interface {
|
|
41
41
|
Write(audio []byte) error
|
|
42
42
|
CloseAndWait() error
|
|
43
|
-
Abort() error
|
|
44
43
|
}
|
|
45
44
|
|
|
46
45
|
// 最简单的播放器:channel 队列,串行播报
|
|
@@ -49,45 +48,75 @@ type Player struct {
|
|
|
49
48
|
}
|
|
50
49
|
|
|
51
50
|
type job struct {
|
|
52
|
-
text
|
|
53
|
-
voice
|
|
54
|
-
|
|
51
|
+
text string
|
|
52
|
+
voice VoiceInfo
|
|
53
|
+
source string
|
|
54
|
+
cfg Config
|
|
55
55
|
}
|
|
56
56
|
|
|
57
57
|
func NewPlayer() *Player {
|
|
58
|
-
p := &Player{ch: make(chan job,
|
|
58
|
+
p := &Player{ch: make(chan job, 1)}
|
|
59
59
|
go p.loop()
|
|
60
60
|
return p
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
-
func (p *Player) Submit(text string, voice VoiceInfo, cfg Config) {
|
|
64
|
-
log.Printf("TTS: %s", text)
|
|
65
|
-
|
|
63
|
+
func (p *Player) Submit(text string, voice VoiceInfo, source string, cfg Config) {
|
|
64
|
+
log.Printf("TTS [%s]: %s", source, text)
|
|
65
|
+
// 丢弃队列中的旧消息,只保留最新
|
|
66
|
+
select {
|
|
67
|
+
case <-p.ch:
|
|
68
|
+
default:
|
|
69
|
+
}
|
|
70
|
+
p.ch <- job{text, voice, source, cfg}
|
|
66
71
|
}
|
|
67
72
|
|
|
68
73
|
func (p *Player) loop() {
|
|
74
|
+
defer func() {
|
|
75
|
+
if r := recover(); r != nil {
|
|
76
|
+
log.Printf("Player loop 崩溃: %v,重启中", r)
|
|
77
|
+
go p.loop()
|
|
78
|
+
}
|
|
79
|
+
}()
|
|
80
|
+
|
|
81
|
+
player, err := newDefaultStreamPlayer()
|
|
82
|
+
if err != nil {
|
|
83
|
+
log.Printf("启动播放器失败: %v", err)
|
|
84
|
+
return
|
|
85
|
+
}
|
|
86
|
+
defer player.CloseAndWait()
|
|
87
|
+
|
|
69
88
|
for j := range p.ch {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
89
|
+
if err := p.play(j, player); err != nil {
|
|
90
|
+
log.Printf("播放器异常,重建: %v", err)
|
|
91
|
+
player.CloseAndWait()
|
|
92
|
+
player, err = newDefaultStreamPlayer()
|
|
93
|
+
if err != nil {
|
|
94
|
+
log.Printf("重建播放器失败: %v", err)
|
|
95
|
+
return
|
|
96
|
+
}
|
|
74
97
|
}
|
|
75
|
-
p.play(j, player)
|
|
76
|
-
_ = player.CloseAndWait()
|
|
77
98
|
}
|
|
78
99
|
}
|
|
79
100
|
|
|
80
|
-
func (p *Player) play(j job, player StreamPlayer) {
|
|
101
|
+
func (p *Player) play(j job, player StreamPlayer) error {
|
|
81
102
|
startedAt := time.Now()
|
|
103
|
+
var writeErr error
|
|
82
104
|
onAudio := func(audio []byte) error {
|
|
83
|
-
|
|
105
|
+
if err := player.Write(audio); err != nil {
|
|
106
|
+
writeErr = err
|
|
107
|
+
return err
|
|
108
|
+
}
|
|
109
|
+
return nil
|
|
84
110
|
}
|
|
85
111
|
|
|
86
112
|
if err := synthesizeStream(context.Background(), j.cfg, j.text, &j.voice, onAudio); err != nil {
|
|
113
|
+
if writeErr != nil {
|
|
114
|
+
return writeErr
|
|
115
|
+
}
|
|
87
116
|
log.Printf("TTS 合成失败: %v", err)
|
|
88
|
-
return
|
|
89
117
|
}
|
|
90
118
|
log.Printf("TTS: 完成 elapsed=%s", time.Since(startedAt).Round(time.Millisecond))
|
|
119
|
+
return nil
|
|
91
120
|
}
|
|
92
121
|
|
|
93
122
|
// 音色信息
|
|
@@ -231,7 +260,7 @@ func synthesizeStream(ctx context.Context, cfg Config, text string, voice *Voice
|
|
|
231
260
|
return fmt.Errorf("http request: %w", err)
|
|
232
261
|
}
|
|
233
262
|
if resp.StatusCode != 200 {
|
|
234
|
-
io.Copy(io.Discard, resp.Body)
|
|
263
|
+
io.Copy(io.Discard, resp.Body)
|
|
235
264
|
resp.Body.Close()
|
|
236
265
|
return fmt.Errorf("http status %d", resp.StatusCode)
|
|
237
266
|
}
|
|
@@ -240,28 +269,6 @@ func synthesizeStream(ctx context.Context, cfg Config, text string, voice *Voice
|
|
|
240
269
|
return parseSSEStream(resp.Body, onAudio)
|
|
241
270
|
}
|
|
242
271
|
|
|
243
|
-
// 解析 SSE 流,提取 base64 音频数据
|
|
244
|
-
func parseSSE(r io.Reader) ([]byte, error) {
|
|
245
|
-
var chunks [][]byte
|
|
246
|
-
if err := parseSSEStream(r, func(audio []byte) error {
|
|
247
|
-
chunk := append([]byte(nil), audio...)
|
|
248
|
-
chunks = append(chunks, chunk)
|
|
249
|
-
return nil
|
|
250
|
-
}); err != nil {
|
|
251
|
-
return nil, err
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
total := 0
|
|
255
|
-
for _, c := range chunks {
|
|
256
|
-
total += len(c)
|
|
257
|
-
}
|
|
258
|
-
result := make([]byte, 0, total)
|
|
259
|
-
for _, c := range chunks {
|
|
260
|
-
result = append(result, c...)
|
|
261
|
-
}
|
|
262
|
-
return result, nil
|
|
263
|
-
}
|
|
264
|
-
|
|
265
272
|
func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
|
|
266
273
|
audioChunks := 0
|
|
267
274
|
reader := bufio.NewReaderSize(r, 64*1024)
|
|
@@ -552,7 +559,7 @@ func handleConnection(conn net.Conn, player *Player) {
|
|
|
552
559
|
return
|
|
553
560
|
}
|
|
554
561
|
|
|
555
|
-
voice, content := extractVoicePrefix(text, cfg)
|
|
562
|
+
source, voice, content := extractVoicePrefix(text, cfg)
|
|
556
563
|
if voice == nil {
|
|
557
564
|
voice = cfg.DefaultVoice
|
|
558
565
|
}
|
|
@@ -566,22 +573,24 @@ func handleConnection(conn net.Conn, player *Player) {
|
|
|
566
573
|
return
|
|
567
574
|
}
|
|
568
575
|
|
|
569
|
-
player.Submit(cleaned, *voice, cfg)
|
|
576
|
+
player.Submit(cleaned, *voice, source, cfg)
|
|
570
577
|
}
|
|
571
578
|
|
|
572
|
-
// 解析消息中的音色前缀,返回
|
|
573
|
-
func extractVoicePrefix(text string, cfg Config) (voice *VoiceInfo, content string) {
|
|
579
|
+
// 解析消息中的音色前缀,返回 (来源, 音色, 内容)
|
|
580
|
+
func extractVoicePrefix(text string, cfg Config) (source string, voice *VoiceInfo, content string) {
|
|
574
581
|
// 格式: {source:claude}文本
|
|
575
582
|
const prefix = "{source:"
|
|
576
583
|
if strings.HasPrefix(text, prefix) {
|
|
577
584
|
if end := strings.Index(text, "}"); end > len(prefix) {
|
|
578
|
-
|
|
585
|
+
source = text[len(prefix):end]
|
|
586
|
+
if v, ok := cfg.SourceVoices[source]; ok {
|
|
579
587
|
voice = v
|
|
580
588
|
}
|
|
581
589
|
content = text[end+1:]
|
|
582
590
|
return
|
|
583
591
|
}
|
|
584
592
|
}
|
|
593
|
+
source = "default"
|
|
585
594
|
content = text
|
|
586
595
|
return
|
|
587
596
|
}
|
package/package.json
CHANGED
package/scripts/ispeak
CHANGED