@xdfnet/ispeak 1.6.4 → 1.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -177,7 +177,7 @@ pending -> running -> delete
177
177
  ├── config.json # API Key、音色配置
178
178
  ├── ispeak.sock # Unix Socket
179
179
  ├── ispeak.log # 日志(lumberjack 轮转)
180
- └── hook-speak.sh # Claude/Codex Stop Hook
180
+ └── hook-speak.sh # Claude/Codex Hook
181
181
 
182
182
  ~/Library/LaunchAgents/
183
183
  └── com.iSpeak.plist # launchd 服务配置
@@ -0,0 +1,240 @@
1
+ # Hook 文本提取链路
2
+
3
+ 本文记录 Claude Code / Codex CLI 在 Hook 中拿到“最后一条 assistant 回复”的实际方式。`hook-speak.sh` 的目标只做两件事:取最后一条 assistant 回复,发给 iSpeak socket。
4
+
5
+ ## 结论
6
+
7
+ 推荐优先级:
8
+
9
+ 1. **Codex `notify`**:从脚本第二个参数 `$2` 读取 JSON,取 `last-assistant-message`。
10
+ 2. **Claude / Codex Stop Hook**:从 stdin 读取 JSON,优先取 `last_assistant_message`。
11
+ 3. **明确 transcript**:如果没有直接字段,只读取 payload 里明确传入的 `transcript_path`。
12
+
13
+ 不扫描 `~/.codex/sessions`。payload 里既没有直接文本字段(如 `last_assistant_message`),也没有 `transcript_path` 时,本次不播报。
14
+
15
+ ## Codex CLI:notify
16
+
17
+ 当前本机版本:
18
+
19
+ ```text
20
+ codex-cli 0.130.0
21
+ ```
22
+
23
+ Codex CLI 的 `notify = [...]` 是 legacy notify 机制。官方源码里会把通知 JSON 追加成命令的最后一个 argv 参数,不写 stdin。
24
+
25
+ 配置示例:
26
+
27
+ ```toml
28
+ notify = ["/Users/你的用户名/.config/iSpeak/hook-speak.sh", "codex"]
29
+ ```
30
+
31
+ 脚本实际收到:
32
+
33
+ ```bash
34
+ $1 = "codex"
35
+ $2 = '{"type":"agent-turn-complete",...,"last-assistant-message":"..."}'
36
+ stdin = empty
37
+ ```
38
+
39
+ 核心字段:
40
+
41
+ ```json
42
+ {
43
+ "type": "agent-turn-complete",
44
+ "thread-id": "...",
45
+ "turn-id": "...",
46
+ "cwd": "...",
47
+ "input-messages": ["..."],
48
+ "last-assistant-message": "最后一条 assistant 回复"
49
+ }
50
+ ```
51
+
52
+ 所以 Codex `notify` 的正确读取方式是:
53
+
54
+ ```bash
55
+ input="${2:-}"
56
+ ```
57
+
58
+ 然后解析:
59
+
60
+ ```js
61
+ payload["last-assistant-message"]
62
+ ```
63
+
64
+ 源码依据:`codex-rs/hooks/src/legacy_notify.rs`。该文件把 `last_assistant_message` 序列化为 kebab-case 的 `last-assistant-message`,并在执行命令前 `command.arg(notify_payload)`。
65
+
66
+ ## Codex CLI:Stop Hook
67
+
68
+ Codex 也支持 Claude 风格 Hook。Stop Hook 的输入 JSON 写入 stdin。
69
+
70
+ 配置示例:
71
+
72
+ ```json
73
+ {
74
+ "hooks": {
75
+ "Stop": [
76
+ {
77
+ "hooks": [
78
+ {
79
+ "type": "command",
80
+ "command": "bash $HOME/.config/iSpeak/hook-speak.sh codex",
81
+ "timeout": 30
82
+ }
83
+ ]
84
+ }
85
+ ]
86
+ }
87
+ }
88
+ ```
89
+
90
+ 脚本实际收到:
91
+
92
+ ```bash
93
+ $1 = "codex"
94
+ $2 = empty
95
+ stdin = '{"hook_event_name":"Stop",...,"last_assistant_message":"..."}'
96
+ ```
97
+
98
+ 核心字段:
99
+
100
+ ```json
101
+ {
102
+ "session_id": "...",
103
+ "turn_id": "...",
104
+ "transcript_path": "...",
105
+ "cwd": "...",
106
+ "hook_event_name": "Stop",
107
+ "model": "...",
108
+ "permission_mode": "bypassPermissions",
109
+ "stop_hook_active": false,
110
+ "last_assistant_message": "最后一条 assistant 回复"
111
+ }
112
+ ```
113
+
114
+ 源码依据:
115
+
116
+ - `codex-rs/hooks/src/events/stop.rs`:构造 `StopCommandInput`,包含 `last_assistant_message` 和 `transcript_path`。
117
+ - `codex-rs/hooks/schema/generated/stop.command.input.schema.json`:Stop stdin schema。
118
+ - `codex-rs/hooks/src/engine/command_runner.rs`:Hook 命令通过 stdin 接收 `input_json`。
119
+
120
+ ## Codex Transcript
121
+
122
+ Codex 的 transcript/session 文件是 JSONL。实际 assistant 回复形态:
123
+
124
+ ```json
125
+ {
126
+ "type": "response_item",
127
+ "payload": {
128
+ "type": "message",
129
+ "role": "assistant",
130
+ "content": [
131
+ {
132
+ "type": "output_text",
133
+ "text": "最后一条 assistant 回复"
134
+ }
135
+ ]
136
+ }
137
+ }
138
+ ```
139
+
140
+ 提取规则:
141
+
142
+ ```js
143
+ event.type === "response_item" &&
144
+ event.payload?.type === "message" &&
145
+ event.payload?.role === "assistant"
146
+ ```
147
+
148
+ 然后拼接:
149
+
150
+ ```js
151
+ event.payload.content[].text
152
+ ```
153
+
154
+ ## Claude Code:Stop Hook
155
+
156
+ Claude Code 官方 Stop Hook 通过 stdin 传 JSON,核心字段是:
157
+
158
+ ```json
159
+ {
160
+ "session_id": "...",
161
+ "transcript_path": "...",
162
+ "hook_event_name": "Stop",
163
+ "stop_hook_active": false
164
+ }
165
+ ```
166
+
167
+ 有些版本或场景可能直接提供:
168
+
169
+ ```json
170
+ {
171
+ "last_assistant_message": "最后一条 assistant 回复"
172
+ }
173
+ ```
174
+
175
+ 所以 Claude 的读取顺序是:
176
+
177
+ 1. `last_assistant_message`
178
+ 2. `message`
179
+ 3. `transcript_path`
180
+
181
+ Claude transcript 常见 assistant 形态:
182
+
183
+ ```json
184
+ {"role":"assistant","content":[{"type":"text","text":"..."}]}
185
+ ```
186
+
187
+ 或:
188
+
189
+ ```json
190
+ {"message":{"role":"assistant","content":[{"type":"text","text":"..."}]}}
191
+ ```
192
+
193
+ ## 当前脚本策略
194
+
195
+ `configs/hook-speak.sh` 当前入口:
196
+
197
+ ```bash
198
+ input="${2:-}"
199
+ if [[ -z "$input" ]]; then
200
+ input=$(cat)
201
+ fi
202
+ ```
203
+
204
+ 含义:
205
+
206
+ - Codex `notify`:读 `$2`
207
+ - Claude / Codex Stop Hook:读 stdin
208
+
209
+ Codex 字段读取优先级(前五个是直接文本字段,后三个是 transcript 路径字段,仅在没有直接文本时使用):
210
+
211
+ ```js
212
+ payload["last-assistant-message"]
213
+ payload.last_assistant_message
214
+ payload.lastAssistantMessage
215
+ payload.message
216
+ payload.lastMessage
217
+ payload.transcript_path
218
+ payload.transcriptPath
219
+ payload["transcript-path"]
220
+ ```
221
+
222
+ Claude 字段读取优先级(前两个是直接文本字段,后两个是 transcript 路径字段,仅在没有直接文本时使用):
223
+
224
+ ```js
225
+ payload.last_assistant_message
226
+ payload.message
227
+ payload.transcript_path
228
+ payload.transcriptPath
229
+ ```
230
+
231
+ ## 为什么不能只读 stdin
232
+
233
+ 因为 Codex `notify` 不走 stdin。只读 stdin 会导致:
234
+
235
+ ```text
236
+ TEXT_LEN: 0
237
+ SPOKE: SKIP
238
+ ```
239
+
240
+ 正确做法是先读 `$2`,再读 stdin;不扫历史 session。
package/README.md CHANGED
@@ -1,13 +1,13 @@
1
1
  # iSpeak
2
2
 
3
- ![Version](https://img.shields.io/badge/version-1.6.3-blue)
3
+ ![Version](https://img.shields.io/badge/version-1.6.6-blue)
4
4
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
5
5
  [![Go Version](https://img.shields.io/badge/Go-1.26-blue)](https://golang.org/dl/)
6
6
  ![Platform](https://img.shields.io/badge/platform-macOS-green)
7
7
 
8
8
  iSpeak 让 AI 编程助手开口说话。你写代码,它播结果——眼睛休息,耳朵来听。
9
9
 
10
- 适合 Claude Code 或 Codex 常驻后台的开发者。AI 完成任务后自动播报;你发新消息时,旧播报立即中断,不花冤枉钱。
10
+ 适合 Claude Code 或 Codex 常驻后台的开发者。AI 完成任务后自动播报;你发新消息时,未开始的旧播报会被丢弃,不花冤枉钱。
11
11
 
12
12
  ## 效果示例
13
13
 
@@ -104,8 +104,7 @@ pending → running → delete
104
104
  - Markdown 链接:只保留链接标题,不播 URL
105
105
  - 绝对路径:简化为“路径”
106
106
  - 长 commit hash、UUID、长 ID:不播
107
- - 明显文件列表:如模型分片、代码文件列表、下载文件清单
108
- - 下载进度和终端噪声:百分比、速度、进度条、ANSI 控制符
107
+ - 下载进度噪声:速度、ETA、预计剩余时间、ANSI 控制符
109
108
 
110
109
  保留优先级:结论、成功/失败状态、需要用户操作的下一步、关键错误原因。
111
110
 
@@ -131,7 +130,7 @@ ispeak-codex "消息" # Codex 专属音色
131
130
  ```json
132
131
  {
133
132
  "apiKey": "你的火山引擎 API Key",
134
- "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional",
133
+ "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse",
135
134
  "defaultVoice": {
136
135
  "voice_type": "zh_female_mizai_uranus_bigtts",
137
136
  "resourceId": "seed-tts-2.0"
@@ -173,7 +172,13 @@ ispeak-codex "消息" # Codex 专属音色
173
172
 
174
173
  ### Codex
175
174
 
176
- `~/.codex/hooks.json` 中添加 Stop Hook:
175
+ 推荐在 `~/.codex/config.toml` 中添加回合结束通知:
176
+
177
+ ```toml
178
+ notify = ["bash", "/Users/你的用户名/.config/iSpeak/hook-speak.sh", "codex"]
179
+ ```
180
+
181
+ 如果你启用了 Codex hooks,也可以在 `~/.codex/hooks.json` 中添加 Stop Hook:
177
182
 
178
183
  ```json
179
184
  {
package/clean_text.go ADDED
@@ -0,0 +1,153 @@
1
+ package main
2
+
3
+ import (
4
+ "regexp"
5
+ "strings"
6
+ )
7
+
8
// Precompiled patterns shared by the speech-text cleaning helpers.
var (
	// Inline fragments that are rewritten or removed inside a kept line.
	ansiEscapeRe   = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z]`)
	markdownListRe = regexp.MustCompile(`^\s*(?:[-*+]\s+|\d+[.)]\s+)`)
	markdownLinkRe = regexp.MustCompile(`\[[^\]]+\]\(([^)]*)\)`)
	urlRe          = regexp.MustCompile(`https?://\S+`)
	absolutePathRe = regexp.MustCompile(`/(?:Users|private|tmp|var|opt|usr|bin|sbin|etc|Library|Applications)/\S+`)
	// Canonical 8-4-4-4-12 hexadecimal UUID.
	uuidRe = regexp.MustCompile(`\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b`)
	// A standalone run of 7 to 40 lowercase hexadecimal characters.
	commitHashRe = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)
	htmlTagRe    = regexp.MustCompile(`<[^>]+>`)
	multiSpaceRe = regexp.MustCompile(`\s+`)

	// Markers that open or close multi-line regions.
	codeFenceStartRe = regexp.MustCompile("^```")
	artifactStartRe  = regexp.MustCompile(`(?i)^<artifact\b`)

	// Whole-line noise detectors.
	htmlDocumentLineRe = regexp.MustCompile(`(?i)^<!doctype html|^<html\b|^<head\b|^<body\b|^<style\b|^</`)
	speedNoiseRe       = regexp.MustCompile(`(?i)\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s`)
	etaNoiseRe         = regexp.MustCompile(`(?i)\bETA\b|预计剩余|剩余时间`)
)
24
+
25
+ // 过滤格式符号,保留自然朗读文本。
26
+ // 顺序很重要:先跳过跨行块结构,再跳过整行噪声,最后清理行内符号。
27
+ func cleanText(text string) string {
28
+ var lines []string
29
+ rawLines := strings.Split(text, "\n")
30
+ inCodeBlock := false
31
+ inArtifact := false
32
+ inMarkdownTable := false
33
+ for i := 0; i < len(rawLines); i++ {
34
+ line := rawLines[i]
35
+ line = strings.TrimSpace(line)
36
+ if line == "" {
37
+ inMarkdownTable = false
38
+ continue
39
+ }
40
+ if codeFenceStartRe.MatchString(line) {
41
+ inCodeBlock = !inCodeBlock
42
+ continue
43
+ }
44
+ if inCodeBlock {
45
+ continue
46
+ }
47
+ if artifactStartRe.MatchString(line) {
48
+ inArtifact = !strings.Contains(strings.ToLower(line), "</artifact>")
49
+ continue
50
+ }
51
+ if inArtifact {
52
+ if strings.Contains(strings.ToLower(line), "</artifact>") {
53
+ inArtifact = false
54
+ }
55
+ continue
56
+ }
57
+ if isMarkdownTableSeparator(line) {
58
+ if len(lines) > 0 && isMarkdownTableRow(strings.TrimSpace(rawLines[i-1])) {
59
+ lines = lines[:len(lines)-1]
60
+ }
61
+ inMarkdownTable = true
62
+ continue
63
+ }
64
+ if inMarkdownTable {
65
+ if isMarkdownTableRow(line) {
66
+ continue
67
+ }
68
+ inMarkdownTable = false
69
+ }
70
+ if shouldSkipSpeechLine(line) {
71
+ continue
72
+ }
73
+
74
+ cleaned := cleanSpeechLine(line)
75
+ if cleaned != "" {
76
+ lines = append(lines, cleaned)
77
+ }
78
+ }
79
+ return strings.Join(lines, ",")
80
+ }
81
+
82
+ func shouldSkipSpeechLine(line string) bool {
83
+ if isMarkdownTableSeparator(line) {
84
+ return true
85
+ }
86
+ if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
87
+ return true
88
+ }
89
+ if htmlDocumentLineRe.MatchString(line) {
90
+ return true
91
+ }
92
+ if isProgressNoiseLine(line) {
93
+ return true
94
+ }
95
+ if isMostlyTableRow(line) {
96
+ return true
97
+ }
98
+ return false
99
+ }
100
+
101
// isMarkdownTableSeparator reports whether line is a Markdown table delimiter
// row such as "|---|:---:|": it must contain at least one pipe and at least
// one dash, and nothing other than pipes, dashes, colons and spaces.
//
// The dash requirement keeps lines made only of pipes (for example "||") from
// being mistaken for a delimiter row.
func isMarkdownTableSeparator(line string) bool {
	line = strings.TrimSpace(line)
	if !strings.Contains(line, "|") || !strings.Contains(line, "-") {
		return false
	}
	return strings.Trim(line, "|-: ") == ""
}
105
+
106
// isMarkdownTableRow reports whether line, ignoring surrounding whitespace,
// contains at least two pipe characters.
func isMarkdownTableRow(line string) bool {
	pipes := strings.Count(strings.TrimSpace(line), "|")
	return pipes >= 2
}
110
+
111
+ func cleanSpeechLine(line string) string {
112
+ // Markdown 链接必须在 URL 删除前处理,否则会丢掉链接标题。
113
+ line = ansiEscapeRe.ReplaceAllString(line, "")
114
+ line = markdownListRe.ReplaceAllString(line, "")
115
+ line = markdownLinkRe.ReplaceAllStringFunc(line, func(match string) string {
116
+ if end := strings.Index(match, "]"); end > 1 {
117
+ return match[1:end]
118
+ }
119
+ return ""
120
+ })
121
+ line = urlRe.ReplaceAllString(line, "")
122
+ line = absolutePathRe.ReplaceAllString(line, " 路径 ")
123
+ // UUID 必须在短 hash 前处理,避免先删短片段后破坏 UUID 识别。
124
+ line = uuidRe.ReplaceAllString(line, "")
125
+ line = commitHashRe.ReplaceAllString(line, "")
126
+ line = htmlTagRe.ReplaceAllString(line, "")
127
+ line = strings.NewReplacer(
128
+ "**", "",
129
+ "*", "",
130
+ "`", "",
131
+ "#", "",
132
+ ">", "",
133
+ "✅", "",
134
+ "❌", "",
135
+ "✓", "",
136
+ "✗", "",
137
+ "→", "到",
138
+ ).Replace(line)
139
+ line = strings.Trim(line, " \t-:|")
140
+ line = multiSpaceRe.ReplaceAllString(line, " ")
141
+ return strings.TrimSpace(line)
142
+ }
143
+
144
// isMostlyTableRow reports whether line looks like a wide table row: it has
// at least two pipe characters and is longer than 40 runes.
func isMostlyTableRow(line string) bool {
	if strings.Count(line, "|") < 2 {
		return false
	}
	return len([]rune(line)) > 40
}
150
+
151
+ func isProgressNoiseLine(line string) bool {
152
+ return speedNoiseRe.MatchString(line) || etaNoiseRe.MatchString(line)
153
+ }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "apiKey": "your-api-key",
3
- "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional",
3
+ "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse",
4
4
  "defaultVoice": {
5
5
  "voice_type": "zh_female_mizai_uranus_bigtts",
6
6
  "resourceId": "seed-tts-2.0"
@@ -1,130 +1,144 @@
1
1
  #!/bin/bash
2
- # Stop Hook: transcript 文件中提取本次会话所有 Claude 回复文本
3
- # iAgent 调用 Claude 时设 ISPEAK_SKIP=1,此时跳过(iAgent 自己播)
2
+ # Claude Code / Codex 共用播报 Hook:
3
+ # 只取最后一条 assistant 回复,加 `{source:<name>}` 前缀后发给 ispeakd。
4
4
  [[ "$ISPEAK_SKIP" == "1" ]] && exit 0
5
5
 
6
- # 来源参数: claude 或 codex
7
6
  SOURCE="${1:-claude}"
8
-
9
7
  SOCK="$HOME/.config/iSpeak/ispeak.sock"
10
8
  LOG="$HOME/.config/iSpeak/hook.log"
11
9
 
12
- input=$(cat)
13
-
14
- json_value() {
15
- local key="$1"
16
- if command -v node >/dev/null 2>&1; then
17
- printf "%s" "$input" | node -e '
18
- const key = process.argv[1];
19
- let input = "";
20
- process.stdin.setEncoding("utf8");
21
- process.stdin.on("data", chunk => input += chunk);
22
- process.stdin.on("end", () => {
23
- try {
24
- const value = JSON.parse(input)[key];
25
- if (typeof value === "string") process.stdout.write(value);
26
- } catch (_) {}
27
- });
28
- ' "$key"
29
- return
30
- fi
31
-
32
- printf "%s" "$input" | sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\"\([^\"]*\)\".*/\1/p"
10
+ # Codex `notify` 会把 JSON 作为最后一个参数传入;
11
+ # Claude Code 以及 Codex 的 Claude 风格 Stop Hook 会把 JSON 写到 stdin。
12
+ input="${2:-}"
13
+ if [[ -z "$input" ]]; then
14
+ input=$(cat)
15
+ fi
16
+ input_file=$(mktemp)
17
+ trap 'rm -f "$input_file"' EXIT
18
+ printf "%s" "$input" > "$input_file"
19
+
20
+ text=$(SOURCE="$SOURCE" HOOK_INPUT_FILE="$input_file" node <<'NODE' 2>/dev/null
21
+ const fs = require("fs");
22
+
23
+ {
24
+ const input = readFile(process.env.HOOK_INPUT_FILE || "");
25
+ const payload = parseJSON(input) || {};
26
+ const source = process.env.SOURCE || "";
27
+ const text = source.startsWith("codex")
28
+ ? lastCodexAssistant(payload)
29
+ : lastClaudeAssistant(payload);
30
+
31
+ if (text) process.stdout.write(text);
33
32
  }
34
33
 
35
- extract_recent_assistant_text() {
36
- local transcript="$1"
37
- local cutoff="$2"
38
-
39
- if command -v node >/dev/null 2>&1; then
40
- node -e '
41
- const fs = require("fs");
42
- const file = process.argv[1];
43
- const cutoff = Number(process.argv[2]);
44
- const out = [];
45
-
46
- function collectText(content) {
47
- if (typeof content === "string") {
48
- out.push(content);
49
- return;
50
- }
51
- if (!Array.isArray(content)) return;
52
- for (const item of content) {
53
- if (item && typeof item.text === "string") out.push(item.text);
54
- }
55
- }
34
+ function lastClaudeAssistant(payload) {
35
+ const direct = firstString(payload.last_assistant_message, payload.message);
36
+ if (direct) return direct;
56
37
 
57
- for (const line of fs.readFileSync(file, "utf8").split(/\r?\n/)) {
58
- if (!line.trim()) continue;
59
- try {
60
- const event = JSON.parse(line);
61
- if (typeof event.timestamp === "number" && event.timestamp < cutoff) continue;
62
- if (event.role === "assistant") collectText(event.content);
63
- if (event.message && event.message.role === "assistant") collectText(event.message.content);
64
- } catch (_) {}
65
- }
66
- process.stdout.write([...new Set(out.filter(Boolean))].join(" "));
67
- ' "$transcript" "$cutoff" 2>/dev/null
68
- return
69
- fi
70
-
71
- awk -v cutoff="$cutoff" '
72
- {
73
- if (match($0, /"timestamp"[[:space:]]*:[[:space:]]*[0-9]+/)) {
74
- ts = substr($0, RSTART, RLENGTH)
75
- gsub(/[^0-9]/, "", ts)
76
- ts = int(ts)
77
- if (ts < cutoff) next
78
- }
38
+ const transcript = firstString(payload.transcript_path, payload.transcriptPath);
39
+ return transcript ? lastAssistantFromTranscript(transcript, "claude") : "";
40
+ }
79
41
 
80
- if (match($0, /"role"[[:space:]]*:[[:space:]]*"assistant"/)) {
81
- if (match($0, /"content"[[:space:]]*:[[:space:]]*\[/)) {
82
- gsub(/[^{]*\[/, "", $0)
83
- gsub(/\].*/, "", $0)
84
- while (match($0, /"text"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
85
- t = substr($0, RSTART, RLENGTH)
86
- gsub(/"text"[[:space:]]*:[[:space:]]*"/, "", t)
87
- gsub(/"$/, "", t)
88
- if (t != "") print t
89
- $0 = substr($0, RSTART + RLENGTH)
90
- }
91
- } else if (match($0, /"content"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
92
- t = substr($0, RSTART, RLENGTH)
93
- gsub(/"content"[[:space:]]*:[[:space:]]*"/, "", t)
94
- gsub(/"$/, "", t)
95
- if (t != "") print t
96
- }
97
- }
98
- }
99
- ' "$transcript" 2>/dev/null | sort -u | tr '\n' ' '
42
+ function lastCodexAssistant(payload) {
43
+ const direct = firstString(
44
+ payload["last-assistant-message"],
45
+ payload.last_assistant_message,
46
+ payload.lastAssistantMessage,
47
+ payload.message,
48
+ payload.lastMessage
49
+ );
50
+ if (direct) return direct;
51
+
52
+ const transcript = firstString(
53
+ payload.transcript_path,
54
+ payload.transcriptPath,
55
+ payload["transcript-path"]
56
+ );
57
+ return transcript ? lastAssistantFromTranscript(transcript, "codex") : "";
58
+ }
59
+
60
+ function readFile(file) {
61
+ try {
62
+ return fs.readFileSync(file, "utf8");
63
+ } catch {
64
+ return "";
65
+ }
100
66
  }
101
67
 
102
- # stdin JSON 提取 transcript 路径和最后一条消息
103
- transcript=$(json_value "transcript_path")
104
- last_msg=$(json_value "last_assistant_message")
68
+ function parseJSON(text) {
69
+ try {
70
+ return JSON.parse(text);
71
+ } catch {
72
+ return null;
73
+ }
74
+ }
75
+
76
+ function firstString(...values) {
77
+ for (const value of values) {
78
+ if (typeof value === "string" && value !== "") return value;
79
+ }
80
+ return "";
81
+ }
82
+
83
+ function collectText(content) {
84
+ if (typeof content === "string") return content;
85
+ if (!Array.isArray(content)) return "";
86
+ return content
87
+ .map(item => item && typeof item.text === "string" ? item.text : "")
88
+ .filter(Boolean)
89
+ .join(" ");
90
+ }
105
91
 
106
- all_text="$last_msg"
92
+ function lastAssistantFromTranscript(file, source) {
93
+ let data = "";
94
+ try {
95
+ data = fs.readFileSync(file, "utf8");
96
+ } catch {
97
+ return "";
98
+ }
99
+
100
+ let last = "";
101
+ for (const line of data.split(/\r?\n/)) {
102
+ if (!line.trim()) continue;
103
+ const event = parseJSON(line);
104
+ if (!event) continue;
105
+
106
+ if (source === "claude") {
107
+ if (event.role === "assistant") {
108
+ last = collectText(event.content) || last;
109
+ }
110
+ if (event.message && event.message.role === "assistant") {
111
+ last = collectText(event.message.content) || last;
112
+ }
113
+ }
107
114
 
108
- # 如果有 transcript 文件,提取最近 30 秒内的所有 assistant 消息
109
- if [[ -n "$transcript" && -f "$transcript" ]]; then
110
- # 计算 30 秒前的时间戳(毫秒)
111
- cutoff=$(($(date +%s) * 1000 - 30000))
115
+ if (source === "codex" &&
116
+ event.type === "response_item" &&
117
+ event.payload &&
118
+ event.payload.type === "message" &&
119
+ event.payload.role === "assistant"
120
+ ) {
121
+ last = collectText(event.payload.content) || last;
122
+ }
123
+ }
124
+ return last;
125
+ }
112
126
 
113
- # 优先用 JSON parser,Node 不存在时回退到简易 awk。
114
- extra=$(extract_recent_assistant_text "$transcript" "$cutoff")
127
+ NODE
128
+ )
115
129
 
116
- if [[ -n "$extra" ]]; then
117
- all_text="$extra"
118
- fi
130
+ if [[ "$ISPEAK_HOOK_PRINT_TEXT" == "1" ]]; then
131
+ printf "%s" "$text"
132
+ exit 0
119
133
  fi
120
134
 
121
135
  echo "=== $(date) ===" >> "$LOG"
122
136
  echo "SOURCE: $SOURCE" >> "$LOG"
123
- echo "TEXT_LEN: ${#all_text}" >> "$LOG"
124
- echo "PREVIEW: ${all_text:0:150}" >> "$LOG"
137
+ echo "TEXT_LEN: ${#text}" >> "$LOG"
138
+ echo "PREVIEW: ${text:0:150}" >> "$LOG"
125
139
 
126
- if [[ -n "$all_text" && -S "$SOCK" ]]; then
127
- printf "{source:%s}%s" "$SOURCE" "$all_text" | nc -U -w5 "$SOCK" 2>> "$LOG"
140
+ if [[ -n "$text" && -S "$SOCK" ]]; then
141
+ printf "{source:%s}%s" "$SOURCE" "$text" | nc -U -w5 "$SOCK" 2>> "$LOG"
128
142
  echo "SPOKE: OK" >> "$LOG"
129
143
  else
130
144
  echo "SPOKE: SKIP" >> "$LOG"
package/main.go CHANGED
@@ -17,7 +17,6 @@ import (
17
17
  "os/exec"
18
18
  "os/signal"
19
19
  "path/filepath"
20
- "regexp"
21
20
  "strings"
22
21
  "sync"
23
22
  "syscall"
@@ -43,25 +42,6 @@ var tempDir string
43
42
 
44
43
  var errAlreadyRunning = errors.New("iSpeak already running")
45
44
 
46
- var (
47
- markdownLinkRe = regexp.MustCompile(`\[[^\]]+\]\(([^)]*)\)`)
48
- absolutePathRe = regexp.MustCompile(`/(?:Users|private|tmp|var|opt|usr|bin|sbin|etc|Library|Applications)/\S+`)
49
- commitHashRe = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)
50
- uuidRe = regexp.MustCompile(`\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b`)
51
- urlRe = regexp.MustCompile(`https?://\S+`)
52
- ansiEscapeRe = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z]`)
53
- multiSpaceRe = regexp.MustCompile(`\s+`)
54
- markdownListRe = regexp.MustCompile(`^\s*(?:[-*+]\s+|\d+[.)]\s+)`)
55
- htmlTagRe = regexp.MustCompile(`<[^>]+>`)
56
- codeFenceStartRe = regexp.MustCompile("^```")
57
- artifactStartRe = regexp.MustCompile(`(?i)^<artifact\b`)
58
- htmlDocumentLineRe = regexp.MustCompile(`(?i)^<!doctype html|^<html\b|^<head\b|^<body\b|^<style\b|^</`)
59
- progressNoiseRe = regexp.MustCompile(`(?i)(^\s*\d{1,3}%\s*$|\d{1,3}%.*\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s|\bETA\b|^\s*[-=]{3,}\s*$)`)
60
- speedNoiseRe = regexp.MustCompile(`(?i)\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s`)
61
- etaNoiseRe = regexp.MustCompile(`(?i)\bETA\b|预计剩余|剩余时间`)
62
- fileListNoiseRe = regexp.MustCompile(`(?i)\.(?:go|js|ts|tsx|jsx|json|md|yaml|yml|toml|sum|mod|lock|html|css|sh|plist|safetensors|mp3|wav|png|jpg|jpeg|pdf|docx)\b`)
63
- )
64
-
65
45
  type StreamPlayer interface {
66
46
  Write(audio []byte) error
67
47
  CloseAndWait() error
@@ -463,7 +443,7 @@ func loadConfig() Config {
463
443
  // 回退到环境变量
464
444
  return Config{
465
445
  APIKey: envOrDefault("IAGENT_TTS_API_KEY", ""),
466
- Endpoint: envOrDefault("IAGENT_TTS_ENDPOINT", "https://openspeech.bytedance.com/api/v3/tts/unidirectional"),
446
+ Endpoint: envOrDefault("IAGENT_TTS_ENDPOINT", "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse"),
467
447
  }
468
448
  }
469
449
 
@@ -590,8 +570,7 @@ func parseSSE(r io.Reader) ([]byte, error) {
590
570
 
591
571
  func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
592
572
  audioChunks := 0
593
- scanner := bufio.NewScanner(r)
594
- scanner.Buffer(make([]byte, 256*1024), 256*1024)
573
+ reader := bufio.NewReaderSize(r, 64*1024)
595
574
 
596
575
  var dataLines []string
597
576
 
@@ -608,40 +587,49 @@ func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
608
587
  return err
609
588
  }
610
589
 
611
- for scanner.Scan() {
612
- line := strings.TrimSpace(scanner.Text())
590
+ for {
591
+ rawLine, err := reader.ReadString('\n')
592
+ if err != nil && len(rawLine) == 0 {
593
+ if err == io.EOF {
594
+ break
595
+ }
596
+ return fmt.Errorf("read sse: %w", err)
597
+ }
598
+
599
+ line := strings.TrimSpace(rawLine)
613
600
  if line == "" {
614
601
  if err := flush(); err != nil {
615
602
  return err
616
603
  }
617
- continue
618
- }
619
- if strings.HasPrefix(line, ":") || strings.HasPrefix(line, "event:") ||
604
+ } else if strings.HasPrefix(line, ":") || strings.HasPrefix(line, "event:") ||
620
605
  strings.HasPrefix(line, "id:") || strings.HasPrefix(line, "retry:") {
621
- continue
622
- }
623
- if strings.HasPrefix(line, "data:") {
606
+ // SSE metadata, ignored.
607
+ } else if strings.HasPrefix(line, "data:") {
624
608
  dataLines = append(dataLines, strings.TrimPrefix(line, "data:"))
625
- continue
626
- }
627
- // 非标准 JSON 直出
628
- if err := flush(); err != nil {
629
- return err
630
- }
631
- ok, err := processEvent(line, onAudio)
632
- if ok {
633
- audioChunks++
609
+ } else {
610
+ // 非标准 JSON 直出
611
+ if err := flush(); err != nil {
612
+ return err
613
+ }
614
+ ok, err := processEvent(line, onAudio)
615
+ if ok {
616
+ audioChunks++
617
+ }
618
+ if err != nil {
619
+ return err
620
+ }
634
621
  }
622
+
635
623
  if err != nil {
636
- return err
624
+ if err == io.EOF {
625
+ break
626
+ }
627
+ return fmt.Errorf("read sse: %w", err)
637
628
  }
638
629
  }
639
630
  if err := flush(); err != nil {
640
631
  return err
641
632
  }
642
- if err := scanner.Err(); err != nil {
643
- return fmt.Errorf("scan: %w", err)
644
- }
645
633
 
646
634
  if audioChunks == 0 {
647
635
  return fmt.Errorf("no audio data")
@@ -661,6 +649,10 @@ func processEvent(payload string, onAudio func([]byte) error) (bool, error) {
661
649
  return false, nil
662
650
  }
663
651
 
652
+ if err := sseEventError(event); err != nil {
653
+ return false, err
654
+ }
655
+
664
656
  if b64 := extractAudioBase64(event); b64 != "" {
665
657
  data, err := base64.StdEncoding.DecodeString(b64)
666
658
  if err != nil {
@@ -675,6 +667,35 @@ func processEvent(payload string, onAudio func([]byte) error) (bool, error) {
675
667
  return false, nil
676
668
  }
677
669
 
670
// sseEventError inspects the "code" field of a decoded SSE event and returns
// an error when it holds a numeric value other than 0 or 20000000.
//
// It returns nil when the field is absent or is not a float64, int or int64.
// The error text includes the event's "message" string, or "unknown error"
// when that field is missing, empty or not a string.
// NOTE(review): 0 and 20000000 are presumably the service's success codes;
// confirm against the TTS API documentation.
func sseEventError(event map[string]any) error {
	raw, present := event["code"]
	if !present {
		return nil
	}

	var code int64
	if f, ok := raw.(float64); ok {
		code = int64(f)
	} else if i, ok := raw.(int); ok {
		code = int64(i)
	} else if i64, ok := raw.(int64); ok {
		code = i64
	} else {
		return nil
	}

	switch code {
	case 0, 20000000:
		return nil
	}

	message := "unknown error"
	if text, ok := event["message"].(string); ok && text != "" {
		message = text
	}
	return fmt.Errorf("tts sse error: code=%d message=%s", code, message)
}
698
+
678
699
  func extractAudioBase64(event map[string]any) string {
679
700
  for _, key := range []string{"data", "audio", "audio_data"} {
680
701
  if v, ok := event[key].(string); ok && v != "" {
@@ -691,167 +712,6 @@ func extractAudioBase64(event map[string]any) string {
691
712
  return ""
692
713
  }
693
714
 
694
- // 过滤格式符号,保留自然朗读文本。
695
- // 顺序很重要:先跳过跨行块结构,再跳过整行噪声,最后清理行内符号。
696
- func cleanText(text string) string {
697
- var lines []string
698
- rawLines := strings.Split(text, "\n")
699
- inCodeBlock := false
700
- inArtifact := false
701
- inMarkdownTable := false
702
- for i := 0; i < len(rawLines); i++ {
703
- line := rawLines[i]
704
- line = strings.TrimSpace(line)
705
- if line == "" {
706
- inMarkdownTable = false
707
- continue
708
- }
709
- if codeFenceStartRe.MatchString(line) {
710
- inCodeBlock = !inCodeBlock
711
- continue
712
- }
713
- if inCodeBlock {
714
- continue
715
- }
716
- if artifactStartRe.MatchString(line) {
717
- inArtifact = !strings.Contains(strings.ToLower(line), "</artifact>")
718
- continue
719
- }
720
- if inArtifact {
721
- if strings.Contains(strings.ToLower(line), "</artifact>") {
722
- inArtifact = false
723
- }
724
- continue
725
- }
726
- if isMarkdownTableSeparator(line) {
727
- if len(lines) > 0 && isMarkdownTableRow(strings.TrimSpace(rawLines[i-1])) {
728
- lines = lines[:len(lines)-1]
729
- }
730
- inMarkdownTable = true
731
- continue
732
- }
733
- if inMarkdownTable {
734
- if isMarkdownTableRow(line) {
735
- continue
736
- }
737
- inMarkdownTable = false
738
- }
739
- if shouldSkipSpeechLine(line) {
740
- continue
741
- }
742
-
743
- cleaned := cleanSpeechLine(line)
744
- if cleaned != "" {
745
- lines = append(lines, cleaned)
746
- }
747
- }
748
- return strings.Join(lines, ",")
749
- }
750
-
751
- func shouldSkipSpeechLine(line string) bool {
752
- if isMarkdownTableSeparator(line) {
753
- return true
754
- }
755
- if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
756
- return true
757
- }
758
- if htmlDocumentLineRe.MatchString(line) {
759
- return true
760
- }
761
- if isProgressNoiseLine(line) {
762
- return true
763
- }
764
- if isMostlyTableRow(line) {
765
- return true
766
- }
767
- if isMostlyFileListLine(line) {
768
- return true
769
- }
770
- return false
771
- }
772
-
773
- func isMarkdownTableSeparator(line string) bool {
774
- line = strings.TrimSpace(line)
775
- return strings.Contains(line, "|") && strings.Trim(line, "|-: ") == ""
776
- }
777
-
778
- func isMarkdownTableRow(line string) bool {
779
- line = strings.TrimSpace(line)
780
- return strings.Count(line, "|") >= 2
781
- }
782
-
783
- func cleanSpeechLine(line string) string {
784
- // Markdown 链接必须在 URL 删除前处理,否则会丢掉链接标题。
785
- line = ansiEscapeRe.ReplaceAllString(line, "")
786
- line = markdownListRe.ReplaceAllString(line, "")
787
- line = markdownLinkRe.ReplaceAllStringFunc(line, func(match string) string {
788
- if end := strings.Index(match, "]"); end > 1 {
789
- return match[1:end]
790
- }
791
- return ""
792
- })
793
- line = urlRe.ReplaceAllString(line, "")
794
- line = absolutePathRe.ReplaceAllString(line, " 路径 ")
795
- // UUID 必须在短 hash 前处理,避免先删短片段后破坏 UUID 识别。
796
- line = uuidRe.ReplaceAllString(line, "")
797
- line = commitHashRe.ReplaceAllString(line, "")
798
- line = htmlTagRe.ReplaceAllString(line, "")
799
- line = strings.NewReplacer(
800
- "**", "",
801
- "*", "",
802
- "`", "",
803
- "#", "",
804
- ">", "",
805
- "✅", "",
806
- "❌", "",
807
- "✓", "",
808
- "✗", "",
809
- "→", "到",
810
- ).Replace(line)
811
- line = strings.Trim(line, " \t-:|")
812
- line = multiSpaceRe.ReplaceAllString(line, " ")
813
- return strings.TrimSpace(line)
814
- }
815
-
816
- func isMostlyTableRow(line string) bool {
817
- if !strings.Contains(line, "|") {
818
- return false
819
- }
820
- return strings.Count(line, "|") >= 2 && len([]rune(line)) > 40
821
- }
822
-
823
- func isProgressNoiseLine(line string) bool {
824
- if !progressNoiseRe.MatchString(line) {
825
- return false
826
- }
827
- if speedNoiseRe.MatchString(line) || etaNoiseRe.MatchString(line) {
828
- return true
829
- }
830
- return !containsCJK(line)
831
- }
832
-
833
- func isMostlyFileListLine(line string) bool {
834
- if !fileListNoiseRe.MatchString(line) {
835
- return false
836
- }
837
- if containsCJK(line) {
838
- return false
839
- }
840
- if strings.Contains(line, ".safetensors") {
841
- return true
842
- }
843
- return strings.Count(line, ".") >= 2 || strings.Contains(line, "/") || strings.Contains(line, " - ")
844
- }
845
-
846
- func containsCJK(s string) bool {
847
- for _, r := range s {
848
- if r >= '\u4e00' && r <= '\u9fff' {
849
- return true
850
- }
851
- }
852
- return false
853
- }
854
-
855
715
  func main() {
856
716
  log.SetFlags(log.Ltime | log.Lshortfile)
857
717
 
@@ -61,6 +61,43 @@ function copyIfMissing(src, dst, mode) {
61
61
  console.log(`配置文件已创建: ${dst}`);
62
62
  }
63
63
 
64
/**
 * Rewrites the legacy default TTS endpoint in an existing config file to the
 * SSE endpoint, saving the previous file as `<configPath>.bak` first.
 *
 * The file is left untouched when it does not exist, is not valid JSON, does
 * not contain a JSON object, or has an `endpoint` other than the legacy
 * default.
 *
 * @param {string} configPath - Path of the JSON config file to migrate.
 * @returns {void}
 */
function migrateDefaultEndpoint(configPath) {
  if (!fs.existsSync(configPath)) {
    return;
  }
  const oldEndpoint = "https://openspeech.bytedance.com/api/v3/tts/unidirectional";
  const newEndpoint = "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse";
  let config;
  try {
    config = JSON.parse(fs.readFileSync(configPath, "utf8"));
  } catch (_) {
    // Unreadable or malformed config: migration is best-effort, so skip it.
    return;
  }
  // "null" is valid JSON; reading a property from it would throw.
  if (config === null || typeof config !== "object") {
    return;
  }
  if (config.endpoint !== oldEndpoint) {
    return;
  }
  fs.copyFileSync(configPath, `${configPath}.bak`);
  config.endpoint = newEndpoint;
  fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`);
  console.log(`配置 endpoint 已迁移到 SSE,旧配置备份: ${configPath}.bak`);
}
84
+
85
/**
 * Installs the hook script at `dst`, overwriting any existing copy.
 *
 * When `dst` already exists and its content differs from `src` (or the two
 * files cannot be compared), the existing file is first saved as `<dst>.bak`.
 *
 * @param {string} src - Path of the hook script shipped with the package.
 * @param {string} dst - Destination path of the installed hook script.
 * @returns {void}
 */
function installHook(src, dst) {
  if (fs.existsSync(dst)) {
    let changed;
    try {
      changed = fs.readFileSync(src, "utf8") !== fs.readFileSync(dst, "utf8");
    } catch (_) {
      // Comparison failed; back up anyway rather than risk losing user edits.
      changed = true;
    }
    // The backup runs outside the try so a failing copy surfaces once instead
    // of being caught and retried.
    if (changed) {
      fs.copyFileSync(dst, `${dst}.bak`);
      console.log(`旧 Hook 已备份: ${dst}.bak`);
    }
  }
  copyExecutable(src, dst);
  console.log(`Hook 脚本已安装: ${dst}`);
}
100
+
64
101
  function sleep(ms) {
65
102
  Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
66
103
  }
@@ -106,8 +143,10 @@ function main() {
106
143
  symlinkForce(cliPath, path.join(binDir, "ispeak-claude"));
107
144
  symlinkForce(cliPath, path.join(binDir, "ispeak-codex"));
108
145
 
109
- copyIfMissing(path.join(root, "configs", "config.example.json"), path.join(configDir, "config.json"));
110
- copyIfMissing(path.join(root, "configs", "hook-speak.sh"), path.join(configDir, "hook-speak.sh"), 0o755);
146
+ const configPath = path.join(configDir, "config.json");
147
+ copyIfMissing(path.join(root, "configs", "config.example.json"), configPath);
148
+ migrateDefaultEndpoint(configPath);
149
+ installHook(path.join(root, "configs", "hook-speak.sh"), path.join(configDir, "hook-speak.sh"));
111
150
 
112
151
  const plist = fs
113
152
  .readFileSync(path.join(root, "configs", "com.iSpeak.plist"), "utf8")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xdfnet/ispeak",
3
- "version": "1.6.4",
3
+ "version": "1.6.6",
4
4
  "description": "Local macOS TTS daemon for AI coding assistants, powered by Volcengine streaming TTS.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/xdfnet/iSpeak#readme",
@@ -26,14 +26,16 @@
26
26
  "scripts": {
27
27
  "build": "go build -ldflags=\"-s -w\" -o build/ispeakd .",
28
28
  "test": "go test ./...",
29
+ "prepublishOnly": "make test",
29
30
  "postinstall": "node npm/postinstall.js",
30
31
  "pack:dry-run": "npm pack --dry-run"
31
32
  },
32
33
  "files": [
33
34
  "main.go",
35
+ "clean_text.go",
34
36
  "go.mod",
35
37
  "go.sum",
36
- "scripts/",
38
+ "scripts/ispeak",
37
39
  "configs/",
38
40
  "npm/",
39
41
  "Docs/",
package/scripts/ispeak CHANGED
@@ -2,7 +2,7 @@
2
2
  # ispeak — iSpeak 控制命令
3
3
  set -euo pipefail
4
4
 
5
- VERSION="1.6.3"
5
+ VERSION="1.6.6"
6
6
  SOCK="$HOME/.config/iSpeak/ispeak.sock"
7
7
  PLIST="$HOME/Library/LaunchAgents/com.iSpeak.plist"
8
8
  CMD_NAME="$(basename "$0")"