@xdfnet/ispeak 1.6.1 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,7 +34,7 @@ iSpeak 是一个运行在 macOS 上的本地 TTS 播报守护进程,通过 Uni
34
34
  │ ▼ │
35
35
  │ Speak Worker (single) │
36
36
  │ - pending_synth -> speaking │
37
- │ - 调用 TTS 流式接口(失败重试1次)
37
+ │ - 调用 TTS 流式接口(失败直接删除,不重试)
38
38
  │ - SSE audio chunk -> StreamPlayer.Write │
39
39
  │ - 播放完成后删除任务;失败时直接删除任务(不重试) │
40
40
  │ │
@@ -107,12 +107,14 @@ pending_synth -> speaking -> delete
107
107
 
108
108
  `Submit(cleanedText, voice, cfg)` 原子执行:
109
109
  1. 删除所有 `pending_synth` 任务
110
- 2. 创建新任务(`pending_synth`)
111
- 3. 唤醒 speak worker
110
+ 2. 打断当前 `speaking` 任务(取消合成/停止播放)
111
+ 3. 创建新任务(`pending_synth`)
112
+ 4. 唤醒 speak worker
112
113
 
113
114
  策略说明:
114
- - 只清理“未开始合成”的任务
115
- - 不打断 `speaking`
115
+ - 未开始合成的旧任务直接删除
116
+ - 已领取但过期的旧任务在合成前跳过
117
+ - 正在合成/播放的旧任务会被新消息取消
116
118
 
117
119
  ### Speak worker 规则
118
120
 
@@ -130,9 +132,22 @@ pending_synth -> speaking -> delete
130
132
  `handleConnection()`:
131
133
  - 读取 socket 文本
132
134
  - 解析 `{source:xxx}` 音色前缀
133
- - `cleanText()` 过滤 Markdown/表格符号
135
+ - `cleanText()` 生成语音友好的文本
134
136
  - 将“过滤后文本”提交给 `TaskEngine.Submit`
135
137
 
138
+ `cleanText()` 只影响 TTS 播报,不改变屏幕显示内容。当前清洗规则:
139
+
140
+ - Markdown 格式符号:标题、加粗、反引号、引用符
141
+ - Markdown 表格整块:表头、分隔线、表格内容
142
+ - 代码块、artifact、HTML 页面源码
143
+ - Markdown 链接 URL,仅保留链接标题
144
+ - 绝对路径简化为“路径”
145
+ - 长 commit hash、UUID、长 ID
146
+ - 明显文件列表、模型分片列表、下载清单
147
+ - 下载进度、速度、进度条、ANSI 控制符等终端噪声
148
+
149
+ 清洗目标是保留适合听的内容:结论、成功/失败状态、下一步动作、关键错误原因。
150
+
136
151
  ### 2. 流式合成播放阶段
137
152
 
138
153
  - speak worker 领取任务
@@ -151,9 +166,9 @@ pending_synth -> speaking -> delete
151
166
 
152
167
  ## 失败与成本策略
153
168
 
154
- - 新任务到达时仅清理 `pending_synth`,避免无效合成
155
- - 流式合成/播放失败:整条播报重试 1 次后删除
156
- - 执行中任务不打断,行为稳定、可预期
169
+ - 新任务到达时清理 `pending_synth` 并打断当前任务,避免无效合成/播放
170
+ - 流式合成/播放失败:直接删除任务,不重试,避免重复播报
171
+ - 只保留最新消息优先播报,降低 TTS 成本
157
172
 
158
173
  ## 文件布局
159
174
 
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # iSpeak
2
2
 
3
- ![Version](https://img.shields.io/badge/version-1.6.1-blue)
3
+ ![Version](https://img.shields.io/badge/version-1.6.3-blue)
4
4
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
5
5
  [![Go Version](https://img.shields.io/badge/Go-1.26-blue)](https://golang.org/dl/)
6
6
  ![Platform](https://img.shields.io/badge/platform-macOS-green)
@@ -51,11 +51,12 @@ brew install ffmpeg
51
51
  git clone https://github.com/xdfnet/iSpeak.git && cd iSpeak && make install
52
52
  ```
53
53
 
54
- 安装时手动输入 API Key,然后验证:
54
+ 安装后编辑 API Key,然后验证:
55
55
 
56
56
  ```bash
57
+ open ~/.config/iSpeak/config.json
57
58
  ispeak status
58
- ispeak test "iSpeak 准备好了"
59
+ ispeak "iSpeak 准备好了"
59
60
  ```
60
61
 
61
62
  ## 工作原理
@@ -88,6 +89,26 @@ ispeak test "iSpeak 准备好了"
88
89
  pending_synth → speaking → delete
89
90
  ```
90
91
 
92
+ 新消息到达时会清理未开始任务,并打断当前合成/播放,只保留最新消息优先播报。
93
+
94
+ ## 语音清洗规则
95
+
96
+ 清洗只影响 TTS 播报内容,不改变 Claude/Codex 屏幕显示内容。
97
+
98
+ 播报前会过滤或简化这些内容:
99
+
100
+ - Markdown 格式符号:标题 `#`、加粗 `**`、反引号、引用 `>`
101
+ - Markdown 表格整块:表头、分隔线、表格内容都不播
102
+ - 代码块:``` 包裹的内容不播
103
+ - artifact / HTML 内容:不播生成的页面源码
104
+ - Markdown 链接:只保留链接标题,不播 URL
105
+ - 绝对路径:简化为“路径”
106
+ - 长 commit hash、UUID、长 ID:不播
107
+ - 明显文件列表:如模型分片、代码文件列表、下载文件清单
108
+ - 下载进度和终端噪声:百分比、速度、进度条、ANSI 控制符
109
+
110
+ 保留优先级:结论、成功/失败状态、需要用户操作的下一步、关键错误原因。
111
+
91
112
  ## 全部命令
92
113
 
93
114
  ```bash
@@ -11,52 +11,107 @@ LOG="$HOME/.config/iSpeak/hook.log"
11
11
 
12
12
  input=$(cat)
13
13
 
14
- # 从 stdin JSON 提取 transcript 路径和最后一条消息
15
- # 简单 JSON 解析(不依赖 python3)
16
- transcript=$(echo "$input" | sed -n 's/.*"transcript_path"\s*:\s*"\([^"]*\)".*/\1/p')
17
- last_msg=$(echo "$input" | sed -n 's/.*"last_assistant_message"\s*:\s*"\([^"]*\)".*/\1/p')
14
# json_value KEY
#
# Prints the value stored under the top-level KEY of the JSON document held in
# the global $input, but only when that value is a string. Prints nothing when
# the key is absent, the value is not a string, or (node path) the input is not
# valid JSON.
#
# Node is preferred because it is a real JSON parser. The sed fallback is a
# best-effort regex: it stops at the first double quote inside the value, so it
# does not understand escaped quotes.
json_value() {
  local key="$1"

  if ! command -v node >/dev/null 2>&1; then
    printf "%s" "$input" | sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\"\([^\"]*\)\".*/\1/p"
    return
  fi

  printf "%s" "$input" | node -e '
    const key = process.argv[1];
    let input = "";
    process.stdin.setEncoding("utf8");
    process.stdin.on("data", chunk => input += chunk);
    process.stdin.on("end", () => {
      try {
        const value = JSON.parse(input)[key];
        if (typeof value === "string") process.stdout.write(value);
      } catch (_) {}
    });
  ' "$key"
}
20
34
 
21
- # 如果有 transcript 文件,提取最近 30 秒内的所有 assistant 消息
22
- if [[ -n "$transcript" && -f "$transcript" ]]; then
23
- # 计算 30 秒前的时间戳(毫秒)
24
- cutoff=$(($(date +%s) * 1000 - 30000))
35
# extract_recent_assistant_text TRANSCRIPT CUTOFF
#
# Reads the JSON-lines file TRANSCRIPT and prints the text of assistant
# messages on stdout.
#
#   TRANSCRIPT  path to a file with one JSON event per line
#   CUTOFF      numeric threshold compared against each event's "timestamp"
#
# Events whose "timestamp" is a number lower than CUTOFF are skipped. Events
# with a missing or non-numeric timestamp are NOT filtered by either
# implementation. Stderr of the parser is discarded.
#
# Two implementations are used:
#   * node (preferred): parses each line as JSON, accepts both
#     {role, content} and {message: {role, content}} shapes, collects string
#     content or the "text" field of array items, de-duplicates while keeping
#     first-seen order, and joins the pieces with a single space.
#   * awk (fallback when node is not installed): regex-based extraction of
#     "text"/"content" string values; output is de-duplicated with `sort -u`
#     (so the order is sorted, not chronological) and newlines are turned
#     into spaces. It does not understand escaped quotes inside values.
extract_recent_assistant_text() {
  local transcript="$1"
  local cutoff="$2"

  # Preferred path: a real JSON parser.
  if command -v node >/dev/null 2>&1; then
    node -e '
      const fs = require("fs");
      const file = process.argv[1];
      const cutoff = Number(process.argv[2]);
      const out = [];

      function collectText(content) {
        if (typeof content === "string") {
          out.push(content);
          return;
        }
        if (!Array.isArray(content)) return;
        for (const item of content) {
          if (item && typeof item.text === "string") out.push(item.text);
        }
      }

      for (const line of fs.readFileSync(file, "utf8").split(/\r?\n/)) {
        if (!line.trim()) continue;
        try {
          const event = JSON.parse(line);
          if (typeof event.timestamp === "number" && event.timestamp < cutoff) continue;
          if (event.role === "assistant") collectText(event.content);
          if (event.message && event.message.role === "assistant") collectText(event.message.content);
        } catch (_) {}
      }
      process.stdout.write([...new Set(out.filter(Boolean))].join(" "));
    ' "$transcript" "$cutoff" 2>/dev/null
    return
  fi

  # Fallback path: simplified regex extraction with awk.
  awk -v cutoff="$cutoff" '
    {
      if (match($0, /"timestamp"[[:space:]]*:[[:space:]]*[0-9]+/)) {
        ts = substr($0, RSTART, RLENGTH)
        gsub(/[^0-9]/, "", ts)
        ts = int(ts)
        if (ts < cutoff) next
      }

      if (match($0, /"role"[[:space:]]*:[[:space:]]*"assistant"/)) {
        if (match($0, /"content"[[:space:]]*:[[:space:]]*\[/)) {
          gsub(/[^{]*\[/, "", $0)
          gsub(/\].*/, "", $0)
          while (match($0, /"text"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
            t = substr($0, RSTART, RLENGTH)
            gsub(/"text"[[:space:]]*:[[:space:]]*"/, "", t)
            gsub(/"$/, "", t)
            if (t != "") print t
            $0 = substr($0, RSTART + RLENGTH)
          }
        } else if (match($0, /"content"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
          t = substr($0, RSTART, RLENGTH)
          gsub(/"content"[[:space:]]*:[[:space:]]*"/, "", t)
          gsub(/"$/, "", t)
          if (t != "") print t
        }
      }
    }
  ' "$transcript" 2>/dev/null | sort -u | tr '\n' ' '
}
101
+
102
+ # 从 stdin JSON 提取 transcript 路径和最后一条消息
103
+ transcript=$(json_value "transcript_path")
104
+ last_msg=$(json_value "last_assistant_message")
105
+
106
+ all_text="$last_msg"
107
+
108
+ # 如果有 transcript 文件,提取最近 30 秒内的所有 assistant 消息
109
+ if [[ -n "$transcript" && -f "$transcript" ]]; then
110
+ # 计算 30 秒前的时间戳(毫秒)
111
+ cutoff=$(($(date +%s) * 1000 - 30000))
112
+
113
+ # 优先用 JSON parser,Node 不存在时回退到简易 awk。
114
+ extra=$(extract_recent_assistant_text "$transcript" "$cutoff")
60
115
 
61
116
  if [[ -n "$extra" ]]; then
62
117
  all_text="$extra"
package/main.go CHANGED
@@ -17,6 +17,7 @@ import (
17
17
  "os/exec"
18
18
  "os/signal"
19
19
  "path/filepath"
20
+ "regexp"
20
21
  "strings"
21
22
  "sync"
22
23
  "syscall"
@@ -25,11 +26,6 @@ import (
25
26
  "gopkg.in/natefinch/lumberjack.v2"
26
27
  )
27
28
 
28
- const (
29
- ttsMaxAttempts = 2
30
- ttsRetryBackoff = 400 * time.Millisecond
31
- )
32
-
33
29
  var configDir = os.ExpandEnv("$HOME/.config/iSpeak")
34
30
 
35
31
  var (
@@ -47,6 +43,25 @@ var tempDir string
47
43
 
48
44
  var errAlreadyRunning = errors.New("iSpeak already running")
49
45
 
46
// Precompiled patterns used when turning assistant output into speech text.
// They are compiled once at package initialisation.
var (
	// markdownLinkRe matches an inline Markdown link "[title](url)".
	markdownLinkRe = regexp.MustCompile(`\[[^\]]+\]\(([^)]*)\)`)

	// absolutePathRe matches an absolute path rooted at one of the listed
	// top-level directories, up to the next whitespace.
	absolutePathRe = regexp.MustCompile(`/(?:Users|private|tmp|var|opt|usr|bin|sbin|etc|Library|Applications)/\S+`)

	// commitHashRe matches a standalone run of 7 to 40 lowercase hex
	// characters. NOTE(review): this also matches purely decimal numbers of
	// 7+ digits and hex-only words.
	commitHashRe = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)

	// uuidRe matches a UUID in 8-4-4-4-12 hex form (either letter case).
	uuidRe = regexp.MustCompile(`\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b`)

	// urlRe matches an http/https URL up to the next whitespace.
	urlRe = regexp.MustCompile(`https?://\S+`)

	// ansiEscapeRe matches ANSI CSI escape sequences such as colour codes.
	ansiEscapeRe = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z]`)

	// multiSpaceRe matches any run of whitespace.
	multiSpaceRe = regexp.MustCompile(`\s+`)

	// markdownListRe matches a leading bullet ("-", "*", "+") or numbered
	// ("1.", "1)") list marker.
	markdownListRe = regexp.MustCompile(`^\s*(?:[-*+]\s+|\d+[.)]\s+)`)

	// htmlTagRe matches anything of the form "<...>".
	htmlTagRe = regexp.MustCompile(`<[^>]+>`)

	// codeFenceStartRe matches a line that begins with a triple backtick.
	codeFenceStartRe = regexp.MustCompile("^```")

	// artifactStartRe matches a line that begins with an "<artifact" tag.
	artifactStartRe = regexp.MustCompile(`(?i)^<artifact\b`)

	// htmlDocumentLineRe matches lines that start with HTML document
	// scaffolding (doctype, html/head/body/style tags) or any closing tag.
	htmlDocumentLineRe = regexp.MustCompile(`(?i)^<!doctype html|^<html\b|^<head\b|^<body\b|^<style\b|^</`)

	// progressNoiseRe is the coarse progress/terminal-noise detector: a bare
	// percentage line, a percentage followed by a transfer speed, the word
	// ETA, or a line made only of "-"/"=" characters.
	progressNoiseRe = regexp.MustCompile(`(?i)(^\s*\d{1,3}%\s*$|\d{1,3}%.*\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s|\bETA\b|^\s*[-=]{3,}\s*$)`)

	// speedNoiseRe matches a transfer speed such as "12.5 MB/s".
	speedNoiseRe = regexp.MustCompile(`(?i)\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s`)

	// etaNoiseRe matches remaining-time markers in English or Chinese.
	etaNoiseRe = regexp.MustCompile(`(?i)\bETA\b|预计剩余|剩余时间`)

	// fileListNoiseRe matches a file name extension commonly seen in file
	// listings (source, config, model shard, media and document files).
	fileListNoiseRe = regexp.MustCompile(`(?i)\.(?:go|js|ts|tsx|jsx|json|md|yaml|yml|toml|sum|mod|lock|html|css|sh|plist|safetensors|mp3|wav|png|jpg|jpeg|pdf|docx)\b`)
)
64
+
50
65
  type StreamPlayer interface {
51
66
  Write(audio []byte) error
52
67
  CloseAndWait() error
@@ -54,9 +69,13 @@ type StreamPlayer interface {
54
69
  }
55
70
 
56
71
// ffplayStreamPlayer is a StreamPlayer backed by an external player process
// that receives audio through its stdin pipe.
type ffplayStreamPlayer struct {
	path string    // presumably the player executable path; confirm against the constructor
	cmd  *exec.Cmd // the player process; may be nil, which wait and Abort tolerate

	// mu guards stdin so that Write, CloseAndWait and Abort can run on
	// different goroutines. stdin is set to nil once the pipe has been handed
	// off for closing.
	mu    sync.Mutex
	stdin io.WriteCloser

	// waitOnce makes sure cmd.Wait is called a single time; waitErr caches
	// its result for every later caller of wait.
	waitOnce sync.Once
	waitErr  error
}
61
80
 
62
81
  func newDefaultStreamPlayer() (StreamPlayer, error) {
@@ -99,35 +118,55 @@ func (p *ffplayStreamPlayer) Write(audio []byte) error {
99
118
  if len(audio) == 0 {
100
119
  return nil
101
120
  }
102
- if _, err := p.stdin.Write(audio); err != nil {
121
+ p.mu.Lock()
122
+ stdin := p.stdin
123
+ p.mu.Unlock()
124
+ if stdin == nil {
125
+ return fmt.Errorf("播放器输入已关闭")
126
+ }
127
+ if _, err := stdin.Write(audio); err != nil {
103
128
  return fmt.Errorf("写入播放器失败: %w", err)
104
129
  }
105
130
  return nil
106
131
  }
107
132
 
108
133
  func (p *ffplayStreamPlayer) CloseAndWait() error {
109
- if p.stdin != nil {
110
- if err := p.stdin.Close(); err != nil {
134
+ p.mu.Lock()
135
+ stdin := p.stdin
136
+ p.stdin = nil
137
+ p.mu.Unlock()
138
+ if stdin != nil {
139
+ if err := stdin.Close(); err != nil {
111
140
  return fmt.Errorf("关闭播放器输入失败: %w", err)
112
141
  }
113
- p.stdin = nil
114
142
  }
115
- if err := p.cmd.Wait(); err != nil {
143
+ if err := p.wait(); err != nil {
116
144
  return fmt.Errorf("ffplay failed: %w", err)
117
145
  }
118
146
  return nil
119
147
  }
120
148
 
121
149
  func (p *ffplayStreamPlayer) Abort() error {
122
- if p.stdin != nil {
123
- _ = p.stdin.Close()
124
- p.stdin = nil
150
+ p.mu.Lock()
151
+ stdin := p.stdin
152
+ p.stdin = nil
153
+ p.mu.Unlock()
154
+ if stdin != nil {
155
+ _ = stdin.Close()
125
156
  }
126
157
  if p.cmd != nil && p.cmd.Process != nil {
127
158
  _ = p.cmd.Process.Kill()
128
- _ = p.cmd.Wait()
129
159
  }
130
- return nil
160
+ return p.wait()
161
+ }
162
+
163
+ func (p *ffplayStreamPlayer) wait() error {
164
+ p.waitOnce.Do(func() {
165
+ if p.cmd != nil {
166
+ p.waitErr = p.cmd.Wait()
167
+ }
168
+ })
169
+ return p.waitErr
131
170
  }
132
171
 
133
172
  type bufferedStreamPlayer struct {
@@ -183,8 +222,11 @@ type TaskEngine struct {
183
222
  mu sync.Mutex
184
223
 
185
224
  nextID uint64
225
+ latestID uint64
186
226
  tasks map[uint64]*Task
187
227
  pendingSynth []uint64
228
+ activeID uint64
229
+ activeCancel context.CancelFunc
188
230
 
189
231
  synthWake chan struct{}
190
232
 
@@ -207,7 +249,6 @@ func (e *TaskEngine) Start() {
207
249
 
208
250
  func (e *TaskEngine) Submit(text string, voice VoiceInfo, cfg Config) uint64 {
209
251
  e.mu.Lock()
210
- defer e.mu.Unlock()
211
252
 
212
253
  // 新任务进来先删所有未开始合成任务
213
254
  for _, id := range e.pendingSynth {
@@ -216,6 +257,12 @@ func (e *TaskEngine) Submit(text string, voice VoiceInfo, cfg Config) uint64 {
216
257
  }
217
258
  e.pendingSynth = e.pendingSynth[:0]
218
259
 
260
+ cancelActive := e.activeCancel
261
+ activeID := e.activeID
262
+ if activeID != 0 {
263
+ log.Printf("打断当前播报任务: id=%d", activeID)
264
+ }
265
+
219
266
  e.nextID++
220
267
  task := &Task{
221
268
  ID: e.nextID,
@@ -225,10 +272,16 @@ func (e *TaskEngine) Submit(text string, voice VoiceInfo, cfg Config) uint64 {
225
272
  Cfg: cfg,
226
273
  }
227
274
  e.tasks[task.ID] = task
275
+ e.latestID = task.ID
228
276
  e.pendingSynth = append(e.pendingSynth, task.ID)
229
277
  log.Printf("任务创建: id=%d text=%s", task.ID, text)
230
278
 
231
279
  notify(e.synthWake)
280
+ e.mu.Unlock()
281
+
282
+ if cancelActive != nil {
283
+ cancelActive()
284
+ }
232
285
  return task.ID
233
286
  }
234
287
 
@@ -252,24 +305,27 @@ func (e *TaskEngine) processSpeakTask(id uint64) {
252
305
  }
253
306
  }()
254
307
 
308
+ ctx, cancel := context.WithCancel(context.Background())
309
+ e.setActiveTask(id, cancel)
310
+ defer e.clearActiveTask(id)
311
+
255
312
  task, ok := e.getTask(id)
256
313
  if !ok {
257
314
  return
258
315
  }
259
-
260
- var lastErr error
261
- for i := 1; i <= ttsMaxAttempts; i++ {
262
- lastErr = e.speakOnce(context.Background(), task)
263
- if lastErr == nil {
264
- break
265
- }
266
- if i < ttsMaxAttempts {
267
- time.Sleep(ttsRetryBackoff)
268
- }
316
+ if !e.isLatestTask(id) {
317
+ cancel()
318
+ log.Printf("跳过过期播报任务: id=%d", id)
319
+ e.deleteTask(id)
320
+ return
269
321
  }
270
322
 
271
- if lastErr != nil {
272
- log.Printf("播报失败并删除任务: id=%d err=%v", id, lastErr)
323
+ if err := e.speakOnce(ctx, task); err != nil {
324
+ if errors.Is(err, context.Canceled) {
325
+ log.Printf("播报已打断并删除任务: id=%d", id)
326
+ } else {
327
+ log.Printf("播报失败并删除任务: id=%d err=%v", id, err)
328
+ }
273
329
  e.deleteTask(id)
274
330
  return
275
331
  }
@@ -299,9 +355,21 @@ func (e *TaskEngine) speakOnce(ctx context.Context, task *Task) error {
299
355
  return err
300
356
  }
301
357
  log.Printf("TTS 流结束: id=%d elapsed=%s", task.ID, time.Since(startedAt).Round(time.Millisecond))
302
- if err := player.CloseAndWait(); err != nil {
358
+
359
+ done := make(chan error, 1)
360
+ go func() {
361
+ done <- player.CloseAndWait()
362
+ }()
363
+ select {
364
+ case err := <-done:
365
+ if err != nil {
366
+ _ = player.Abort()
367
+ return err
368
+ }
369
+ case <-ctx.Done():
303
370
  _ = player.Abort()
304
- return err
371
+ <-done
372
+ return ctx.Err()
305
373
  }
306
374
  return nil
307
375
  }
@@ -344,6 +412,28 @@ func (e *TaskEngine) deleteTask(id uint64) {
344
412
  delete(e.tasks, id)
345
413
  }
346
414
 
415
+ func (e *TaskEngine) setActiveTask(id uint64, cancel context.CancelFunc) {
416
+ e.mu.Lock()
417
+ defer e.mu.Unlock()
418
+ e.activeID = id
419
+ e.activeCancel = cancel
420
+ }
421
+
422
+ func (e *TaskEngine) clearActiveTask(id uint64) {
423
+ e.mu.Lock()
424
+ defer e.mu.Unlock()
425
+ if e.activeID == id {
426
+ e.activeID = 0
427
+ e.activeCancel = nil
428
+ }
429
+ }
430
+
431
+ func (e *TaskEngine) isLatestTask(id uint64) bool {
432
+ e.mu.Lock()
433
+ defer e.mu.Unlock()
434
+ return e.latestID == id
435
+ }
436
+
347
437
  func notify(ch chan struct{}) {
348
438
  select {
349
439
  case ch <- struct{}{}:
@@ -387,6 +477,17 @@ func loadConfig() Config {
387
477
  }
388
478
  var cfg Config
389
479
  if json.Unmarshal(data, &cfg) == nil && cfg.APIKey != "" {
480
+ if err := validateConfig(cfg); err != nil {
481
+ log.Printf("配置文件无效: %s err=%v", p, err)
482
+ configCacheMu.Lock()
483
+ if configCacheValid {
484
+ cached := configCache
485
+ configCacheMu.Unlock()
486
+ return cached
487
+ }
488
+ configCacheMu.Unlock()
489
+ return cfg
490
+ }
390
491
  log.Printf("配置文件: %s", p)
391
492
  if cfg.DefaultVoice != nil {
392
493
  log.Printf("默认音色: %s (%s)", cfg.DefaultVoice.VoiceType, cfg.DefaultVoice.ResourceID)
@@ -637,29 +738,56 @@ func extractAudioBase64(event map[string]any) string {
637
738
  return ""
638
739
  }
639
740
 
640
- // 过滤格式符号,保留自然朗读文本
741
+ // 过滤格式符号,保留自然朗读文本。
742
+ // 顺序很重要:先跳过跨行块结构,再跳过整行噪声,最后清理行内符号。
641
743
  func cleanText(text string) string {
642
744
  var lines []string
643
- for _, line := range strings.Split(text, "\n") {
745
+ rawLines := strings.Split(text, "\n")
746
+ inCodeBlock := false
747
+ inArtifact := false
748
+ inMarkdownTable := false
749
+ for i := 0; i < len(rawLines); i++ {
750
+ line := rawLines[i]
644
751
  line = strings.TrimSpace(line)
645
- if strings.HasPrefix(line, "|---") || strings.HasPrefix(line, "|:---") {
752
+ if line == "" {
753
+ inMarkdownTable = false
754
+ continue
755
+ }
756
+ if codeFenceStartRe.MatchString(line) {
757
+ inCodeBlock = !inCodeBlock
758
+ continue
759
+ }
760
+ if inCodeBlock {
761
+ continue
762
+ }
763
+ if artifactStartRe.MatchString(line) {
764
+ inArtifact = !strings.Contains(strings.ToLower(line), "</artifact>")
646
765
  continue
647
766
  }
648
- if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
767
+ if inArtifact {
768
+ if strings.Contains(strings.ToLower(line), "</artifact>") {
769
+ inArtifact = false
770
+ }
771
+ continue
772
+ }
773
+ if isMarkdownTableSeparator(line) {
774
+ if len(lines) > 0 && isMarkdownTableRow(strings.TrimSpace(rawLines[i-1])) {
775
+ lines = lines[:len(lines)-1]
776
+ }
777
+ inMarkdownTable = true
649
778
  continue
650
779
  }
651
- // 过滤纯表格分隔行(|---|---|、:---|:---| 等)
652
- if strings.Trim(line, "|-: ") == "" {
780
+ if inMarkdownTable {
781
+ if isMarkdownTableRow(line) {
782
+ continue
783
+ }
784
+ inMarkdownTable = false
785
+ }
786
+ if shouldSkipSpeechLine(line) {
653
787
  continue
654
788
  }
655
- cleaned := strings.NewReplacer(
656
- "**", "",
657
- "*", "",
658
- "`", "",
659
- "#", "",
660
- ">", "",
661
- ).Replace(line)
662
- cleaned = strings.TrimSpace(cleaned)
789
+
790
+ cleaned := cleanSpeechLine(line)
663
791
  if cleaned != "" {
664
792
  lines = append(lines, cleaned)
665
793
  }
@@ -667,6 +795,110 @@ func cleanText(text string) string {
667
795
  return strings.Join(lines, ",")
668
796
  }
669
797
 
798
+ func shouldSkipSpeechLine(line string) bool {
799
+ if isMarkdownTableSeparator(line) {
800
+ return true
801
+ }
802
+ if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
803
+ return true
804
+ }
805
+ if htmlDocumentLineRe.MatchString(line) {
806
+ return true
807
+ }
808
+ if isProgressNoiseLine(line) {
809
+ return true
810
+ }
811
+ if isMostlyTableRow(line) {
812
+ return true
813
+ }
814
+ if isMostlyFileListLine(line) {
815
+ return true
816
+ }
817
+ return false
818
+ }
819
+
820
// isMarkdownTableSeparator reports whether line is a Markdown table separator
// row: it must contain at least one pipe and consist only of pipes, dashes,
// colons and spaces (for example "|---|:---:|").
func isMarkdownTableSeparator(line string) bool {
	trimmed := strings.TrimSpace(line)
	if !strings.Contains(trimmed, "|") {
		return false
	}
	return strings.Trim(trimmed, "|-: ") == ""
}
824
+
825
// isMarkdownTableRow reports whether line looks like a Markdown table row,
// which here simply means it contains at least two pipe characters.
func isMarkdownTableRow(line string) bool {
	pipes := strings.Count(strings.TrimSpace(line), "|")
	return pipes >= 2
}
829
+
830
+ func cleanSpeechLine(line string) string {
831
+ // Markdown 链接必须在 URL 删除前处理,否则会丢掉链接标题。
832
+ line = ansiEscapeRe.ReplaceAllString(line, "")
833
+ line = markdownListRe.ReplaceAllString(line, "")
834
+ line = markdownLinkRe.ReplaceAllStringFunc(line, func(match string) string {
835
+ if end := strings.Index(match, "]"); end > 1 {
836
+ return match[1:end]
837
+ }
838
+ return ""
839
+ })
840
+ line = urlRe.ReplaceAllString(line, "")
841
+ line = absolutePathRe.ReplaceAllString(line, " 路径 ")
842
+ // UUID 必须在短 hash 前处理,避免先删短片段后破坏 UUID 识别。
843
+ line = uuidRe.ReplaceAllString(line, "")
844
+ line = commitHashRe.ReplaceAllString(line, "")
845
+ line = htmlTagRe.ReplaceAllString(line, "")
846
+ line = strings.NewReplacer(
847
+ "**", "",
848
+ "*", "",
849
+ "`", "",
850
+ "#", "",
851
+ ">", "",
852
+ "✅", "",
853
+ "❌", "",
854
+ "✓", "",
855
+ "✗", "",
856
+ "→", "到",
857
+ ).Replace(line)
858
+ line = strings.Trim(line, " \t-:|")
859
+ line = multiSpaceRe.ReplaceAllString(line, " ")
860
+ return strings.TrimSpace(line)
861
+ }
862
+
863
// isMostlyTableRow reports whether line is a long table-like row: at least
// two pipe characters and more than 40 runes in total.
func isMostlyTableRow(line string) bool {
	pipeCount := strings.Count(line, "|")
	if pipeCount < 2 {
		return false
	}
	runeCount := len([]rune(line))
	return runeCount > 40
}
869
+
870
+ func isProgressNoiseLine(line string) bool {
871
+ if !progressNoiseRe.MatchString(line) {
872
+ return false
873
+ }
874
+ if speedNoiseRe.MatchString(line) || etaNoiseRe.MatchString(line) {
875
+ return true
876
+ }
877
+ return !containsCJK(line)
878
+ }
879
+
880
+ func isMostlyFileListLine(line string) bool {
881
+ if !fileListNoiseRe.MatchString(line) {
882
+ return false
883
+ }
884
+ if containsCJK(line) {
885
+ return false
886
+ }
887
+ if strings.Contains(line, ".safetensors") {
888
+ return true
889
+ }
890
+ return strings.Count(line, ".") >= 2 || strings.Contains(line, "/") || strings.Contains(line, " - ")
891
+ }
892
+
893
// containsCJK reports whether s contains at least one rune in the range
// U+4E00 through U+9FFF.
func containsCJK(s string) bool {
	return strings.ContainsFunc(s, func(r rune) bool {
		return r >= '\u4e00' && r <= '\u9fff'
	})
}
901
+
670
902
  func main() {
671
903
  log.SetFlags(log.Ltime | log.Lshortfile)
672
904
 
@@ -779,8 +1011,26 @@ func validateConfig(cfg Config) error {
779
1011
  if cfg.Endpoint == "" {
780
1012
  return fmt.Errorf("endpoint 未设置")
781
1013
  }
782
- if cfg.DefaultVoice == nil || cfg.DefaultVoice.VoiceType == "" {
783
- return fmt.Errorf("defaultVoice 未设置")
1014
+ if err := validateVoiceInfo("defaultVoice", cfg.DefaultVoice); err != nil {
1015
+ return err
1016
+ }
1017
+ for source, voice := range cfg.SourceVoices {
1018
+ if err := validateVoiceInfo(fmt.Sprintf("sourceVoices.%s", source), voice); err != nil {
1019
+ return err
1020
+ }
1021
+ }
1022
+ return nil
1023
+ }
1024
+
1025
+ func validateVoiceInfo(name string, voice *VoiceInfo) error {
1026
+ if voice == nil {
1027
+ return fmt.Errorf("%s 未设置", name)
1028
+ }
1029
+ if voice.VoiceType == "" {
1030
+ return fmt.Errorf("%s.voice_type 未设置", name)
1031
+ }
1032
+ if voice.ResourceID == "" {
1033
+ return fmt.Errorf("%s.resourceId 未设置", name)
784
1034
  }
785
1035
  return nil
786
1036
  }
@@ -809,13 +1059,24 @@ func handleConnection(conn net.Conn, engine *TaskEngine) {
809
1059
  defer conn.Close()
810
1060
 
811
1061
  cfg := loadConfig()
1062
+ if err := validateConfig(cfg); err != nil {
1063
+ log.Printf("配置错误,跳过本次播报: %v", err)
1064
+ return
1065
+ }
812
1066
 
813
1067
  var sb strings.Builder
814
1068
  scanner := bufio.NewScanner(conn)
815
1069
  scanner.Buffer(make([]byte, 1*1024*1024), 1*1024*1024)
816
1070
  for scanner.Scan() {
1071
+ if sb.Len() > 0 {
1072
+ sb.WriteByte('\n')
1073
+ }
817
1074
  sb.WriteString(scanner.Text())
818
1075
  }
1076
+ if err := scanner.Err(); err != nil {
1077
+ log.Printf("读取 socket 消息失败: %v", err)
1078
+ return
1079
+ }
819
1080
 
820
1081
  text := strings.TrimSpace(sb.String())
821
1082
  if text == "" {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xdfnet/ispeak",
3
- "version": "1.6.1",
3
+ "version": "1.6.3",
4
4
  "description": "Local macOS TTS daemon for AI coding assistants, powered by Volcengine streaming TTS.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/xdfnet/iSpeak#readme",
package/scripts/ispeak CHANGED
@@ -2,7 +2,7 @@
2
2
  # ispeak — iSpeak 控制命令
3
3
  set -euo pipefail
4
4
 
5
- VERSION="1.6.1"
5
+ VERSION="1.6.3"
6
6
  SOCK="$HOME/.config/iSpeak/ispeak.sock"
7
7
  PLIST="$HOME/Library/LaunchAgents/com.iSpeak.plist"
8
8
  CMD_NAME="$(basename "$0")"