@xdfnet/ispeak 1.6.6 → 1.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -205,6 +205,7 @@ fi
205
205
 
206
206
  - Codex `notify`:读 `$2`
207
207
  - Claude / Codex Stop Hook:读 stdin
208
+ - 如果 Codex 的 `notify` 和 `Stop` 同时启用,脚本会按 `turn_id` 去重,避免同一回合播两次
208
209
 
209
210
  Codex 文本字段优先级:
210
211
 
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # iSpeak
2
2
 
3
- ![Version](https://img.shields.io/badge/version-1.6.6-blue)
3
+ ![Version](https://img.shields.io/badge/version-1.6.7-blue)
4
4
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
5
5
  [![Go Version](https://img.shields.io/badge/Go-1.26-blue)](https://golang.org/dl/)
6
6
  ![Platform](https://img.shields.io/badge/platform-macOS-green)
@@ -39,11 +39,7 @@ ispeak-codex "构建完成,耗时 12 秒"
39
39
  npm i -g @xdfnet/ispeak
40
40
  ```
41
41
 
42
- 当前 npm 安装会在本机编译 `ispeakd`,需要已安装 Go。没有 `ffplay` 时会自动回退 `afplay`;推荐安装 `ffmpeg` 获得流式播放:
43
-
44
- ```bash
45
- brew install ffmpeg
46
- ```
42
+ 当前 npm 安装会在本机编译 `ispeakd`,需要已安装 Go。主播放链路使用 macOS 原生 `AVAudioEngine`,不依赖 `ffmpeg`。失败时直接记录日志并删除任务。
47
43
 
48
44
  **源码安装:**
49
45
 
@@ -76,11 +72,11 @@ ispeak "iSpeak 准备好了"
76
72
  │ │ │
77
73
  │ ▼ │
78
74
  │ 单 Worker 流式链路 │
79
- │ (SSE audio chunk → 播放器 stdin
75
+ │ (SSE PCM chunk → AVAudioEngine
80
76
  │ │ │
81
77
  │ ▼ │
82
- 流式播放器
83
- (优先 ffplay stdin,无 ffplay 回退 afplay)
78
+ 错误处理
79
+ (失败时记录日志并删除任务)
84
80
  └─────────────────────────────────────────────────────┘
85
81
  ```
86
82
 
@@ -194,6 +190,8 @@ notify = ["bash", "/Users/你的用户名/.config/iSpeak/hook-speak.sh", "codex"
194
190
  }
195
191
  ```
196
192
 
193
+ `hook-speak.sh` 会按 `turn_id` 做一次去重,所以即使 `notify` 和 `Stop` 都启用,同一回合也只会播一次。
194
+
197
195
  ## 开发命令
198
196
 
199
197
  ```bash
@@ -6,6 +6,7 @@
6
6
  SOURCE="${1:-claude}"
7
7
  SOCK="$HOME/.config/iSpeak/ispeak.sock"
8
8
  LOG="$HOME/.config/iSpeak/hook.log"
9
+ STATE_FILE="$HOME/.config/iSpeak/hook.last"
9
10
 
10
11
  # Codex `notify` 会把 JSON 作为最后一个参数传入;
11
12
  # Claude/Claude 风格 Stop Hook 会把 JSON 写到 stdin。
@@ -17,26 +18,41 @@ input_file=$(mktemp)
17
18
  trap 'rm -f "$input_file"' EXIT
18
19
  printf "%s" "$input" > "$input_file"
19
20
 
20
- text=$(SOURCE="$SOURCE" HOOK_INPUT_FILE="$input_file" node <<'NODE' 2>/dev/null
21
+ result=$(SOURCE="$SOURCE" HOOK_INPUT_FILE="$input_file" HOOK_STATE_FILE="$STATE_FILE" node <<'NODE' 2>/dev/null
21
22
  const fs = require("fs");
23
+ const crypto = require("crypto");
22
24
 
23
- {
25
// Entry point: work out the last assistant message for this hook invocation
// and print it to stdout, unless the same turn was already announced.
(() => {
  const rawInput = readFile(process.env.HOOK_INPUT_FILE || "");
  const payload = parseJSON(rawInput) || {};
  const source = process.env.SOURCE || "";
  const stateFile = process.env.HOOK_STATE_FILE || "";

  // Sources whose name starts with "codex" use the Codex extractor; everything
  // else is handled as a Claude-style payload.
  const pickAssistant = source.startsWith("codex") ? lastCodexAssistant : lastClaudeAssistant;
  const { text, turnId } = pickAssistant(payload);

  // Nothing to announce.
  if (!text) return;

  if (stateFile) {
    if (turnId) {
      // Already recorded for this source/turn: stay silent.
      if (isDuplicateTurn(stateFile, source, turnId)) return;
      saveTurn(stateFile, source, turnId, text);
    } else {
      // No turn id: record a marker derived from the text; no duplicate
      // check is performed on this path.
      saveTurn(stateFile, source, "", text);
    }
  }

  process.stdout.write(text);
})();
33
49
 
34
50
  function lastClaudeAssistant(payload) {
35
51
  const direct = firstString(payload.last_assistant_message, payload.message);
36
- if (direct) return direct;
52
+ if (direct) return { text: direct, turnId: extractTurnId(payload) };
37
53
 
38
54
  const transcript = firstString(payload.transcript_path, payload.transcriptPath);
39
- return transcript ? lastAssistantFromTranscript(transcript, "claude") : "";
55
+ return transcript ? lastAssistantFromTranscript(transcript, "claude") : { text: "", turnId: extractTurnId(payload) };
40
56
  }
41
57
 
42
58
  function lastCodexAssistant(payload) {
@@ -47,14 +63,14 @@ function lastCodexAssistant(payload) {
47
63
  payload.message,
48
64
  payload.lastMessage
49
65
  );
50
- if (direct) return direct;
66
+ if (direct) return { text: direct, turnId: extractTurnId(payload) };
51
67
 
52
68
  const transcript = firstString(
53
69
  payload.transcript_path,
54
70
  payload.transcriptPath,
55
71
  payload["transcript-path"]
56
72
  );
57
- return transcript ? lastAssistantFromTranscript(transcript, "codex") : "";
73
+ return transcript ? lastAssistantFromTranscript(transcript, "codex") : { text: "", turnId: extractTurnId(payload) };
58
74
  }
59
75
 
60
76
  function readFile(file) {
@@ -98,6 +114,7 @@ function lastAssistantFromTranscript(file, source) {
98
114
  }
99
115
 
100
116
  let last = "";
117
+ let turnId = "";
101
118
  for (const line of data.split(/\r?\n/)) {
102
119
  if (!line.trim()) continue;
103
120
  const event = parseJSON(line);
@@ -119,26 +136,64 @@ function lastAssistantFromTranscript(file, source) {
119
136
  event.payload.role === "assistant"
120
137
  ) {
121
138
  last = collectText(event.payload.content) || last;
139
+ turnId = turnId || extractTurnId(event) || extractTurnId(event.payload);
122
140
  }
123
141
  }
124
- return last;
142
+ return { text: last, turnId };
143
+ }
144
+
145
/**
 * Pull the per-turn identifier out of a hook payload or transcript event.
 *
 * Only turn-scoped keys are consulted. Session- and thread-level ids are
 * deliberately NOT used as a fallback: they stay the same for every turn of a
 * conversation, and isDuplicateTurn() compares nothing but `source:id`, so a
 * session id would make every turn after the first look like a duplicate and
 * silence it. When no turn id is present the caller skips the duplicate check
 * and still announces the message.
 *
 * @param {object} payload - Parsed hook payload or transcript event.
 * @returns {string} Whatever firstString() yields for the snake_case,
 *   camelCase and kebab-case spellings of the turn id.
 */
function extractTurnId(payload) {
  return firstString(payload.turn_id, payload.turnId, payload["turn-id"]);
}
158
+
159
/**
 * Report whether the state file already records this source/turn pair.
 *
 * @param {string} stateFile - Path of the file holding the last recorded turn.
 * @param {string} source - Hook source label.
 * @param {string} turnId - Identifier of the turn being announced.
 * @returns {boolean} true when the trimmed file content equals
 *   `${source}:${turnId}`; false otherwise, including when the file cannot
 *   be read.
 */
function isDuplicateTurn(stateFile, source, turnId) {
  let recorded;
  try {
    recorded = fs.readFileSync(stateFile, "utf8");
  } catch {
    // An unreadable or missing state file is treated as "not seen yet".
    return false;
  }
  return recorded.trim() === `${source}:${turnId}`;
}
167
+
168
/**
 * Record the turn that is about to be announced in the state file.
 *
 * @param {string} stateFile - Path of the state file; its parent directory is
 *   created when missing.
 * @param {string} source - Hook source label.
 * @param {string} turnId - Turn identifier; when empty, a hash of `text` is
 *   stored in its place.
 * @param {string} text - Message text, used only for the hash fallback.
 * @returns {void}
 */
function saveTurn(stateFile, source, turnId, text) {
  const key = turnId || textHash(text);
  const record = `${source}:${key}`;
  try {
    const parentDir = require("path").dirname(stateFile);
    fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(stateFile, record, "utf8");
  } catch {
    // Best effort only: failing to persist de-dup state must not block the announcement.
  }
}
177
+
178
/**
 * Compute the SHA-1 digest of a message.
 *
 * @param {string} text - Text to hash, encoded as UTF-8.
 * @returns {string} Lower-case hexadecimal digest.
 */
function textHash(text) {
  const hasher = crypto.createHash("sha1");
  hasher.update(text, "utf8");
  return hasher.digest("hex");
}
126
181
 
127
182
  NODE
128
183
  )
129
184
 
130
185
  if [[ "$ISPEAK_HOOK_PRINT_TEXT" == "1" ]]; then
131
- printf "%s" "$text"
186
+ printf "%s" "$result"
132
187
  exit 0
133
188
  fi
134
189
 
135
190
  echo "=== $(date) ===" >> "$LOG"
136
191
  echo "SOURCE: $SOURCE" >> "$LOG"
137
- echo "TEXT_LEN: ${#text}" >> "$LOG"
138
- echo "PREVIEW: ${text:0:150}" >> "$LOG"
192
+ echo "TEXT_LEN: ${#result}" >> "$LOG"
193
+ echo "PREVIEW: ${result:0:150}" >> "$LOG"
139
194
 
140
- if [[ -n "$text" && -S "$SOCK" ]]; then
141
- printf "{source:%s}%s" "$SOURCE" "$text" | nc -U -w5 "$SOCK" 2>> "$LOG"
195
+ if [[ -n "$result" && -S "$SOCK" ]]; then
196
+ printf "{source:%s}%s" "$SOURCE" "$result" | nc -U -w5 "$SOCK" 2>> "$LOG"
142
197
  echo "SPOKE: OK" >> "$LOG"
143
198
  else
144
199
  echo "SPOKE: SKIP" >> "$LOG"
package/main.go CHANGED
@@ -1,5 +1,5 @@
1
1
  // ttsd — 独立 TTS 播报守护进程
2
- // 监听 Unix Socket,收到文本 → 字节跳动 TTS SSE → 流式播放
2
+ // 监听 Unix Socket,收到文本 → 字节跳动 TTS SSE/PCM原生流式播放
3
3
  package main
4
4
 
5
5
  import (
@@ -14,7 +14,6 @@ import (
14
14
  "net"
15
15
  "net/http"
16
16
  "os"
17
- "os/exec"
18
17
  "os/signal"
19
18
  "path/filepath"
20
19
  "strings"
@@ -48,137 +47,6 @@ type StreamPlayer interface {
48
47
  Abort() error
49
48
  }
50
49
 
51
- type ffplayStreamPlayer struct {
52
- path string
53
- cmd *exec.Cmd
54
-
55
- mu sync.Mutex
56
- stdin io.WriteCloser
57
- waitOnce sync.Once
58
- waitErr error
59
- }
60
-
61
- func newDefaultStreamPlayer() (StreamPlayer, error) {
62
- if path, ok := findExecutable("ffplay", "/opt/homebrew/bin/ffplay", "/usr/local/bin/ffplay"); ok {
63
- log.Printf("播放器模式: ffplay 流式 stdin (%s)", path)
64
- return newFFplayStreamPlayer(path)
65
- }
66
-
67
- log.Printf("播放器模式: afplay 完整音频 fallback")
68
- return &bufferedStreamPlayer{}, nil
69
- }
70
-
71
- func findExecutable(name string, candidates ...string) (string, bool) {
72
- if path, err := exec.LookPath(name); err == nil {
73
- return path, true
74
- }
75
- for _, path := range candidates {
76
- if st, err := os.Stat(path); err == nil && !st.IsDir() && st.Mode()&0111 != 0 {
77
- return path, true
78
- }
79
- }
80
- return "", false
81
- }
82
-
83
- func newFFplayStreamPlayer(path string) (*ffplayStreamPlayer, error) {
84
- cmd := exec.Command(path, "-nodisp", "-autoexit", "-loglevel", "error", "-i", "pipe:0")
85
- stdin, err := cmd.StdinPipe()
86
- if err != nil {
87
- return nil, err
88
- }
89
- cmd.Stderr = os.Stderr
90
- if err := cmd.Start(); err != nil {
91
- _ = stdin.Close()
92
- return nil, err
93
- }
94
- return &ffplayStreamPlayer{path: path, cmd: cmd, stdin: stdin}, nil
95
- }
96
-
97
- func (p *ffplayStreamPlayer) Write(audio []byte) error {
98
- if len(audio) == 0 {
99
- return nil
100
- }
101
- p.mu.Lock()
102
- stdin := p.stdin
103
- p.mu.Unlock()
104
- if stdin == nil {
105
- return fmt.Errorf("播放器输入已关闭")
106
- }
107
- if _, err := stdin.Write(audio); err != nil {
108
- return fmt.Errorf("写入播放器失败: %w", err)
109
- }
110
- return nil
111
- }
112
-
113
- func (p *ffplayStreamPlayer) CloseAndWait() error {
114
- p.mu.Lock()
115
- stdin := p.stdin
116
- p.stdin = nil
117
- p.mu.Unlock()
118
- if stdin != nil {
119
- if err := stdin.Close(); err != nil {
120
- return fmt.Errorf("关闭播放器输入失败: %w", err)
121
- }
122
- }
123
- if err := p.wait(); err != nil {
124
- return fmt.Errorf("ffplay failed: %w", err)
125
- }
126
- return nil
127
- }
128
-
129
- func (p *ffplayStreamPlayer) Abort() error {
130
- p.mu.Lock()
131
- stdin := p.stdin
132
- p.stdin = nil
133
- p.mu.Unlock()
134
- if stdin != nil {
135
- _ = stdin.Close()
136
- }
137
- if p.cmd != nil && p.cmd.Process != nil {
138
- _ = p.cmd.Process.Kill()
139
- }
140
- return p.wait()
141
- }
142
-
143
- func (p *ffplayStreamPlayer) wait() error {
144
- p.waitOnce.Do(func() {
145
- if p.cmd != nil {
146
- p.waitErr = p.cmd.Wait()
147
- }
148
- })
149
- return p.waitErr
150
- }
151
-
152
- type bufferedStreamPlayer struct {
153
- chunks [][]byte
154
- }
155
-
156
- func (p *bufferedStreamPlayer) Write(audio []byte) error {
157
- if len(audio) == 0 {
158
- return nil
159
- }
160
- chunk := append([]byte(nil), audio...)
161
- p.chunks = append(p.chunks, chunk)
162
- return nil
163
- }
164
-
165
- func (p *bufferedStreamPlayer) CloseAndWait() error {
166
- total := 0
167
- for _, chunk := range p.chunks {
168
- total += len(chunk)
169
- }
170
- audio := make([]byte, 0, total)
171
- for _, chunk := range p.chunks {
172
- audio = append(audio, chunk...)
173
- }
174
- return playAudio(audio)
175
- }
176
-
177
- func (p *bufferedStreamPlayer) Abort() error {
178
- p.chunks = nil
179
- return nil
180
- }
181
-
182
50
  // 任务状态
183
51
  // 生命周期:pending -> running -> delete
184
52
  type TaskStatus int
@@ -312,13 +180,13 @@ func (e *TaskEngine) runTransaction(task *Task) error {
312
180
 
313
181
  if err := e.synthesizeStreamFn(context.Background(), task.Cfg, task.Text, &task.Voice, onAudio); err != nil {
314
182
  _ = player.Abort()
315
- return err
183
+ return fmt.Errorf("TTS 合成失败: id=%d: %w", task.ID, err)
316
184
  }
317
185
  log.Printf("TTS 流结束: id=%d elapsed=%s", task.ID, time.Since(startedAt).Round(time.Millisecond))
318
186
 
319
187
  if err := player.CloseAndWait(); err != nil {
320
188
  _ = player.Abort()
321
- return err
189
+ return fmt.Errorf("播放器失败: id=%d: %w", task.ID, err)
322
190
  }
323
191
  return nil
324
192
  }
@@ -476,28 +344,6 @@ type ttsAudioParams struct {
476
344
  SampleRate int `json:"sample_rate"`
477
345
  }
478
346
 
479
- // 调用字节跳动 TTS API,返回完整 MP3 音频数据。保留给测试和 fallback 使用。
480
- func synthesize(ctx context.Context, cfg Config, text string, voice *VoiceInfo) ([]byte, error) {
481
- var chunks [][]byte
482
- if err := synthesizeStream(ctx, cfg, text, voice, func(audio []byte) error {
483
- chunk := append([]byte(nil), audio...)
484
- chunks = append(chunks, chunk)
485
- return nil
486
- }); err != nil {
487
- return nil, err
488
- }
489
-
490
- total := 0
491
- for _, c := range chunks {
492
- total += len(c)
493
- }
494
- result := make([]byte, 0, total)
495
- for _, c := range chunks {
496
- result = append(result, c...)
497
- }
498
- return result, nil
499
- }
500
-
501
347
  // 调用字节跳动 TTS API,边解析 SSE 边回调 PCM 音频块
502
348
  func synthesizeStream(ctx context.Context, cfg Config, text string, voice *VoiceInfo, onAudio func([]byte) error) error {
503
349
  speaker := voice.VoiceType
@@ -512,8 +358,8 @@ func synthesizeStream(ctx context.Context, cfg Config, text string, voice *Voice
512
358
  Text: text,
513
359
  Speaker: speaker,
514
360
  AudioParams: ttsAudioParams{
515
- Format: "mp3",
516
- SampleRate: 24000,
361
+ Format: "pcm",
362
+ SampleRate: 48000,
517
363
  },
518
364
  },
519
365
  }
@@ -848,21 +694,6 @@ func validateVoiceInfo(name string, voice *VoiceInfo) error {
848
694
  return nil
849
695
  }
850
696
 
851
- func playAudio(data []byte) error {
852
- tmpFile := filepath.Join(tempDir, fmt.Sprintf("ttsd-%d.mp3", time.Now().UnixNano()))
853
- if err := os.WriteFile(tmpFile, data, 0644); err != nil {
854
- return fmt.Errorf("写入临时文件失败: %w", err)
855
- }
856
- defer os.Remove(tmpFile)
857
-
858
- cmd := exec.Command("/usr/bin/afplay", tmpFile)
859
- log.Printf("播放开始: %s", filepath.Base(tmpFile))
860
- if err := cmd.Run(); err != nil {
861
- return fmt.Errorf("播放失败: %w", err)
862
- }
863
- return nil
864
- }
865
-
866
697
  func handleConnection(conn net.Conn, engine *TaskEngine) {
867
698
  defer func() {
868
699
  if r := recover(); r != nil {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xdfnet/ispeak",
3
- "version": "1.6.6",
3
+ "version": "1.6.7",
4
4
  "description": "Local macOS TTS daemon for AI coding assistants, powered by Volcengine streaming TTS.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/xdfnet/iSpeak#readme",
package/scripts/ispeak CHANGED
@@ -2,7 +2,7 @@
2
2
  # ispeak — iSpeak 控制命令
3
3
  set -euo pipefail
4
4
 
5
- VERSION="1.6.6"
5
+ VERSION="1.6.7"
6
6
  SOCK="$HOME/.config/iSpeak/ispeak.sock"
7
7
  PLIST="$HOME/Library/LaunchAgents/com.iSpeak.plist"
8
8
  CMD_NAME="$(basename "$0")"