@xdfnet/ispeak 1.6.6 → 1.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Docs/HOOK_TEXT_EXTRACTION.md +1 -0
- package/README.md +7 -9
- package/configs/hook-speak.sh +70 -15
- package/main.go +5 -174
- package/package.json +1 -1
- package/scripts/ispeak +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# iSpeak
|
|
2
2
|
|
|
3
|
-

|
|
4
4
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
5
|
[Go](https://golang.org/dl/)
|
|
6
6
|

|
|
@@ -39,11 +39,7 @@ ispeak-codex "构建完成,耗时 12 秒"
|
|
|
39
39
|
npm i -g @xdfnet/ispeak
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
-
当前 npm 安装会在本机编译 `ispeakd`,需要已安装 Go
|
|
43
|
-
|
|
44
|
-
```bash
|
|
45
|
-
brew install ffmpeg
|
|
46
|
-
```
|
|
42
|
+
当前 npm 安装会在本机编译 `ispeakd`,需要已安装 Go。主播放链路使用 macOS 原生 `AVAudioEngine`,不依赖 `ffmpeg`。失败时直接记录日志并删除任务。
|
|
47
43
|
|
|
48
44
|
**源码安装:**
|
|
49
45
|
|
|
@@ -76,11 +72,11 @@ ispeak "iSpeak 准备好了"
|
|
|
76
72
|
│ │ │
|
|
77
73
|
│ ▼ │
|
|
78
74
|
│ 单 Worker 流式链路 │
|
|
79
|
-
│ (SSE
|
|
75
|
+
│ (SSE PCM chunk → AVAudioEngine) │
|
|
80
76
|
│ │ │
|
|
81
77
|
│ ▼ │
|
|
82
|
-
│
|
|
83
|
-
│
|
|
78
|
+
│ 错误处理 │
|
|
79
|
+
│ (失败时记录日志并删除任务) │
|
|
84
80
|
└─────────────────────────────────────────────────────┘
|
|
85
81
|
```
|
|
86
82
|
|
|
@@ -194,6 +190,8 @@ notify = ["bash", "/Users/你的用户名/.config/iSpeak/hook-speak.sh", "codex"
|
|
|
194
190
|
}
|
|
195
191
|
```
|
|
196
192
|
|
|
193
|
+
`hook-speak.sh` 会按 `turn_id` 做一次去重,所以即使 `notify` 和 `Stop` 都启用,同一回合也只会播一次。
|
|
194
|
+
|
|
197
195
|
## 开发命令
|
|
198
196
|
|
|
199
197
|
```bash
|
package/configs/hook-speak.sh
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
SOURCE="${1:-claude}"
|
|
7
7
|
SOCK="$HOME/.config/iSpeak/ispeak.sock"
|
|
8
8
|
LOG="$HOME/.config/iSpeak/hook.log"
|
|
9
|
+
STATE_FILE="$HOME/.config/iSpeak/hook.last"
|
|
9
10
|
|
|
10
11
|
# Codex `notify` 会把 JSON 作为最后一个参数传入;
|
|
11
12
|
# Claude/Claude 风格 Stop Hook 会把 JSON 写到 stdin。
|
|
@@ -17,26 +18,41 @@ input_file=$(mktemp)
|
|
|
17
18
|
trap 'rm -f "$input_file"' EXIT
|
|
18
19
|
printf "%s" "$input" > "$input_file"
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
result=$(SOURCE="$SOURCE" HOOK_INPUT_FILE="$input_file" HOOK_STATE_FILE="$STATE_FILE" node <<'NODE' 2>/dev/null
|
|
21
22
|
const fs = require("fs");
|
|
23
|
+
const crypto = require("crypto");
|
|
22
24
|
|
|
23
|
-
{
|
|
25
|
+
(() => {
|
|
24
26
|
const input = readFile(process.env.HOOK_INPUT_FILE || "");
|
|
25
27
|
const payload = parseJSON(input) || {};
|
|
26
28
|
const source = process.env.SOURCE || "";
|
|
27
|
-
const
|
|
29
|
+
const stateFile = process.env.HOOK_STATE_FILE || "";
|
|
30
|
+
const result = source.startsWith("codex")
|
|
28
31
|
? lastCodexAssistant(payload)
|
|
29
32
|
: lastClaudeAssistant(payload);
|
|
30
33
|
|
|
31
|
-
if (text)
|
|
32
|
-
|
|
34
|
+
if (!result.text) {
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
if (stateFile && result.turnId) {
|
|
39
|
+
if (isDuplicateTurn(stateFile, source, result.turnId)) {
|
|
40
|
+
return;
|
|
41
|
+
}
|
|
42
|
+
saveTurn(stateFile, source, result.turnId, result.text);
|
|
43
|
+
} else if (stateFile) {
|
|
44
|
+
saveTurn(stateFile, source, "", result.text);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
process.stdout.write(result.text);
|
|
48
|
+
})();
|
|
33
49
|
|
|
34
50
|
function lastClaudeAssistant(payload) {
|
|
35
51
|
const direct = firstString(payload.last_assistant_message, payload.message);
|
|
36
|
-
if (direct) return direct;
|
|
52
|
+
if (direct) return { text: direct, turnId: extractTurnId(payload) };
|
|
37
53
|
|
|
38
54
|
const transcript = firstString(payload.transcript_path, payload.transcriptPath);
|
|
39
|
-
return transcript ? lastAssistantFromTranscript(transcript, "claude") : "";
|
|
55
|
+
return transcript ? lastAssistantFromTranscript(transcript, "claude") : { text: "", turnId: extractTurnId(payload) };
|
|
40
56
|
}
|
|
41
57
|
|
|
42
58
|
function lastCodexAssistant(payload) {
|
|
@@ -47,14 +63,14 @@ function lastCodexAssistant(payload) {
|
|
|
47
63
|
payload.message,
|
|
48
64
|
payload.lastMessage
|
|
49
65
|
);
|
|
50
|
-
if (direct) return direct;
|
|
66
|
+
if (direct) return { text: direct, turnId: extractTurnId(payload) };
|
|
51
67
|
|
|
52
68
|
const transcript = firstString(
|
|
53
69
|
payload.transcript_path,
|
|
54
70
|
payload.transcriptPath,
|
|
55
71
|
payload["transcript-path"]
|
|
56
72
|
);
|
|
57
|
-
return transcript ? lastAssistantFromTranscript(transcript, "codex") : "";
|
|
73
|
+
return transcript ? lastAssistantFromTranscript(transcript, "codex") : { text: "", turnId: extractTurnId(payload) };
|
|
58
74
|
}
|
|
59
75
|
|
|
60
76
|
function readFile(file) {
|
|
@@ -98,6 +114,7 @@ function lastAssistantFromTranscript(file, source) {
|
|
|
98
114
|
}
|
|
99
115
|
|
|
100
116
|
let last = "";
|
|
117
|
+
let turnId = "";
|
|
101
118
|
for (const line of data.split(/\r?\n/)) {
|
|
102
119
|
if (!line.trim()) continue;
|
|
103
120
|
const event = parseJSON(line);
|
|
@@ -119,26 +136,64 @@ function lastAssistantFromTranscript(file, source) {
|
|
|
119
136
|
event.payload.role === "assistant"
|
|
120
137
|
) {
|
|
121
138
|
last = collectText(event.payload.content) || last;
|
|
139
|
+
turnId = turnId || extractTurnId(event) || extractTurnId(event.payload);
|
|
122
140
|
}
|
|
123
141
|
}
|
|
124
|
-
return last;
|
|
142
|
+
return { text: last, turnId };
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
function extractTurnId(payload) {
|
|
146
|
+
return firstString(
|
|
147
|
+
payload.turn_id,
|
|
148
|
+
payload.turnId,
|
|
149
|
+
payload["turn-id"],
|
|
150
|
+
payload.session_id,
|
|
151
|
+
payload.sessionId,
|
|
152
|
+
payload["session-id"],
|
|
153
|
+
payload.thread_id,
|
|
154
|
+
payload.threadId,
|
|
155
|
+
payload["thread-id"]
|
|
156
|
+
);
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
function isDuplicateTurn(stateFile, source, turnId) {
|
|
160
|
+
const current = `${source}:${turnId}`;
|
|
161
|
+
try {
|
|
162
|
+
return fs.readFileSync(stateFile, "utf8").trim() === current;
|
|
163
|
+
} catch {
|
|
164
|
+
return false;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
function saveTurn(stateFile, source, turnId, text) {
|
|
169
|
+
const current = `${source}:${turnId || textHash(text)}`;
|
|
170
|
+
try {
|
|
171
|
+
fs.mkdirSync(require("path").dirname(stateFile), { recursive: true });
|
|
172
|
+
fs.writeFileSync(stateFile, current, "utf8");
|
|
173
|
+
} catch {
|
|
174
|
+
// 去重失败不影响播报。
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
function textHash(text) {
|
|
179
|
+
return crypto.createHash("sha1").update(text, "utf8").digest("hex");
|
|
125
180
|
}
|
|
126
181
|
|
|
127
182
|
NODE
|
|
128
183
|
)
|
|
129
184
|
|
|
130
185
|
if [[ "$ISPEAK_HOOK_PRINT_TEXT" == "1" ]]; then
|
|
131
|
-
printf "%s" "$
|
|
186
|
+
printf "%s" "$result"
|
|
132
187
|
exit 0
|
|
133
188
|
fi
|
|
134
189
|
|
|
135
190
|
echo "=== $(date) ===" >> "$LOG"
|
|
136
191
|
echo "SOURCE: $SOURCE" >> "$LOG"
|
|
137
|
-
echo "TEXT_LEN: ${#
|
|
138
|
-
echo "PREVIEW: ${
|
|
192
|
+
echo "TEXT_LEN: ${#result}" >> "$LOG"
|
|
193
|
+
echo "PREVIEW: ${result:0:150}" >> "$LOG"
|
|
139
194
|
|
|
140
|
-
if [[ -n "$
|
|
141
|
-
printf "{source:%s}%s" "$SOURCE" "$
|
|
195
|
+
if [[ -n "$result" && -S "$SOCK" ]]; then
|
|
196
|
+
printf "{source:%s}%s" "$SOURCE" "$result" | nc -U -w5 "$SOCK" 2>> "$LOG"
|
|
142
197
|
echo "SPOKE: OK" >> "$LOG"
|
|
143
198
|
else
|
|
144
199
|
echo "SPOKE: SKIP" >> "$LOG"
|
package/main.go
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// ttsd — 独立 TTS 播报守护进程
|
|
2
|
-
// 监听 Unix Socket,收到文本 → 字节跳动 TTS SSE →
|
|
2
|
+
// 监听 Unix Socket,收到文本 → 字节跳动 TTS SSE/PCM → 原生流式播放
|
|
3
3
|
package main
|
|
4
4
|
|
|
5
5
|
import (
|
|
@@ -14,7 +14,6 @@ import (
|
|
|
14
14
|
"net"
|
|
15
15
|
"net/http"
|
|
16
16
|
"os"
|
|
17
|
-
"os/exec"
|
|
18
17
|
"os/signal"
|
|
19
18
|
"path/filepath"
|
|
20
19
|
"strings"
|
|
@@ -48,137 +47,6 @@ type StreamPlayer interface {
|
|
|
48
47
|
Abort() error
|
|
49
48
|
}
|
|
50
49
|
|
|
51
|
-
type ffplayStreamPlayer struct {
|
|
52
|
-
path string
|
|
53
|
-
cmd *exec.Cmd
|
|
54
|
-
|
|
55
|
-
mu sync.Mutex
|
|
56
|
-
stdin io.WriteCloser
|
|
57
|
-
waitOnce sync.Once
|
|
58
|
-
waitErr error
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
func newDefaultStreamPlayer() (StreamPlayer, error) {
|
|
62
|
-
if path, ok := findExecutable("ffplay", "/opt/homebrew/bin/ffplay", "/usr/local/bin/ffplay"); ok {
|
|
63
|
-
log.Printf("播放器模式: ffplay 流式 stdin (%s)", path)
|
|
64
|
-
return newFFplayStreamPlayer(path)
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
log.Printf("播放器模式: afplay 完整音频 fallback")
|
|
68
|
-
return &bufferedStreamPlayer{}, nil
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
func findExecutable(name string, candidates ...string) (string, bool) {
|
|
72
|
-
if path, err := exec.LookPath(name); err == nil {
|
|
73
|
-
return path, true
|
|
74
|
-
}
|
|
75
|
-
for _, path := range candidates {
|
|
76
|
-
if st, err := os.Stat(path); err == nil && !st.IsDir() && st.Mode()&0111 != 0 {
|
|
77
|
-
return path, true
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
return "", false
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
func newFFplayStreamPlayer(path string) (*ffplayStreamPlayer, error) {
|
|
84
|
-
cmd := exec.Command(path, "-nodisp", "-autoexit", "-loglevel", "error", "-i", "pipe:0")
|
|
85
|
-
stdin, err := cmd.StdinPipe()
|
|
86
|
-
if err != nil {
|
|
87
|
-
return nil, err
|
|
88
|
-
}
|
|
89
|
-
cmd.Stderr = os.Stderr
|
|
90
|
-
if err := cmd.Start(); err != nil {
|
|
91
|
-
_ = stdin.Close()
|
|
92
|
-
return nil, err
|
|
93
|
-
}
|
|
94
|
-
return &ffplayStreamPlayer{path: path, cmd: cmd, stdin: stdin}, nil
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
func (p *ffplayStreamPlayer) Write(audio []byte) error {
|
|
98
|
-
if len(audio) == 0 {
|
|
99
|
-
return nil
|
|
100
|
-
}
|
|
101
|
-
p.mu.Lock()
|
|
102
|
-
stdin := p.stdin
|
|
103
|
-
p.mu.Unlock()
|
|
104
|
-
if stdin == nil {
|
|
105
|
-
return fmt.Errorf("播放器输入已关闭")
|
|
106
|
-
}
|
|
107
|
-
if _, err := stdin.Write(audio); err != nil {
|
|
108
|
-
return fmt.Errorf("写入播放器失败: %w", err)
|
|
109
|
-
}
|
|
110
|
-
return nil
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
func (p *ffplayStreamPlayer) CloseAndWait() error {
|
|
114
|
-
p.mu.Lock()
|
|
115
|
-
stdin := p.stdin
|
|
116
|
-
p.stdin = nil
|
|
117
|
-
p.mu.Unlock()
|
|
118
|
-
if stdin != nil {
|
|
119
|
-
if err := stdin.Close(); err != nil {
|
|
120
|
-
return fmt.Errorf("关闭播放器输入失败: %w", err)
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
if err := p.wait(); err != nil {
|
|
124
|
-
return fmt.Errorf("ffplay failed: %w", err)
|
|
125
|
-
}
|
|
126
|
-
return nil
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
func (p *ffplayStreamPlayer) Abort() error {
|
|
130
|
-
p.mu.Lock()
|
|
131
|
-
stdin := p.stdin
|
|
132
|
-
p.stdin = nil
|
|
133
|
-
p.mu.Unlock()
|
|
134
|
-
if stdin != nil {
|
|
135
|
-
_ = stdin.Close()
|
|
136
|
-
}
|
|
137
|
-
if p.cmd != nil && p.cmd.Process != nil {
|
|
138
|
-
_ = p.cmd.Process.Kill()
|
|
139
|
-
}
|
|
140
|
-
return p.wait()
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
func (p *ffplayStreamPlayer) wait() error {
|
|
144
|
-
p.waitOnce.Do(func() {
|
|
145
|
-
if p.cmd != nil {
|
|
146
|
-
p.waitErr = p.cmd.Wait()
|
|
147
|
-
}
|
|
148
|
-
})
|
|
149
|
-
return p.waitErr
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
type bufferedStreamPlayer struct {
|
|
153
|
-
chunks [][]byte
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
func (p *bufferedStreamPlayer) Write(audio []byte) error {
|
|
157
|
-
if len(audio) == 0 {
|
|
158
|
-
return nil
|
|
159
|
-
}
|
|
160
|
-
chunk := append([]byte(nil), audio...)
|
|
161
|
-
p.chunks = append(p.chunks, chunk)
|
|
162
|
-
return nil
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
func (p *bufferedStreamPlayer) CloseAndWait() error {
|
|
166
|
-
total := 0
|
|
167
|
-
for _, chunk := range p.chunks {
|
|
168
|
-
total += len(chunk)
|
|
169
|
-
}
|
|
170
|
-
audio := make([]byte, 0, total)
|
|
171
|
-
for _, chunk := range p.chunks {
|
|
172
|
-
audio = append(audio, chunk...)
|
|
173
|
-
}
|
|
174
|
-
return playAudio(audio)
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
func (p *bufferedStreamPlayer) Abort() error {
|
|
178
|
-
p.chunks = nil
|
|
179
|
-
return nil
|
|
180
|
-
}
|
|
181
|
-
|
|
182
50
|
// 任务状态
|
|
183
51
|
// 生命周期:pending -> running -> delete
|
|
184
52
|
type TaskStatus int
|
|
@@ -312,13 +180,13 @@ func (e *TaskEngine) runTransaction(task *Task) error {
|
|
|
312
180
|
|
|
313
181
|
if err := e.synthesizeStreamFn(context.Background(), task.Cfg, task.Text, &task.Voice, onAudio); err != nil {
|
|
314
182
|
_ = player.Abort()
|
|
315
|
-
return err
|
|
183
|
+
return fmt.Errorf("TTS 合成失败: id=%d: %w", task.ID, err)
|
|
316
184
|
}
|
|
317
185
|
log.Printf("TTS 流结束: id=%d elapsed=%s", task.ID, time.Since(startedAt).Round(time.Millisecond))
|
|
318
186
|
|
|
319
187
|
if err := player.CloseAndWait(); err != nil {
|
|
320
188
|
_ = player.Abort()
|
|
321
|
-
return err
|
|
189
|
+
return fmt.Errorf("播放器失败: id=%d: %w", task.ID, err)
|
|
322
190
|
}
|
|
323
191
|
return nil
|
|
324
192
|
}
|
|
@@ -476,28 +344,6 @@ type ttsAudioParams struct {
|
|
|
476
344
|
SampleRate int `json:"sample_rate"`
|
|
477
345
|
}
|
|
478
346
|
|
|
479
|
-
// 调用字节跳动 TTS API,返回完整 MP3 音频数据。保留给测试和 fallback 使用。
|
|
480
|
-
func synthesize(ctx context.Context, cfg Config, text string, voice *VoiceInfo) ([]byte, error) {
|
|
481
|
-
var chunks [][]byte
|
|
482
|
-
if err := synthesizeStream(ctx, cfg, text, voice, func(audio []byte) error {
|
|
483
|
-
chunk := append([]byte(nil), audio...)
|
|
484
|
-
chunks = append(chunks, chunk)
|
|
485
|
-
return nil
|
|
486
|
-
}); err != nil {
|
|
487
|
-
return nil, err
|
|
488
|
-
}
|
|
489
|
-
|
|
490
|
-
total := 0
|
|
491
|
-
for _, c := range chunks {
|
|
492
|
-
total += len(c)
|
|
493
|
-
}
|
|
494
|
-
result := make([]byte, 0, total)
|
|
495
|
-
for _, c := range chunks {
|
|
496
|
-
result = append(result, c...)
|
|
497
|
-
}
|
|
498
|
-
return result, nil
|
|
499
|
-
}
|
|
500
|
-
|
|
501
347
|
// 调用字节跳动 TTS API,边解析 SSE 边回调 PCM 音频块
|
|
502
348
|
func synthesizeStream(ctx context.Context, cfg Config, text string, voice *VoiceInfo, onAudio func([]byte) error) error {
|
|
503
349
|
speaker := voice.VoiceType
|
|
@@ -512,8 +358,8 @@ func synthesizeStream(ctx context.Context, cfg Config, text string, voice *Voice
|
|
|
512
358
|
Text: text,
|
|
513
359
|
Speaker: speaker,
|
|
514
360
|
AudioParams: ttsAudioParams{
|
|
515
|
-
Format: "
|
|
516
|
-
SampleRate:
|
|
361
|
+
Format: "pcm",
|
|
362
|
+
SampleRate: 48000,
|
|
517
363
|
},
|
|
518
364
|
},
|
|
519
365
|
}
|
|
@@ -848,21 +694,6 @@ func validateVoiceInfo(name string, voice *VoiceInfo) error {
|
|
|
848
694
|
return nil
|
|
849
695
|
}
|
|
850
696
|
|
|
851
|
-
func playAudio(data []byte) error {
|
|
852
|
-
tmpFile := filepath.Join(tempDir, fmt.Sprintf("ttsd-%d.mp3", time.Now().UnixNano()))
|
|
853
|
-
if err := os.WriteFile(tmpFile, data, 0644); err != nil {
|
|
854
|
-
return fmt.Errorf("写入临时文件失败: %w", err)
|
|
855
|
-
}
|
|
856
|
-
defer os.Remove(tmpFile)
|
|
857
|
-
|
|
858
|
-
cmd := exec.Command("/usr/bin/afplay", tmpFile)
|
|
859
|
-
log.Printf("播放开始: %s", filepath.Base(tmpFile))
|
|
860
|
-
if err := cmd.Run(); err != nil {
|
|
861
|
-
return fmt.Errorf("播放失败: %w", err)
|
|
862
|
-
}
|
|
863
|
-
return nil
|
|
864
|
-
}
|
|
865
|
-
|
|
866
697
|
func handleConnection(conn net.Conn, engine *TaskEngine) {
|
|
867
698
|
defer func() {
|
|
868
699
|
if r := recover(); r != nil {
|
package/package.json
CHANGED