@xdfnet/ispeak 1.6.1 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Docs/ARCHITECTURE.md +24 -9
- package/README.md +24 -3
- package/configs/hook-speak.sh +78 -23
- package/main.go +310 -49
- package/package.json +1 -1
- package/scripts/ispeak +1 -1
package/Docs/ARCHITECTURE.md
CHANGED
|
@@ -34,7 +34,7 @@ iSpeak 是一个运行在 macOS 上的本地 TTS 播报守护进程,通过 Uni
|
|
|
34
34
|
│ ▼ │
|
|
35
35
|
│ Speak Worker (single) │
|
|
36
36
|
│ - pending_synth -> speaking │
|
|
37
|
-
│ - 调用 TTS
|
|
37
|
+
│ - 调用 TTS 流式接口(失败直接删除,不重试) │
|
|
38
38
|
│ - SSE audio chunk -> StreamPlayer.Write │
|
|
39
39
|
│ - 播放完成后删除任务;连续失败删除任务 │
|
|
40
40
|
│ │
|
|
@@ -107,12 +107,14 @@ pending_synth -> speaking -> delete
|
|
|
107
107
|
|
|
108
108
|
`Submit(cleanedText, voice, cfg)` 原子执行:
|
|
109
109
|
1. 删除所有 `pending_synth` 任务
|
|
110
|
-
2.
|
|
111
|
-
3.
|
|
110
|
+
2. 打断当前 `speaking` 任务(取消合成/停止播放)
|
|
111
|
+
3. 创建新任务(`pending_synth`)
|
|
112
|
+
4. 唤醒 speak worker
|
|
112
113
|
|
|
113
114
|
策略说明:
|
|
114
|
-
-
|
|
115
|
-
-
|
|
115
|
+
- 未开始合成的旧任务直接删除
|
|
116
|
+
- 已领取但过期的旧任务在合成前跳过
|
|
117
|
+
- 正在合成/播放的旧任务会被新消息取消
|
|
116
118
|
|
|
117
119
|
### Speak worker 规则
|
|
118
120
|
|
|
@@ -130,9 +132,22 @@ pending_synth -> speaking -> delete
|
|
|
130
132
|
`handleConnection()`:
|
|
131
133
|
- 读取 socket 文本
|
|
132
134
|
- 解析 `{source:xxx}` 音色前缀
|
|
133
|
-
- `cleanText()`
|
|
135
|
+
- `cleanText()` 生成语音友好的文本
|
|
134
136
|
- 将“过滤后文本”提交给 `TaskEngine.Submit`
|
|
135
137
|
|
|
138
|
+
`cleanText()` 只影响 TTS 播报,不改变屏幕显示内容。当前清洗规则:
|
|
139
|
+
|
|
140
|
+
- Markdown 格式符号:标题、加粗、反引号、引用符
|
|
141
|
+
- Markdown 表格整块:表头、分隔线、表格内容
|
|
142
|
+
- 代码块、artifact、HTML 页面源码
|
|
143
|
+
- Markdown 链接 URL,仅保留链接标题
|
|
144
|
+
- 绝对路径简化为“路径”
|
|
145
|
+
- 长 commit hash、UUID、长 ID
|
|
146
|
+
- 明显文件列表、模型分片列表、下载清单
|
|
147
|
+
- 下载进度、速度、进度条、ANSI 控制符等终端噪声
|
|
148
|
+
|
|
149
|
+
清洗目标是保留适合听的内容:结论、成功/失败状态、下一步动作、关键错误原因。
|
|
150
|
+
|
|
136
151
|
### 2. 流式合成播放阶段
|
|
137
152
|
|
|
138
153
|
- speak worker 领取任务
|
|
@@ -151,9 +166,9 @@ pending_synth -> speaking -> delete
|
|
|
151
166
|
|
|
152
167
|
## 失败与成本策略
|
|
153
168
|
|
|
154
|
-
-
|
|
155
|
-
-
|
|
156
|
-
-
|
|
169
|
+
- 新任务到达时清理 `pending_synth` 并打断当前任务,避免无效合成/播放
|
|
170
|
+
- 流式合成/播放失败:直接删除任务,不重试,避免重复播报
|
|
171
|
+
- 只保留最新消息优先播报,降低 TTS 成本
|
|
157
172
|
|
|
158
173
|
## 文件布局
|
|
159
174
|
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# iSpeak
|
|
2
2
|
|
|
3
|
-

|
|
4
4
|
[](https://opensource.org/licenses/MIT)
|
|
5
5
|
[](https://golang.org/dl/)
|
|
6
6
|

|
|
@@ -51,11 +51,12 @@ brew install ffmpeg
|
|
|
51
51
|
git clone https://github.com/xdfnet/iSpeak.git && cd iSpeak && make install
|
|
52
52
|
```
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
安装后编辑 API Key,然后验证:
|
|
55
55
|
|
|
56
56
|
```bash
|
|
57
|
+
open ~/.config/iSpeak/config.json
|
|
57
58
|
ispeak status
|
|
58
|
-
ispeak
|
|
59
|
+
ispeak "iSpeak 准备好了"
|
|
59
60
|
```
|
|
60
61
|
|
|
61
62
|
## 工作原理
|
|
@@ -88,6 +89,26 @@ ispeak test "iSpeak 准备好了"
|
|
|
88
89
|
pending_synth → speaking → delete
|
|
89
90
|
```
|
|
90
91
|
|
|
92
|
+
新消息到达时会清理未开始任务,并打断当前合成/播放,只保留最新消息优先播报。
|
|
93
|
+
|
|
94
|
+
## 语音清洗规则
|
|
95
|
+
|
|
96
|
+
清洗只影响 TTS 播报内容,不改变 Claude/Codex 屏幕显示内容。
|
|
97
|
+
|
|
98
|
+
播报前会过滤或简化这些内容:
|
|
99
|
+
|
|
100
|
+
- Markdown 格式符号:标题 `#`、加粗 `**`、反引号、引用 `>`
|
|
101
|
+
- Markdown 表格整块:表头、分隔线、表格内容都不播
|
|
102
|
+
- 代码块:``` 包裹的内容不播
|
|
103
|
+
- artifact / HTML 内容:不播生成的页面源码
|
|
104
|
+
- Markdown 链接:只保留链接标题,不播 URL
|
|
105
|
+
- 绝对路径:简化为“路径”
|
|
106
|
+
- 长 commit hash、UUID、长 ID:不播
|
|
107
|
+
- 明显文件列表:如模型分片、代码文件列表、下载文件清单
|
|
108
|
+
- 下载进度和终端噪声:百分比、速度、进度条、ANSI 控制符
|
|
109
|
+
|
|
110
|
+
保留优先级:结论、成功/失败状态、需要用户操作的下一步、关键错误原因。
|
|
111
|
+
|
|
91
112
|
## 全部命令
|
|
92
113
|
|
|
93
114
|
```bash
|
package/configs/hook-speak.sh
CHANGED
|
@@ -11,52 +11,107 @@ LOG="$HOME/.config/iSpeak/hook.log"
|
|
|
11
11
|
|
|
12
12
|
input=$(cat)
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
json_value() {
|
|
15
|
+
local key="$1"
|
|
16
|
+
if command -v node >/dev/null 2>&1; then
|
|
17
|
+
printf "%s" "$input" | node -e '
|
|
18
|
+
const key = process.argv[1];
|
|
19
|
+
let input = "";
|
|
20
|
+
process.stdin.setEncoding("utf8");
|
|
21
|
+
process.stdin.on("data", chunk => input += chunk);
|
|
22
|
+
process.stdin.on("end", () => {
|
|
23
|
+
try {
|
|
24
|
+
const value = JSON.parse(input)[key];
|
|
25
|
+
if (typeof value === "string") process.stdout.write(value);
|
|
26
|
+
} catch (_) {}
|
|
27
|
+
});
|
|
28
|
+
' "$key"
|
|
29
|
+
return
|
|
30
|
+
fi
|
|
18
31
|
|
|
19
|
-
|
|
32
|
+
printf "%s" "$input" | sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\"\([^\"]*\)\".*/\1/p"
|
|
33
|
+
}
|
|
20
34
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
cutoff=$(($(date +%s) * 1000 - 30000))
|
|
35
|
+
extract_recent_assistant_text() {
|
|
36
|
+
local transcript="$1"
|
|
37
|
+
local cutoff="$2"
|
|
25
38
|
|
|
26
|
-
|
|
27
|
-
|
|
39
|
+
if command -v node >/dev/null 2>&1; then
|
|
40
|
+
node -e '
|
|
41
|
+
const fs = require("fs");
|
|
42
|
+
const file = process.argv[1];
|
|
43
|
+
const cutoff = Number(process.argv[2]);
|
|
44
|
+
const out = [];
|
|
45
|
+
|
|
46
|
+
function collectText(content) {
|
|
47
|
+
if (typeof content === "string") {
|
|
48
|
+
out.push(content);
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
if (!Array.isArray(content)) return;
|
|
52
|
+
for (const item of content) {
|
|
53
|
+
if (item && typeof item.text === "string") out.push(item.text);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
for (const line of fs.readFileSync(file, "utf8").split(/\r?\n/)) {
|
|
58
|
+
if (!line.trim()) continue;
|
|
59
|
+
try {
|
|
60
|
+
const event = JSON.parse(line);
|
|
61
|
+
if (typeof event.timestamp === "number" && event.timestamp < cutoff) continue;
|
|
62
|
+
if (event.role === "assistant") collectText(event.content);
|
|
63
|
+
if (event.message && event.message.role === "assistant") collectText(event.message.content);
|
|
64
|
+
} catch (_) {}
|
|
65
|
+
}
|
|
66
|
+
process.stdout.write([...new Set(out.filter(Boolean))].join(" "));
|
|
67
|
+
' "$transcript" "$cutoff" 2>/dev/null
|
|
68
|
+
return
|
|
69
|
+
fi
|
|
70
|
+
|
|
71
|
+
awk -v cutoff="$cutoff" '
|
|
28
72
|
{
|
|
29
|
-
|
|
30
|
-
if (match($0, /"timestamp"\s*:\s*[0-9]+/)) {
|
|
73
|
+
if (match($0, /"timestamp"[[:space:]]*:[[:space:]]*[0-9]+/)) {
|
|
31
74
|
ts = substr($0, RSTART, RLENGTH)
|
|
32
75
|
gsub(/[^0-9]/, "", ts)
|
|
33
76
|
ts = int(ts)
|
|
34
77
|
if (ts < cutoff) next
|
|
35
78
|
}
|
|
36
79
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
# 提取 content(可能是字符串或数组)
|
|
40
|
-
if (match($0, /"content"\s*:\s*\[/)) {
|
|
41
|
-
# 数组形式,提取所有 text 字段
|
|
80
|
+
if (match($0, /"role"[[:space:]]*:[[:space:]]*"assistant"/)) {
|
|
81
|
+
if (match($0, /"content"[[:space:]]*:[[:space:]]*\[/)) {
|
|
42
82
|
gsub(/[^{]*\[/, "", $0)
|
|
43
83
|
gsub(/\].*/, "", $0)
|
|
44
|
-
while (match($0, /"text"
|
|
84
|
+
while (match($0, /"text"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
|
|
45
85
|
t = substr($0, RSTART, RLENGTH)
|
|
46
|
-
gsub(/"text"
|
|
86
|
+
gsub(/"text"[[:space:]]*:[[:space:]]*"/, "", t)
|
|
47
87
|
gsub(/"$/, "", t)
|
|
48
88
|
if (t != "") print t
|
|
49
89
|
$0 = substr($0, RSTART + RLENGTH)
|
|
50
90
|
}
|
|
51
|
-
} else if (match($0, /"content"
|
|
91
|
+
} else if (match($0, /"content"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
|
|
52
92
|
t = substr($0, RSTART, RLENGTH)
|
|
53
|
-
gsub(/"content"
|
|
93
|
+
gsub(/"content"[[:space:]]*:[[:space:]]*"/, "", t)
|
|
54
94
|
gsub(/"$/, "", t)
|
|
55
95
|
if (t != "") print t
|
|
56
96
|
}
|
|
57
97
|
}
|
|
58
98
|
}
|
|
59
|
-
' "$transcript" 2>/dev/null | sort -u | tr '\n' ' '
|
|
99
|
+
' "$transcript" 2>/dev/null | sort -u | tr '\n' ' '
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
# 从 stdin JSON 提取 transcript 路径和最后一条消息
|
|
103
|
+
transcript=$(json_value "transcript_path")
|
|
104
|
+
last_msg=$(json_value "last_assistant_message")
|
|
105
|
+
|
|
106
|
+
all_text="$last_msg"
|
|
107
|
+
|
|
108
|
+
# 如果有 transcript 文件,提取最近 30 秒内的所有 assistant 消息
|
|
109
|
+
if [[ -n "$transcript" && -f "$transcript" ]]; then
|
|
110
|
+
# 计算 30 秒前的时间戳(毫秒)
|
|
111
|
+
cutoff=$(($(date +%s) * 1000 - 30000))
|
|
112
|
+
|
|
113
|
+
# 优先用 JSON parser,Node 不存在时回退到简易 awk。
|
|
114
|
+
extra=$(extract_recent_assistant_text "$transcript" "$cutoff")
|
|
60
115
|
|
|
61
116
|
if [[ -n "$extra" ]]; then
|
|
62
117
|
all_text="$extra"
|
package/main.go
CHANGED
|
@@ -17,6 +17,7 @@ import (
|
|
|
17
17
|
"os/exec"
|
|
18
18
|
"os/signal"
|
|
19
19
|
"path/filepath"
|
|
20
|
+
"regexp"
|
|
20
21
|
"strings"
|
|
21
22
|
"sync"
|
|
22
23
|
"syscall"
|
|
@@ -25,11 +26,6 @@ import (
|
|
|
25
26
|
"gopkg.in/natefinch/lumberjack.v2"
|
|
26
27
|
)
|
|
27
28
|
|
|
28
|
-
const (
|
|
29
|
-
ttsMaxAttempts = 2
|
|
30
|
-
ttsRetryBackoff = 400 * time.Millisecond
|
|
31
|
-
)
|
|
32
|
-
|
|
33
29
|
var configDir = os.ExpandEnv("$HOME/.config/iSpeak")
|
|
34
30
|
|
|
35
31
|
var (
|
|
@@ -47,6 +43,25 @@ var tempDir string
|
|
|
47
43
|
|
|
48
44
|
var errAlreadyRunning = errors.New("iSpeak already running")
|
|
49
45
|
|
|
46
|
+
var (
|
|
47
|
+
markdownLinkRe = regexp.MustCompile(`\[[^\]]+\]\(([^)]*)\)`)
|
|
48
|
+
absolutePathRe = regexp.MustCompile(`/(?:Users|private|tmp|var|opt|usr|bin|sbin|etc|Library|Applications)/\S+`)
|
|
49
|
+
commitHashRe = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)
|
|
50
|
+
uuidRe = regexp.MustCompile(`\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b`)
|
|
51
|
+
urlRe = regexp.MustCompile(`https?://\S+`)
|
|
52
|
+
ansiEscapeRe = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z]`)
|
|
53
|
+
multiSpaceRe = regexp.MustCompile(`\s+`)
|
|
54
|
+
markdownListRe = regexp.MustCompile(`^\s*(?:[-*+]\s+|\d+[.)]\s+)`)
|
|
55
|
+
htmlTagRe = regexp.MustCompile(`<[^>]+>`)
|
|
56
|
+
codeFenceStartRe = regexp.MustCompile("^```")
|
|
57
|
+
artifactStartRe = regexp.MustCompile(`(?i)^<artifact\b`)
|
|
58
|
+
htmlDocumentLineRe = regexp.MustCompile(`(?i)^<!doctype html|^<html\b|^<head\b|^<body\b|^<style\b|^</`)
|
|
59
|
+
progressNoiseRe = regexp.MustCompile(`(?i)(^\s*\d{1,3}%\s*$|\d{1,3}%.*\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s|\bETA\b|^\s*[-=]{3,}\s*$)`)
|
|
60
|
+
speedNoiseRe = regexp.MustCompile(`(?i)\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s`)
|
|
61
|
+
etaNoiseRe = regexp.MustCompile(`(?i)\bETA\b|预计剩余|剩余时间`)
|
|
62
|
+
fileListNoiseRe = regexp.MustCompile(`(?i)\.(?:go|js|ts|tsx|jsx|json|md|yaml|yml|toml|sum|mod|lock|html|css|sh|plist|safetensors|mp3|wav|png|jpg|jpeg|pdf|docx)\b`)
|
|
63
|
+
)
|
|
64
|
+
|
|
50
65
|
type StreamPlayer interface {
|
|
51
66
|
Write(audio []byte) error
|
|
52
67
|
CloseAndWait() error
|
|
@@ -54,9 +69,13 @@ type StreamPlayer interface {
|
|
|
54
69
|
}
|
|
55
70
|
|
|
56
71
|
type ffplayStreamPlayer struct {
|
|
57
|
-
path
|
|
58
|
-
cmd
|
|
59
|
-
|
|
72
|
+
path string
|
|
73
|
+
cmd *exec.Cmd
|
|
74
|
+
|
|
75
|
+
mu sync.Mutex
|
|
76
|
+
stdin io.WriteCloser
|
|
77
|
+
waitOnce sync.Once
|
|
78
|
+
waitErr error
|
|
60
79
|
}
|
|
61
80
|
|
|
62
81
|
func newDefaultStreamPlayer() (StreamPlayer, error) {
|
|
@@ -99,35 +118,55 @@ func (p *ffplayStreamPlayer) Write(audio []byte) error {
|
|
|
99
118
|
if len(audio) == 0 {
|
|
100
119
|
return nil
|
|
101
120
|
}
|
|
102
|
-
|
|
121
|
+
p.mu.Lock()
|
|
122
|
+
stdin := p.stdin
|
|
123
|
+
p.mu.Unlock()
|
|
124
|
+
if stdin == nil {
|
|
125
|
+
return fmt.Errorf("播放器输入已关闭")
|
|
126
|
+
}
|
|
127
|
+
if _, err := stdin.Write(audio); err != nil {
|
|
103
128
|
return fmt.Errorf("写入播放器失败: %w", err)
|
|
104
129
|
}
|
|
105
130
|
return nil
|
|
106
131
|
}
|
|
107
132
|
|
|
108
133
|
func (p *ffplayStreamPlayer) CloseAndWait() error {
|
|
109
|
-
|
|
110
|
-
|
|
134
|
+
p.mu.Lock()
|
|
135
|
+
stdin := p.stdin
|
|
136
|
+
p.stdin = nil
|
|
137
|
+
p.mu.Unlock()
|
|
138
|
+
if stdin != nil {
|
|
139
|
+
if err := stdin.Close(); err != nil {
|
|
111
140
|
return fmt.Errorf("关闭播放器输入失败: %w", err)
|
|
112
141
|
}
|
|
113
|
-
p.stdin = nil
|
|
114
142
|
}
|
|
115
|
-
if err := p.
|
|
143
|
+
if err := p.wait(); err != nil {
|
|
116
144
|
return fmt.Errorf("ffplay failed: %w", err)
|
|
117
145
|
}
|
|
118
146
|
return nil
|
|
119
147
|
}
|
|
120
148
|
|
|
121
149
|
func (p *ffplayStreamPlayer) Abort() error {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
150
|
+
p.mu.Lock()
|
|
151
|
+
stdin := p.stdin
|
|
152
|
+
p.stdin = nil
|
|
153
|
+
p.mu.Unlock()
|
|
154
|
+
if stdin != nil {
|
|
155
|
+
_ = stdin.Close()
|
|
125
156
|
}
|
|
126
157
|
if p.cmd != nil && p.cmd.Process != nil {
|
|
127
158
|
_ = p.cmd.Process.Kill()
|
|
128
|
-
_ = p.cmd.Wait()
|
|
129
159
|
}
|
|
130
|
-
return
|
|
160
|
+
return p.wait()
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
func (p *ffplayStreamPlayer) wait() error {
|
|
164
|
+
p.waitOnce.Do(func() {
|
|
165
|
+
if p.cmd != nil {
|
|
166
|
+
p.waitErr = p.cmd.Wait()
|
|
167
|
+
}
|
|
168
|
+
})
|
|
169
|
+
return p.waitErr
|
|
131
170
|
}
|
|
132
171
|
|
|
133
172
|
type bufferedStreamPlayer struct {
|
|
@@ -183,8 +222,11 @@ type TaskEngine struct {
|
|
|
183
222
|
mu sync.Mutex
|
|
184
223
|
|
|
185
224
|
nextID uint64
|
|
225
|
+
latestID uint64
|
|
186
226
|
tasks map[uint64]*Task
|
|
187
227
|
pendingSynth []uint64
|
|
228
|
+
activeID uint64
|
|
229
|
+
activeCancel context.CancelFunc
|
|
188
230
|
|
|
189
231
|
synthWake chan struct{}
|
|
190
232
|
|
|
@@ -207,7 +249,6 @@ func (e *TaskEngine) Start() {
|
|
|
207
249
|
|
|
208
250
|
func (e *TaskEngine) Submit(text string, voice VoiceInfo, cfg Config) uint64 {
|
|
209
251
|
e.mu.Lock()
|
|
210
|
-
defer e.mu.Unlock()
|
|
211
252
|
|
|
212
253
|
// 新任务进来先删所有未开始合成任务
|
|
213
254
|
for _, id := range e.pendingSynth {
|
|
@@ -216,6 +257,12 @@ func (e *TaskEngine) Submit(text string, voice VoiceInfo, cfg Config) uint64 {
|
|
|
216
257
|
}
|
|
217
258
|
e.pendingSynth = e.pendingSynth[:0]
|
|
218
259
|
|
|
260
|
+
cancelActive := e.activeCancel
|
|
261
|
+
activeID := e.activeID
|
|
262
|
+
if activeID != 0 {
|
|
263
|
+
log.Printf("打断当前播报任务: id=%d", activeID)
|
|
264
|
+
}
|
|
265
|
+
|
|
219
266
|
e.nextID++
|
|
220
267
|
task := &Task{
|
|
221
268
|
ID: e.nextID,
|
|
@@ -225,10 +272,16 @@ func (e *TaskEngine) Submit(text string, voice VoiceInfo, cfg Config) uint64 {
|
|
|
225
272
|
Cfg: cfg,
|
|
226
273
|
}
|
|
227
274
|
e.tasks[task.ID] = task
|
|
275
|
+
e.latestID = task.ID
|
|
228
276
|
e.pendingSynth = append(e.pendingSynth, task.ID)
|
|
229
277
|
log.Printf("任务创建: id=%d text=%s", task.ID, text)
|
|
230
278
|
|
|
231
279
|
notify(e.synthWake)
|
|
280
|
+
e.mu.Unlock()
|
|
281
|
+
|
|
282
|
+
if cancelActive != nil {
|
|
283
|
+
cancelActive()
|
|
284
|
+
}
|
|
232
285
|
return task.ID
|
|
233
286
|
}
|
|
234
287
|
|
|
@@ -252,24 +305,27 @@ func (e *TaskEngine) processSpeakTask(id uint64) {
|
|
|
252
305
|
}
|
|
253
306
|
}()
|
|
254
307
|
|
|
308
|
+
ctx, cancel := context.WithCancel(context.Background())
|
|
309
|
+
e.setActiveTask(id, cancel)
|
|
310
|
+
defer e.clearActiveTask(id)
|
|
311
|
+
|
|
255
312
|
task, ok := e.getTask(id)
|
|
256
313
|
if !ok {
|
|
257
314
|
return
|
|
258
315
|
}
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
break
|
|
265
|
-
}
|
|
266
|
-
if i < ttsMaxAttempts {
|
|
267
|
-
time.Sleep(ttsRetryBackoff)
|
|
268
|
-
}
|
|
316
|
+
if !e.isLatestTask(id) {
|
|
317
|
+
cancel()
|
|
318
|
+
log.Printf("跳过过期播报任务: id=%d", id)
|
|
319
|
+
e.deleteTask(id)
|
|
320
|
+
return
|
|
269
321
|
}
|
|
270
322
|
|
|
271
|
-
if
|
|
272
|
-
|
|
323
|
+
if err := e.speakOnce(ctx, task); err != nil {
|
|
324
|
+
if errors.Is(err, context.Canceled) {
|
|
325
|
+
log.Printf("播报已打断并删除任务: id=%d", id)
|
|
326
|
+
} else {
|
|
327
|
+
log.Printf("播报失败并删除任务: id=%d err=%v", id, err)
|
|
328
|
+
}
|
|
273
329
|
e.deleteTask(id)
|
|
274
330
|
return
|
|
275
331
|
}
|
|
@@ -299,9 +355,21 @@ func (e *TaskEngine) speakOnce(ctx context.Context, task *Task) error {
|
|
|
299
355
|
return err
|
|
300
356
|
}
|
|
301
357
|
log.Printf("TTS 流结束: id=%d elapsed=%s", task.ID, time.Since(startedAt).Round(time.Millisecond))
|
|
302
|
-
|
|
358
|
+
|
|
359
|
+
done := make(chan error, 1)
|
|
360
|
+
go func() {
|
|
361
|
+
done <- player.CloseAndWait()
|
|
362
|
+
}()
|
|
363
|
+
select {
|
|
364
|
+
case err := <-done:
|
|
365
|
+
if err != nil {
|
|
366
|
+
_ = player.Abort()
|
|
367
|
+
return err
|
|
368
|
+
}
|
|
369
|
+
case <-ctx.Done():
|
|
303
370
|
_ = player.Abort()
|
|
304
|
-
|
|
371
|
+
<-done
|
|
372
|
+
return ctx.Err()
|
|
305
373
|
}
|
|
306
374
|
return nil
|
|
307
375
|
}
|
|
@@ -344,6 +412,28 @@ func (e *TaskEngine) deleteTask(id uint64) {
|
|
|
344
412
|
delete(e.tasks, id)
|
|
345
413
|
}
|
|
346
414
|
|
|
415
|
+
func (e *TaskEngine) setActiveTask(id uint64, cancel context.CancelFunc) {
|
|
416
|
+
e.mu.Lock()
|
|
417
|
+
defer e.mu.Unlock()
|
|
418
|
+
e.activeID = id
|
|
419
|
+
e.activeCancel = cancel
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
func (e *TaskEngine) clearActiveTask(id uint64) {
|
|
423
|
+
e.mu.Lock()
|
|
424
|
+
defer e.mu.Unlock()
|
|
425
|
+
if e.activeID == id {
|
|
426
|
+
e.activeID = 0
|
|
427
|
+
e.activeCancel = nil
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
func (e *TaskEngine) isLatestTask(id uint64) bool {
|
|
432
|
+
e.mu.Lock()
|
|
433
|
+
defer e.mu.Unlock()
|
|
434
|
+
return e.latestID == id
|
|
435
|
+
}
|
|
436
|
+
|
|
347
437
|
func notify(ch chan struct{}) {
|
|
348
438
|
select {
|
|
349
439
|
case ch <- struct{}{}:
|
|
@@ -387,6 +477,17 @@ func loadConfig() Config {
|
|
|
387
477
|
}
|
|
388
478
|
var cfg Config
|
|
389
479
|
if json.Unmarshal(data, &cfg) == nil && cfg.APIKey != "" {
|
|
480
|
+
if err := validateConfig(cfg); err != nil {
|
|
481
|
+
log.Printf("配置文件无效: %s err=%v", p, err)
|
|
482
|
+
configCacheMu.Lock()
|
|
483
|
+
if configCacheValid {
|
|
484
|
+
cached := configCache
|
|
485
|
+
configCacheMu.Unlock()
|
|
486
|
+
return cached
|
|
487
|
+
}
|
|
488
|
+
configCacheMu.Unlock()
|
|
489
|
+
return cfg
|
|
490
|
+
}
|
|
390
491
|
log.Printf("配置文件: %s", p)
|
|
391
492
|
if cfg.DefaultVoice != nil {
|
|
392
493
|
log.Printf("默认音色: %s (%s)", cfg.DefaultVoice.VoiceType, cfg.DefaultVoice.ResourceID)
|
|
@@ -637,29 +738,56 @@ func extractAudioBase64(event map[string]any) string {
|
|
|
637
738
|
return ""
|
|
638
739
|
}
|
|
639
740
|
|
|
640
|
-
//
|
|
741
|
+
// 过滤格式符号,保留自然朗读文本。
|
|
742
|
+
// 顺序很重要:先跳过跨行块结构,再跳过整行噪声,最后清理行内符号。
|
|
641
743
|
func cleanText(text string) string {
|
|
642
744
|
var lines []string
|
|
643
|
-
|
|
745
|
+
rawLines := strings.Split(text, "\n")
|
|
746
|
+
inCodeBlock := false
|
|
747
|
+
inArtifact := false
|
|
748
|
+
inMarkdownTable := false
|
|
749
|
+
for i := 0; i < len(rawLines); i++ {
|
|
750
|
+
line := rawLines[i]
|
|
644
751
|
line = strings.TrimSpace(line)
|
|
645
|
-
if
|
|
752
|
+
if line == "" {
|
|
753
|
+
inMarkdownTable = false
|
|
754
|
+
continue
|
|
755
|
+
}
|
|
756
|
+
if codeFenceStartRe.MatchString(line) {
|
|
757
|
+
inCodeBlock = !inCodeBlock
|
|
758
|
+
continue
|
|
759
|
+
}
|
|
760
|
+
if inCodeBlock {
|
|
761
|
+
continue
|
|
762
|
+
}
|
|
763
|
+
if artifactStartRe.MatchString(line) {
|
|
764
|
+
inArtifact = !strings.Contains(strings.ToLower(line), "</artifact>")
|
|
646
765
|
continue
|
|
647
766
|
}
|
|
648
|
-
if
|
|
767
|
+
if inArtifact {
|
|
768
|
+
if strings.Contains(strings.ToLower(line), "</artifact>") {
|
|
769
|
+
inArtifact = false
|
|
770
|
+
}
|
|
771
|
+
continue
|
|
772
|
+
}
|
|
773
|
+
if isMarkdownTableSeparator(line) {
|
|
774
|
+
if len(lines) > 0 && isMarkdownTableRow(strings.TrimSpace(rawLines[i-1])) {
|
|
775
|
+
lines = lines[:len(lines)-1]
|
|
776
|
+
}
|
|
777
|
+
inMarkdownTable = true
|
|
649
778
|
continue
|
|
650
779
|
}
|
|
651
|
-
|
|
652
|
-
|
|
780
|
+
if inMarkdownTable {
|
|
781
|
+
if isMarkdownTableRow(line) {
|
|
782
|
+
continue
|
|
783
|
+
}
|
|
784
|
+
inMarkdownTable = false
|
|
785
|
+
}
|
|
786
|
+
if shouldSkipSpeechLine(line) {
|
|
653
787
|
continue
|
|
654
788
|
}
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
"*", "",
|
|
658
|
-
"`", "",
|
|
659
|
-
"#", "",
|
|
660
|
-
">", "",
|
|
661
|
-
).Replace(line)
|
|
662
|
-
cleaned = strings.TrimSpace(cleaned)
|
|
789
|
+
|
|
790
|
+
cleaned := cleanSpeechLine(line)
|
|
663
791
|
if cleaned != "" {
|
|
664
792
|
lines = append(lines, cleaned)
|
|
665
793
|
}
|
|
@@ -667,6 +795,110 @@ func cleanText(text string) string {
|
|
|
667
795
|
return strings.Join(lines, ",")
|
|
668
796
|
}
|
|
669
797
|
|
|
798
|
+
func shouldSkipSpeechLine(line string) bool {
|
|
799
|
+
if isMarkdownTableSeparator(line) {
|
|
800
|
+
return true
|
|
801
|
+
}
|
|
802
|
+
if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
|
|
803
|
+
return true
|
|
804
|
+
}
|
|
805
|
+
if htmlDocumentLineRe.MatchString(line) {
|
|
806
|
+
return true
|
|
807
|
+
}
|
|
808
|
+
if isProgressNoiseLine(line) {
|
|
809
|
+
return true
|
|
810
|
+
}
|
|
811
|
+
if isMostlyTableRow(line) {
|
|
812
|
+
return true
|
|
813
|
+
}
|
|
814
|
+
if isMostlyFileListLine(line) {
|
|
815
|
+
return true
|
|
816
|
+
}
|
|
817
|
+
return false
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
func isMarkdownTableSeparator(line string) bool {
|
|
821
|
+
line = strings.TrimSpace(line)
|
|
822
|
+
return strings.Contains(line, "|") && strings.Trim(line, "|-: ") == ""
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
func isMarkdownTableRow(line string) bool {
|
|
826
|
+
line = strings.TrimSpace(line)
|
|
827
|
+
return strings.Count(line, "|") >= 2
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
func cleanSpeechLine(line string) string {
|
|
831
|
+
// Markdown 链接必须在 URL 删除前处理,否则会丢掉链接标题。
|
|
832
|
+
line = ansiEscapeRe.ReplaceAllString(line, "")
|
|
833
|
+
line = markdownListRe.ReplaceAllString(line, "")
|
|
834
|
+
line = markdownLinkRe.ReplaceAllStringFunc(line, func(match string) string {
|
|
835
|
+
if end := strings.Index(match, "]"); end > 1 {
|
|
836
|
+
return match[1:end]
|
|
837
|
+
}
|
|
838
|
+
return ""
|
|
839
|
+
})
|
|
840
|
+
line = urlRe.ReplaceAllString(line, "")
|
|
841
|
+
line = absolutePathRe.ReplaceAllString(line, " 路径 ")
|
|
842
|
+
// UUID 必须在短 hash 前处理,避免先删短片段后破坏 UUID 识别。
|
|
843
|
+
line = uuidRe.ReplaceAllString(line, "")
|
|
844
|
+
line = commitHashRe.ReplaceAllString(line, "")
|
|
845
|
+
line = htmlTagRe.ReplaceAllString(line, "")
|
|
846
|
+
line = strings.NewReplacer(
|
|
847
|
+
"**", "",
|
|
848
|
+
"*", "",
|
|
849
|
+
"`", "",
|
|
850
|
+
"#", "",
|
|
851
|
+
">", "",
|
|
852
|
+
"✅", "",
|
|
853
|
+
"❌", "",
|
|
854
|
+
"✓", "",
|
|
855
|
+
"✗", "",
|
|
856
|
+
"→", "到",
|
|
857
|
+
).Replace(line)
|
|
858
|
+
line = strings.Trim(line, " \t-:|")
|
|
859
|
+
line = multiSpaceRe.ReplaceAllString(line, " ")
|
|
860
|
+
return strings.TrimSpace(line)
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
func isMostlyTableRow(line string) bool {
|
|
864
|
+
if !strings.Contains(line, "|") {
|
|
865
|
+
return false
|
|
866
|
+
}
|
|
867
|
+
return strings.Count(line, "|") >= 2 && len([]rune(line)) > 40
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
func isProgressNoiseLine(line string) bool {
|
|
871
|
+
if !progressNoiseRe.MatchString(line) {
|
|
872
|
+
return false
|
|
873
|
+
}
|
|
874
|
+
if speedNoiseRe.MatchString(line) || etaNoiseRe.MatchString(line) {
|
|
875
|
+
return true
|
|
876
|
+
}
|
|
877
|
+
return !containsCJK(line)
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
func isMostlyFileListLine(line string) bool {
|
|
881
|
+
if !fileListNoiseRe.MatchString(line) {
|
|
882
|
+
return false
|
|
883
|
+
}
|
|
884
|
+
if containsCJK(line) {
|
|
885
|
+
return false
|
|
886
|
+
}
|
|
887
|
+
if strings.Contains(line, ".safetensors") {
|
|
888
|
+
return true
|
|
889
|
+
}
|
|
890
|
+
return strings.Count(line, ".") >= 2 || strings.Contains(line, "/") || strings.Contains(line, " - ")
|
|
891
|
+
}
|
|
892
|
+
|
|
893
|
+
func containsCJK(s string) bool {
|
|
894
|
+
for _, r := range s {
|
|
895
|
+
if r >= '\u4e00' && r <= '\u9fff' {
|
|
896
|
+
return true
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
return false
|
|
900
|
+
}
|
|
901
|
+
|
|
670
902
|
func main() {
|
|
671
903
|
log.SetFlags(log.Ltime | log.Lshortfile)
|
|
672
904
|
|
|
@@ -779,8 +1011,26 @@ func validateConfig(cfg Config) error {
|
|
|
779
1011
|
if cfg.Endpoint == "" {
|
|
780
1012
|
return fmt.Errorf("endpoint 未设置")
|
|
781
1013
|
}
|
|
782
|
-
if
|
|
783
|
-
return
|
|
1014
|
+
if err := validateVoiceInfo("defaultVoice", cfg.DefaultVoice); err != nil {
|
|
1015
|
+
return err
|
|
1016
|
+
}
|
|
1017
|
+
for source, voice := range cfg.SourceVoices {
|
|
1018
|
+
if err := validateVoiceInfo(fmt.Sprintf("sourceVoices.%s", source), voice); err != nil {
|
|
1019
|
+
return err
|
|
1020
|
+
}
|
|
1021
|
+
}
|
|
1022
|
+
return nil
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
func validateVoiceInfo(name string, voice *VoiceInfo) error {
|
|
1026
|
+
if voice == nil {
|
|
1027
|
+
return fmt.Errorf("%s 未设置", name)
|
|
1028
|
+
}
|
|
1029
|
+
if voice.VoiceType == "" {
|
|
1030
|
+
return fmt.Errorf("%s.voice_type 未设置", name)
|
|
1031
|
+
}
|
|
1032
|
+
if voice.ResourceID == "" {
|
|
1033
|
+
return fmt.Errorf("%s.resourceId 未设置", name)
|
|
784
1034
|
}
|
|
785
1035
|
return nil
|
|
786
1036
|
}
|
|
@@ -809,13 +1059,24 @@ func handleConnection(conn net.Conn, engine *TaskEngine) {
|
|
|
809
1059
|
defer conn.Close()
|
|
810
1060
|
|
|
811
1061
|
cfg := loadConfig()
|
|
1062
|
+
if err := validateConfig(cfg); err != nil {
|
|
1063
|
+
log.Printf("配置错误,跳过本次播报: %v", err)
|
|
1064
|
+
return
|
|
1065
|
+
}
|
|
812
1066
|
|
|
813
1067
|
var sb strings.Builder
|
|
814
1068
|
scanner := bufio.NewScanner(conn)
|
|
815
1069
|
scanner.Buffer(make([]byte, 1*1024*1024), 1*1024*1024)
|
|
816
1070
|
for scanner.Scan() {
|
|
1071
|
+
if sb.Len() > 0 {
|
|
1072
|
+
sb.WriteByte('\n')
|
|
1073
|
+
}
|
|
817
1074
|
sb.WriteString(scanner.Text())
|
|
818
1075
|
}
|
|
1076
|
+
if err := scanner.Err(); err != nil {
|
|
1077
|
+
log.Printf("读取 socket 消息失败: %v", err)
|
|
1078
|
+
return
|
|
1079
|
+
}
|
|
819
1080
|
|
|
820
1081
|
text := strings.TrimSpace(sb.String())
|
|
821
1082
|
if text == "" {
|
package/package.json
CHANGED