@xdfnet/ispeak 1.6.5 → 1.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Docs/ARCHITECTURE.md +1 -1
- package/Docs/HOOK_TEXT_EXTRACTION.md +241 -0
- package/README.md +17 -14
- package/clean_text.go +153 -0
- package/configs/config.example.json +1 -1
- package/configs/hook-speak.sh +173 -96
- package/main.go +70 -379
- package/npm/postinstall.js +41 -2
- package/package.json +4 -2
- package/scripts/ispeak +1 -1
package/main.go
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// ttsd — 独立 TTS 播报守护进程
|
|
2
|
-
// 监听 Unix Socket,收到文本 → 字节跳动 TTS SSE →
|
|
2
|
+
// 监听 Unix Socket,收到文本 → 字节跳动 TTS SSE/PCM → 原生流式播放
|
|
3
3
|
package main
|
|
4
4
|
|
|
5
5
|
import (
|
|
@@ -14,10 +14,8 @@ import (
|
|
|
14
14
|
"net"
|
|
15
15
|
"net/http"
|
|
16
16
|
"os"
|
|
17
|
-
"os/exec"
|
|
18
17
|
"os/signal"
|
|
19
18
|
"path/filepath"
|
|
20
|
-
"regexp"
|
|
21
19
|
"strings"
|
|
22
20
|
"sync"
|
|
23
21
|
"syscall"
|
|
@@ -43,162 +41,12 @@ var tempDir string
|
|
|
43
41
|
|
|
44
42
|
var errAlreadyRunning = errors.New("iSpeak already running")
|
|
45
43
|
|
|
46
|
-
var (
|
|
47
|
-
markdownLinkRe = regexp.MustCompile(`\[[^\]]+\]\(([^)]*)\)`)
|
|
48
|
-
absolutePathRe = regexp.MustCompile(`/(?:Users|private|tmp|var|opt|usr|bin|sbin|etc|Library|Applications)/\S+`)
|
|
49
|
-
commitHashRe = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)
|
|
50
|
-
uuidRe = regexp.MustCompile(`\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b`)
|
|
51
|
-
urlRe = regexp.MustCompile(`https?://\S+`)
|
|
52
|
-
ansiEscapeRe = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z]`)
|
|
53
|
-
multiSpaceRe = regexp.MustCompile(`\s+`)
|
|
54
|
-
markdownListRe = regexp.MustCompile(`^\s*(?:[-*+]\s+|\d+[.)]\s+)`)
|
|
55
|
-
htmlTagRe = regexp.MustCompile(`<[^>]+>`)
|
|
56
|
-
codeFenceStartRe = regexp.MustCompile("^```")
|
|
57
|
-
artifactStartRe = regexp.MustCompile(`(?i)^<artifact\b`)
|
|
58
|
-
htmlDocumentLineRe = regexp.MustCompile(`(?i)^<!doctype html|^<html\b|^<head\b|^<body\b|^<style\b|^</`)
|
|
59
|
-
progressNoiseRe = regexp.MustCompile(`(?i)(^\s*\d{1,3}%\s*$|\d{1,3}%.*\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s|\bETA\b|^\s*[-=]{3,}\s*$)`)
|
|
60
|
-
speedNoiseRe = regexp.MustCompile(`(?i)\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s`)
|
|
61
|
-
etaNoiseRe = regexp.MustCompile(`(?i)\bETA\b|预计剩余|剩余时间`)
|
|
62
|
-
fileListNoiseRe = regexp.MustCompile(`(?i)\.(?:go|js|ts|tsx|jsx|json|md|yaml|yml|toml|sum|mod|lock|html|css|sh|plist|safetensors|mp3|wav|png|jpg|jpeg|pdf|docx)\b`)
|
|
63
|
-
)
|
|
64
|
-
|
|
65
44
|
type StreamPlayer interface {
|
|
66
45
|
Write(audio []byte) error
|
|
67
46
|
CloseAndWait() error
|
|
68
47
|
Abort() error
|
|
69
48
|
}
|
|
70
49
|
|
|
71
|
-
type ffplayStreamPlayer struct {
|
|
72
|
-
path string
|
|
73
|
-
cmd *exec.Cmd
|
|
74
|
-
|
|
75
|
-
mu sync.Mutex
|
|
76
|
-
stdin io.WriteCloser
|
|
77
|
-
waitOnce sync.Once
|
|
78
|
-
waitErr error
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
func newDefaultStreamPlayer() (StreamPlayer, error) {
|
|
82
|
-
if path, ok := findExecutable("ffplay", "/opt/homebrew/bin/ffplay", "/usr/local/bin/ffplay"); ok {
|
|
83
|
-
log.Printf("播放器模式: ffplay 流式 stdin (%s)", path)
|
|
84
|
-
return newFFplayStreamPlayer(path)
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
log.Printf("播放器模式: afplay 完整音频 fallback")
|
|
88
|
-
return &bufferedStreamPlayer{}, nil
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
func findExecutable(name string, candidates ...string) (string, bool) {
|
|
92
|
-
if path, err := exec.LookPath(name); err == nil {
|
|
93
|
-
return path, true
|
|
94
|
-
}
|
|
95
|
-
for _, path := range candidates {
|
|
96
|
-
if st, err := os.Stat(path); err == nil && !st.IsDir() && st.Mode()&0111 != 0 {
|
|
97
|
-
return path, true
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
return "", false
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
func newFFplayStreamPlayer(path string) (*ffplayStreamPlayer, error) {
|
|
104
|
-
cmd := exec.Command(path, "-nodisp", "-autoexit", "-loglevel", "error", "-i", "pipe:0")
|
|
105
|
-
stdin, err := cmd.StdinPipe()
|
|
106
|
-
if err != nil {
|
|
107
|
-
return nil, err
|
|
108
|
-
}
|
|
109
|
-
cmd.Stderr = os.Stderr
|
|
110
|
-
if err := cmd.Start(); err != nil {
|
|
111
|
-
_ = stdin.Close()
|
|
112
|
-
return nil, err
|
|
113
|
-
}
|
|
114
|
-
return &ffplayStreamPlayer{path: path, cmd: cmd, stdin: stdin}, nil
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
func (p *ffplayStreamPlayer) Write(audio []byte) error {
|
|
118
|
-
if len(audio) == 0 {
|
|
119
|
-
return nil
|
|
120
|
-
}
|
|
121
|
-
p.mu.Lock()
|
|
122
|
-
stdin := p.stdin
|
|
123
|
-
p.mu.Unlock()
|
|
124
|
-
if stdin == nil {
|
|
125
|
-
return fmt.Errorf("播放器输入已关闭")
|
|
126
|
-
}
|
|
127
|
-
if _, err := stdin.Write(audio); err != nil {
|
|
128
|
-
return fmt.Errorf("写入播放器失败: %w", err)
|
|
129
|
-
}
|
|
130
|
-
return nil
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
func (p *ffplayStreamPlayer) CloseAndWait() error {
|
|
134
|
-
p.mu.Lock()
|
|
135
|
-
stdin := p.stdin
|
|
136
|
-
p.stdin = nil
|
|
137
|
-
p.mu.Unlock()
|
|
138
|
-
if stdin != nil {
|
|
139
|
-
if err := stdin.Close(); err != nil {
|
|
140
|
-
return fmt.Errorf("关闭播放器输入失败: %w", err)
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
if err := p.wait(); err != nil {
|
|
144
|
-
return fmt.Errorf("ffplay failed: %w", err)
|
|
145
|
-
}
|
|
146
|
-
return nil
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
func (p *ffplayStreamPlayer) Abort() error {
|
|
150
|
-
p.mu.Lock()
|
|
151
|
-
stdin := p.stdin
|
|
152
|
-
p.stdin = nil
|
|
153
|
-
p.mu.Unlock()
|
|
154
|
-
if stdin != nil {
|
|
155
|
-
_ = stdin.Close()
|
|
156
|
-
}
|
|
157
|
-
if p.cmd != nil && p.cmd.Process != nil {
|
|
158
|
-
_ = p.cmd.Process.Kill()
|
|
159
|
-
}
|
|
160
|
-
return p.wait()
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
func (p *ffplayStreamPlayer) wait() error {
|
|
164
|
-
p.waitOnce.Do(func() {
|
|
165
|
-
if p.cmd != nil {
|
|
166
|
-
p.waitErr = p.cmd.Wait()
|
|
167
|
-
}
|
|
168
|
-
})
|
|
169
|
-
return p.waitErr
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
type bufferedStreamPlayer struct {
|
|
173
|
-
chunks [][]byte
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
func (p *bufferedStreamPlayer) Write(audio []byte) error {
|
|
177
|
-
if len(audio) == 0 {
|
|
178
|
-
return nil
|
|
179
|
-
}
|
|
180
|
-
chunk := append([]byte(nil), audio...)
|
|
181
|
-
p.chunks = append(p.chunks, chunk)
|
|
182
|
-
return nil
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
func (p *bufferedStreamPlayer) CloseAndWait() error {
|
|
186
|
-
total := 0
|
|
187
|
-
for _, chunk := range p.chunks {
|
|
188
|
-
total += len(chunk)
|
|
189
|
-
}
|
|
190
|
-
audio := make([]byte, 0, total)
|
|
191
|
-
for _, chunk := range p.chunks {
|
|
192
|
-
audio = append(audio, chunk...)
|
|
193
|
-
}
|
|
194
|
-
return playAudio(audio)
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
func (p *bufferedStreamPlayer) Abort() error {
|
|
198
|
-
p.chunks = nil
|
|
199
|
-
return nil
|
|
200
|
-
}
|
|
201
|
-
|
|
202
50
|
// 任务状态
|
|
203
51
|
// 生命周期:pending -> running -> delete
|
|
204
52
|
type TaskStatus int
|
|
@@ -332,13 +180,13 @@ func (e *TaskEngine) runTransaction(task *Task) error {
|
|
|
332
180
|
|
|
333
181
|
if err := e.synthesizeStreamFn(context.Background(), task.Cfg, task.Text, &task.Voice, onAudio); err != nil {
|
|
334
182
|
_ = player.Abort()
|
|
335
|
-
return err
|
|
183
|
+
return fmt.Errorf("TTS 合成失败: id=%d: %w", task.ID, err)
|
|
336
184
|
}
|
|
337
185
|
log.Printf("TTS 流结束: id=%d elapsed=%s", task.ID, time.Since(startedAt).Round(time.Millisecond))
|
|
338
186
|
|
|
339
187
|
if err := player.CloseAndWait(); err != nil {
|
|
340
188
|
_ = player.Abort()
|
|
341
|
-
return err
|
|
189
|
+
return fmt.Errorf("播放器失败: id=%d: %w", task.ID, err)
|
|
342
190
|
}
|
|
343
191
|
return nil
|
|
344
192
|
}
|
|
@@ -463,7 +311,7 @@ func loadConfig() Config {
|
|
|
463
311
|
// 回退到环境变量
|
|
464
312
|
return Config{
|
|
465
313
|
APIKey: envOrDefault("IAGENT_TTS_API_KEY", ""),
|
|
466
|
-
Endpoint: envOrDefault("IAGENT_TTS_ENDPOINT", "https://openspeech.bytedance.com/api/v3/tts/unidirectional"),
|
|
314
|
+
Endpoint: envOrDefault("IAGENT_TTS_ENDPOINT", "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse"),
|
|
467
315
|
}
|
|
468
316
|
}
|
|
469
317
|
|
|
@@ -496,28 +344,6 @@ type ttsAudioParams struct {
|
|
|
496
344
|
SampleRate int `json:"sample_rate"`
|
|
497
345
|
}
|
|
498
346
|
|
|
499
|
-
// 调用字节跳动 TTS API,返回完整 MP3 音频数据。保留给测试和 fallback 使用。
|
|
500
|
-
func synthesize(ctx context.Context, cfg Config, text string, voice *VoiceInfo) ([]byte, error) {
|
|
501
|
-
var chunks [][]byte
|
|
502
|
-
if err := synthesizeStream(ctx, cfg, text, voice, func(audio []byte) error {
|
|
503
|
-
chunk := append([]byte(nil), audio...)
|
|
504
|
-
chunks = append(chunks, chunk)
|
|
505
|
-
return nil
|
|
506
|
-
}); err != nil {
|
|
507
|
-
return nil, err
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
total := 0
|
|
511
|
-
for _, c := range chunks {
|
|
512
|
-
total += len(c)
|
|
513
|
-
}
|
|
514
|
-
result := make([]byte, 0, total)
|
|
515
|
-
for _, c := range chunks {
|
|
516
|
-
result = append(result, c...)
|
|
517
|
-
}
|
|
518
|
-
return result, nil
|
|
519
|
-
}
|
|
520
|
-
|
|
521
347
|
// 调用字节跳动 TTS API,边解析 SSE 边回调 MP3 音频块
|
|
522
348
|
func synthesizeStream(ctx context.Context, cfg Config, text string, voice *VoiceInfo, onAudio func([]byte) error) error {
|
|
523
349
|
speaker := voice.VoiceType
|
|
@@ -532,8 +358,8 @@ func synthesizeStream(ctx context.Context, cfg Config, text string, voice *Voice
|
|
|
532
358
|
Text: text,
|
|
533
359
|
Speaker: speaker,
|
|
534
360
|
AudioParams: ttsAudioParams{
|
|
535
|
-
Format: "mp3",
|
|
536
|
-
SampleRate:
|
|
361
|
+
Format: "pcm",
|
|
362
|
+
SampleRate: 48000,
|
|
537
363
|
},
|
|
538
364
|
},
|
|
539
365
|
}
|
|
@@ -590,8 +416,7 @@ func parseSSE(r io.Reader) ([]byte, error) {
|
|
|
590
416
|
|
|
591
417
|
func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
|
|
592
418
|
audioChunks := 0
|
|
593
|
-
|
|
594
|
-
scanner.Buffer(make([]byte, 256*1024), 256*1024)
|
|
419
|
+
reader := bufio.NewReaderSize(r, 64*1024)
|
|
595
420
|
|
|
596
421
|
var dataLines []string
|
|
597
422
|
|
|
@@ -608,40 +433,49 @@ func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
|
|
|
608
433
|
return err
|
|
609
434
|
}
|
|
610
435
|
|
|
611
|
-
for scanner.Scan() {
|
|
612
|
-
|
|
436
|
+
for {
|
|
437
|
+
rawLine, err := reader.ReadString('\n')
|
|
438
|
+
if err != nil && len(rawLine) == 0 {
|
|
439
|
+
if err == io.EOF {
|
|
440
|
+
break
|
|
441
|
+
}
|
|
442
|
+
return fmt.Errorf("read sse: %w", err)
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
line := strings.TrimSpace(rawLine)
|
|
613
446
|
if line == "" {
|
|
614
447
|
if err := flush(); err != nil {
|
|
615
448
|
return err
|
|
616
449
|
}
|
|
617
|
-
|
|
618
|
-
}
|
|
619
|
-
if strings.HasPrefix(line, ":") || strings.HasPrefix(line, "event:") ||
|
|
450
|
+
} else if strings.HasPrefix(line, ":") || strings.HasPrefix(line, "event:") ||
|
|
620
451
|
strings.HasPrefix(line, "id:") || strings.HasPrefix(line, "retry:") {
|
|
621
|
-
|
|
622
|
-
}
|
|
623
|
-
if strings.HasPrefix(line, "data:") {
|
|
452
|
+
// SSE metadata, ignored.
|
|
453
|
+
} else if strings.HasPrefix(line, "data:") {
|
|
624
454
|
dataLines = append(dataLines, strings.TrimPrefix(line, "data:"))
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
455
|
+
} else {
|
|
456
|
+
// 非标准 JSON 直出
|
|
457
|
+
if err := flush(); err != nil {
|
|
458
|
+
return err
|
|
459
|
+
}
|
|
460
|
+
ok, err := processEvent(line, onAudio)
|
|
461
|
+
if ok {
|
|
462
|
+
audioChunks++
|
|
463
|
+
}
|
|
464
|
+
if err != nil {
|
|
465
|
+
return err
|
|
466
|
+
}
|
|
634
467
|
}
|
|
468
|
+
|
|
635
469
|
if err != nil {
|
|
636
|
-
|
|
470
|
+
if err == io.EOF {
|
|
471
|
+
break
|
|
472
|
+
}
|
|
473
|
+
return fmt.Errorf("read sse: %w", err)
|
|
637
474
|
}
|
|
638
475
|
}
|
|
639
476
|
if err := flush(); err != nil {
|
|
640
477
|
return err
|
|
641
478
|
}
|
|
642
|
-
if err := scanner.Err(); err != nil {
|
|
643
|
-
return fmt.Errorf("scan: %w", err)
|
|
644
|
-
}
|
|
645
479
|
|
|
646
480
|
if audioChunks == 0 {
|
|
647
481
|
return fmt.Errorf("no audio data")
|
|
@@ -661,6 +495,10 @@ func processEvent(payload string, onAudio func([]byte) error) (bool, error) {
|
|
|
661
495
|
return false, nil
|
|
662
496
|
}
|
|
663
497
|
|
|
498
|
+
if err := sseEventError(event); err != nil {
|
|
499
|
+
return false, err
|
|
500
|
+
}
|
|
501
|
+
|
|
664
502
|
if b64 := extractAudioBase64(event); b64 != "" {
|
|
665
503
|
data, err := base64.StdEncoding.DecodeString(b64)
|
|
666
504
|
if err != nil {
|
|
@@ -675,6 +513,35 @@ func processEvent(payload string, onAudio func([]byte) error) (bool, error) {
|
|
|
675
513
|
return false, nil
|
|
676
514
|
}
|
|
677
515
|
|
|
516
|
+
func sseEventError(event map[string]any) error {
|
|
517
|
+
codeValue, ok := event["code"]
|
|
518
|
+
if !ok {
|
|
519
|
+
return nil
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
var code int64
|
|
523
|
+
switch v := codeValue.(type) {
|
|
524
|
+
case float64:
|
|
525
|
+
code = int64(v)
|
|
526
|
+
case int:
|
|
527
|
+
code = int64(v)
|
|
528
|
+
case int64:
|
|
529
|
+
code = v
|
|
530
|
+
default:
|
|
531
|
+
return nil
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
if code == 0 || code == 20000000 {
|
|
535
|
+
return nil
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
message, _ := event["message"].(string)
|
|
539
|
+
if message == "" {
|
|
540
|
+
message = "unknown error"
|
|
541
|
+
}
|
|
542
|
+
return fmt.Errorf("tts sse error: code=%d message=%s", code, message)
|
|
543
|
+
}
|
|
544
|
+
|
|
678
545
|
func extractAudioBase64(event map[string]any) string {
|
|
679
546
|
for _, key := range []string{"data", "audio", "audio_data"} {
|
|
680
547
|
if v, ok := event[key].(string); ok && v != "" {
|
|
@@ -691,167 +558,6 @@ func extractAudioBase64(event map[string]any) string {
|
|
|
691
558
|
return ""
|
|
692
559
|
}
|
|
693
560
|
|
|
694
|
-
// 过滤格式符号,保留自然朗读文本。
|
|
695
|
-
// 顺序很重要:先跳过跨行块结构,再跳过整行噪声,最后清理行内符号。
|
|
696
|
-
func cleanText(text string) string {
|
|
697
|
-
var lines []string
|
|
698
|
-
rawLines := strings.Split(text, "\n")
|
|
699
|
-
inCodeBlock := false
|
|
700
|
-
inArtifact := false
|
|
701
|
-
inMarkdownTable := false
|
|
702
|
-
for i := 0; i < len(rawLines); i++ {
|
|
703
|
-
line := rawLines[i]
|
|
704
|
-
line = strings.TrimSpace(line)
|
|
705
|
-
if line == "" {
|
|
706
|
-
inMarkdownTable = false
|
|
707
|
-
continue
|
|
708
|
-
}
|
|
709
|
-
if codeFenceStartRe.MatchString(line) {
|
|
710
|
-
inCodeBlock = !inCodeBlock
|
|
711
|
-
continue
|
|
712
|
-
}
|
|
713
|
-
if inCodeBlock {
|
|
714
|
-
continue
|
|
715
|
-
}
|
|
716
|
-
if artifactStartRe.MatchString(line) {
|
|
717
|
-
inArtifact = !strings.Contains(strings.ToLower(line), "</artifact>")
|
|
718
|
-
continue
|
|
719
|
-
}
|
|
720
|
-
if inArtifact {
|
|
721
|
-
if strings.Contains(strings.ToLower(line), "</artifact>") {
|
|
722
|
-
inArtifact = false
|
|
723
|
-
}
|
|
724
|
-
continue
|
|
725
|
-
}
|
|
726
|
-
if isMarkdownTableSeparator(line) {
|
|
727
|
-
if len(lines) > 0 && isMarkdownTableRow(strings.TrimSpace(rawLines[i-1])) {
|
|
728
|
-
lines = lines[:len(lines)-1]
|
|
729
|
-
}
|
|
730
|
-
inMarkdownTable = true
|
|
731
|
-
continue
|
|
732
|
-
}
|
|
733
|
-
if inMarkdownTable {
|
|
734
|
-
if isMarkdownTableRow(line) {
|
|
735
|
-
continue
|
|
736
|
-
}
|
|
737
|
-
inMarkdownTable = false
|
|
738
|
-
}
|
|
739
|
-
if shouldSkipSpeechLine(line) {
|
|
740
|
-
continue
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
cleaned := cleanSpeechLine(line)
|
|
744
|
-
if cleaned != "" {
|
|
745
|
-
lines = append(lines, cleaned)
|
|
746
|
-
}
|
|
747
|
-
}
|
|
748
|
-
return strings.Join(lines, ",")
|
|
749
|
-
}
|
|
750
|
-
|
|
751
|
-
func shouldSkipSpeechLine(line string) bool {
|
|
752
|
-
if isMarkdownTableSeparator(line) {
|
|
753
|
-
return true
|
|
754
|
-
}
|
|
755
|
-
if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
|
|
756
|
-
return true
|
|
757
|
-
}
|
|
758
|
-
if htmlDocumentLineRe.MatchString(line) {
|
|
759
|
-
return true
|
|
760
|
-
}
|
|
761
|
-
if isProgressNoiseLine(line) {
|
|
762
|
-
return true
|
|
763
|
-
}
|
|
764
|
-
if isMostlyTableRow(line) {
|
|
765
|
-
return true
|
|
766
|
-
}
|
|
767
|
-
if isMostlyFileListLine(line) {
|
|
768
|
-
return true
|
|
769
|
-
}
|
|
770
|
-
return false
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
func isMarkdownTableSeparator(line string) bool {
|
|
774
|
-
line = strings.TrimSpace(line)
|
|
775
|
-
return strings.Contains(line, "|") && strings.Trim(line, "|-: ") == ""
|
|
776
|
-
}
|
|
777
|
-
|
|
778
|
-
func isMarkdownTableRow(line string) bool {
|
|
779
|
-
line = strings.TrimSpace(line)
|
|
780
|
-
return strings.Count(line, "|") >= 2
|
|
781
|
-
}
|
|
782
|
-
|
|
783
|
-
func cleanSpeechLine(line string) string {
|
|
784
|
-
// Markdown 链接必须在 URL 删除前处理,否则会丢掉链接标题。
|
|
785
|
-
line = ansiEscapeRe.ReplaceAllString(line, "")
|
|
786
|
-
line = markdownListRe.ReplaceAllString(line, "")
|
|
787
|
-
line = markdownLinkRe.ReplaceAllStringFunc(line, func(match string) string {
|
|
788
|
-
if end := strings.Index(match, "]"); end > 1 {
|
|
789
|
-
return match[1:end]
|
|
790
|
-
}
|
|
791
|
-
return ""
|
|
792
|
-
})
|
|
793
|
-
line = urlRe.ReplaceAllString(line, "")
|
|
794
|
-
line = absolutePathRe.ReplaceAllString(line, " 路径 ")
|
|
795
|
-
// UUID 必须在短 hash 前处理,避免先删短片段后破坏 UUID 识别。
|
|
796
|
-
line = uuidRe.ReplaceAllString(line, "")
|
|
797
|
-
line = commitHashRe.ReplaceAllString(line, "")
|
|
798
|
-
line = htmlTagRe.ReplaceAllString(line, "")
|
|
799
|
-
line = strings.NewReplacer(
|
|
800
|
-
"**", "",
|
|
801
|
-
"*", "",
|
|
802
|
-
"`", "",
|
|
803
|
-
"#", "",
|
|
804
|
-
">", "",
|
|
805
|
-
"✅", "",
|
|
806
|
-
"❌", "",
|
|
807
|
-
"✓", "",
|
|
808
|
-
"✗", "",
|
|
809
|
-
"→", "到",
|
|
810
|
-
).Replace(line)
|
|
811
|
-
line = strings.Trim(line, " \t-:|")
|
|
812
|
-
line = multiSpaceRe.ReplaceAllString(line, " ")
|
|
813
|
-
return strings.TrimSpace(line)
|
|
814
|
-
}
|
|
815
|
-
|
|
816
|
-
func isMostlyTableRow(line string) bool {
|
|
817
|
-
if !strings.Contains(line, "|") {
|
|
818
|
-
return false
|
|
819
|
-
}
|
|
820
|
-
return strings.Count(line, "|") >= 2 && len([]rune(line)) > 40
|
|
821
|
-
}
|
|
822
|
-
|
|
823
|
-
func isProgressNoiseLine(line string) bool {
|
|
824
|
-
if !progressNoiseRe.MatchString(line) {
|
|
825
|
-
return false
|
|
826
|
-
}
|
|
827
|
-
if speedNoiseRe.MatchString(line) || etaNoiseRe.MatchString(line) {
|
|
828
|
-
return true
|
|
829
|
-
}
|
|
830
|
-
return !containsCJK(line)
|
|
831
|
-
}
|
|
832
|
-
|
|
833
|
-
func isMostlyFileListLine(line string) bool {
|
|
834
|
-
if !fileListNoiseRe.MatchString(line) {
|
|
835
|
-
return false
|
|
836
|
-
}
|
|
837
|
-
if containsCJK(line) {
|
|
838
|
-
return false
|
|
839
|
-
}
|
|
840
|
-
if strings.Contains(line, ".safetensors") {
|
|
841
|
-
return true
|
|
842
|
-
}
|
|
843
|
-
return strings.Count(line, ".") >= 2 || strings.Contains(line, "/") || strings.Contains(line, " - ")
|
|
844
|
-
}
|
|
845
|
-
|
|
846
|
-
func containsCJK(s string) bool {
|
|
847
|
-
for _, r := range s {
|
|
848
|
-
if r >= '\u4e00' && r <= '\u9fff' {
|
|
849
|
-
return true
|
|
850
|
-
}
|
|
851
|
-
}
|
|
852
|
-
return false
|
|
853
|
-
}
|
|
854
|
-
|
|
855
561
|
func main() {
|
|
856
562
|
log.SetFlags(log.Ltime | log.Lshortfile)
|
|
857
563
|
|
|
@@ -988,21 +694,6 @@ func validateVoiceInfo(name string, voice *VoiceInfo) error {
|
|
|
988
694
|
return nil
|
|
989
695
|
}
|
|
990
696
|
|
|
991
|
-
func playAudio(data []byte) error {
|
|
992
|
-
tmpFile := filepath.Join(tempDir, fmt.Sprintf("ttsd-%d.mp3", time.Now().UnixNano()))
|
|
993
|
-
if err := os.WriteFile(tmpFile, data, 0644); err != nil {
|
|
994
|
-
return fmt.Errorf("写入临时文件失败: %w", err)
|
|
995
|
-
}
|
|
996
|
-
defer os.Remove(tmpFile)
|
|
997
|
-
|
|
998
|
-
cmd := exec.Command("/usr/bin/afplay", tmpFile)
|
|
999
|
-
log.Printf("播放开始: %s", filepath.Base(tmpFile))
|
|
1000
|
-
if err := cmd.Run(); err != nil {
|
|
1001
|
-
return fmt.Errorf("播放失败: %w", err)
|
|
1002
|
-
}
|
|
1003
|
-
return nil
|
|
1004
|
-
}
|
|
1005
|
-
|
|
1006
697
|
func handleConnection(conn net.Conn, engine *TaskEngine) {
|
|
1007
698
|
defer func() {
|
|
1008
699
|
if r := recover(); r != nil {
|
package/npm/postinstall.js
CHANGED
|
@@ -61,6 +61,43 @@ function copyIfMissing(src, dst, mode) {
|
|
|
61
61
|
console.log(`配置文件已创建: ${dst}`);
|
|
62
62
|
}
|
|
63
63
|
|
|
64
|
+
function migrateDefaultEndpoint(configPath) {
|
|
65
|
+
if (!fs.existsSync(configPath)) {
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
const oldEndpoint = "https://openspeech.bytedance.com/api/v3/tts/unidirectional";
|
|
69
|
+
const newEndpoint = "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse";
|
|
70
|
+
let config;
|
|
71
|
+
try {
|
|
72
|
+
config = JSON.parse(fs.readFileSync(configPath, "utf8"));
|
|
73
|
+
} catch (_) {
|
|
74
|
+
return;
|
|
75
|
+
}
|
|
76
|
+
if (config.endpoint !== oldEndpoint) {
|
|
77
|
+
return;
|
|
78
|
+
}
|
|
79
|
+
fs.copyFileSync(configPath, `${configPath}.bak`);
|
|
80
|
+
config.endpoint = newEndpoint;
|
|
81
|
+
fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`);
|
|
82
|
+
console.log(`配置 endpoint 已迁移到 SSE,旧配置备份: ${configPath}.bak`);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
function installHook(src, dst) {
|
|
86
|
+
if (fs.existsSync(dst)) {
|
|
87
|
+
try {
|
|
88
|
+
if (fs.readFileSync(src, "utf8") !== fs.readFileSync(dst, "utf8")) {
|
|
89
|
+
fs.copyFileSync(dst, `${dst}.bak`);
|
|
90
|
+
console.log(`旧 Hook 已备份: ${dst}.bak`);
|
|
91
|
+
}
|
|
92
|
+
} catch (_) {
|
|
93
|
+
fs.copyFileSync(dst, `${dst}.bak`);
|
|
94
|
+
console.log(`旧 Hook 已备份: ${dst}.bak`);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
copyExecutable(src, dst);
|
|
98
|
+
console.log(`Hook 脚本已安装: ${dst}`);
|
|
99
|
+
}
|
|
100
|
+
|
|
64
101
|
function sleep(ms) {
|
|
65
102
|
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
|
|
66
103
|
}
|
|
@@ -106,8 +143,10 @@ function main() {
|
|
|
106
143
|
symlinkForce(cliPath, path.join(binDir, "ispeak-claude"));
|
|
107
144
|
symlinkForce(cliPath, path.join(binDir, "ispeak-codex"));
|
|
108
145
|
|
|
109
|
-
|
|
110
|
-
copyIfMissing(path.join(root, "configs", "
|
|
146
|
+
const configPath = path.join(configDir, "config.json");
|
|
147
|
+
copyIfMissing(path.join(root, "configs", "config.example.json"), configPath);
|
|
148
|
+
migrateDefaultEndpoint(configPath);
|
|
149
|
+
installHook(path.join(root, "configs", "hook-speak.sh"), path.join(configDir, "hook-speak.sh"));
|
|
111
150
|
|
|
112
151
|
const plist = fs
|
|
113
152
|
.readFileSync(path.join(root, "configs", "com.iSpeak.plist"), "utf8")
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xdfnet/ispeak",
|
|
3
|
-
"version": "1.6.5",
|
|
3
|
+
"version": "1.6.7",
|
|
4
4
|
"description": "Local macOS TTS daemon for AI coding assistants, powered by Volcengine streaming TTS.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/xdfnet/iSpeak#readme",
|
|
@@ -26,14 +26,16 @@
|
|
|
26
26
|
"scripts": {
|
|
27
27
|
"build": "go build -ldflags=\"-s -w\" -o build/ispeakd .",
|
|
28
28
|
"test": "go test ./...",
|
|
29
|
+
"prepublishOnly": "make test",
|
|
29
30
|
"postinstall": "node npm/postinstall.js",
|
|
30
31
|
"pack:dry-run": "npm pack --dry-run"
|
|
31
32
|
},
|
|
32
33
|
"files": [
|
|
33
34
|
"main.go",
|
|
35
|
+
"clean_text.go",
|
|
34
36
|
"go.mod",
|
|
35
37
|
"go.sum",
|
|
36
|
-
"scripts/",
|
|
38
|
+
"scripts/ispeak",
|
|
37
39
|
"configs/",
|
|
38
40
|
"npm/",
|
|
39
41
|
"Docs/",
|