npm - @xdfnet/ispeak - Versions diffs - 1.6.15 → 1.7.0 - Mend

@xdfnet/ispeak 1.6.15 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/AGENTS.md +12 -19
package/Makefile +1 -1
package/README.md +9 -18
package/avaudioengine_player_darwin.go +0 -6
package/configs/hook-speak.sh +12 -192
package/docs/architecture.md +91 -117
package/docs/hook-text-extraction.md +68 -216
package/main.go +54 -45
package/package.json +1 -1
package/scripts/ispeak +1 -1

package/AGENTS.md CHANGED Viewed

@@ -28,11 +28,9 @@ make help       # 显示帮助
 ispeak (CLI, bash)
   └─ nc -U ~/.config/iSpeak/ispeak.sock
       └─ ispeakd (Go daemon)
-           ├─ Task Engine (任务仓库)
-           │    └─ pending FIFO
-           └─ transactionWorker (single)
-                └─ pending -> running -> delete
-                     └─ SSE PCM chunk -> AVAudioEngine
+           └─ Player (channel, buffer=1)
+                └─ loop goroutine: 单 AVAudioEngine 实例复用
+                     └─ SSE PCM chunk → AVAudioEngine
 ```
 - **Socket**: `~/.config/iSpeak/ispeak.sock`
@@ -42,7 +40,7 @@ ispeak (CLI, bash)
 ## 核心文件
-- `main.go` — 守护进程、任务引擎、TTS 流式请求、SSE 解析、流式播放
+- `main.go` — 守护进程、Player (channel 驱动)、TTS 流式请求、SSE 解析
 - `avaudioengine_player_darwin.go` — macOS 原生 `AVAudioEngine` PCM 播放器
 - `clean_text.go` — TTS 播报文本清洗
 - `main_test.go` — 任务引擎关键行为测试
@@ -62,18 +60,13 @@ CLI 与 daemon 通过 socket 传输原始文本，支持音色前缀：
 ## 任务策略（节省 TTS 费用）
 新消息到达时：
-1. 删除所有 `pending` 任务（未开始）
-2. 不打断当前 `running` 事务
-3. 创建新任务并进入 `pending`
-**任务状态流转：**
-```
-pending → running → delete
-```
+1. 丢弃 channel 中排队的旧消息
+2. 不打断当前正在合成/播放的任务
+3. 新消息入队
 ## 失败策略
-- 流式合成/播放失败：直接删除任务，不重试，避免重复播报
+- 流式合成/播放失败：日志记录，继续处理下一条，不重试
 ## 配置
@@ -93,11 +86,11 @@ pending → running → delete
 ## 稳定性设计
-- 单 transaction worker，合成与播放同链路，降低首播延迟
-- 关键 goroutine 有 `panic recover`
+- 单 Player goroutine，合成与播放同链路，降低首播延迟
+- AVAudioEngine 实例复用，避免重复初始化开销
+- Channel buffer=1 + drain，新消息自动丢弃旧排队消息
 - 配置热更新（mtime 缓存 + 自动重载）
 - TTS HTTP Client 复用，减少连接开销
 - 主链路使用 macOS 原生 `AVAudioEngine` 播放 PCM
-- 播放失败直接删除任务，不重试
+- 合成/播放失败直接跳过，不重试
 - 日志轮转，防止文件过大
-- 进程级 temp 目录，退出时自动清理

package/Makefile CHANGED Viewed

@@ -1,6 +1,6 @@
 .PHONY: build test pack release push install deploy uninstall clean help
-VERSION  := 1.6.9
+VERSION  := 1.7.0
 TAG      := v$(VERSION)
 NPM_PKG  := @xdfnet/ispeak
 BIN     := build/ispeakd

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # iSpeak
-![Version](https://img.shields.io/badge/version-1.6.9-blue)
+![Version](https://img.shields.io/badge/version-1.7.0-blue)
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![Go Version](https://img.shields.io/badge/Go-1.26-blue)](https://golang.org/dl/)
 ![Platform](https://img.shields.io/badge/platform-macOS-green)
@@ -20,8 +20,8 @@ ispeak "Pull request 已合并，3 个测试通过"
 | 问题 | 方案 |
 |------|------|
-| AI 生成多条回复，TTS 账单飞涨 | 新消息只保留最新待执行任务，避免无效合成 |
-| 回复快慢不一，音频播报乱序 | 单 transaction worker，FIFO 顺序稳定 |
+| AI 生成多条回复，TTS 账单飞涨 | 新消息丢弃旧排队消息，避免无效合成 |
+| 回复快慢不一，音频播报乱序 | 单 channel goroutine，串行顺序稳定 |
 | 修改配置要重启服务 | 热更新：编辑 `config.json` 立即生效 |
 | 默认音色太无聊 | hook 按来源前缀选择音色 |
@@ -33,7 +33,7 @@ ispeak "Pull request 已合并，3 个测试通过"
 npm i -g @xdfnet/ispeak
 ```
-当前 npm 安装会在本机编译 `ispeakd`，需要已安装 Go。主播放链路使用 macOS 原生 `AVAudioEngine`，不依赖 `ffmpeg`。失败时直接记录日志并删除任务。
+当前 npm 安装会在本机编译 `ispeakd`，需要已安装 Go。主播放链路使用 macOS 原生 `AVAudioEngine`，不依赖 `ffmpeg`。合成失败记录日志，播放器异常自动重建。
 **源码安装：**
@@ -61,26 +61,17 @@ ispeak "iSpeak 准备好了"
 │   通过 Unix Socket 接收文本                          │
 │         │                                            │
 │         ▼                                           │
-│   任务引擎                                           │
-│   （pending → running → delete）              │
+│   Player (channel)                                  │
+│   buffer=1 + drain（新消息丢弃旧排队消息）             │
 │         │                                            │
 │         ▼                                           │
-│   单 Worker 流式链路                                 │
-│   （SSE PCM chunk → AVAudioEngine）                 │
+│   TTS SSE → AVAudioEngine（单实例复用）              │
 │         │                                            │
 │         ▼                                           │
-│   错误处理                                          │
-│   （失败时记录日志并删除任务）                        │
+│   失败记录日志，播放器异常自动重建                    │
 └─────────────────────────────────────────────────────┘
 ```
-**任务状态流转：**
-```
-pending → running → delete
-```
-新消息到达时只清理未开始任务，不打断当前合成/播放；当前事务结束后再播最新消息。
 ## 语音清洗规则
 清洗只影响 TTS 播报内容，不改变 Claude/Codex 屏幕显示内容。
@@ -138,7 +129,7 @@ ispeak version   # 版本
 Claude Code 和 Codex 的详细 hook 配置见 [docs/hook-text-extraction.md](/Users/admin/iCode/iSpeak/docs/hook-text-extraction.md)。
-`hook-speak.sh` 会按 `turn_id` 做一次去重，所以同一回合不会播两次。
+`hook-speak.sh` 会自动跳过 Codex 遗留 notify 的 `agent-turn-complete` 事件，避免同一回合重复播报。
 ## 开发命令

package/avaudioengine_player_darwin.go CHANGED Viewed

@@ -252,12 +252,6 @@ func (p *avAudioEngineStreamPlayer) CloseAndWait() error {
 	return p.closeLocked()
 }
-func (p *avAudioEngineStreamPlayer) Abort() error {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-	return p.closeLocked()
-}
 func (p *avAudioEngineStreamPlayer) writeChunk(data []byte) error {
 	if len(data) == 0 {
 		return nil

package/configs/hook-speak.sh CHANGED Viewed

@@ -1,15 +1,14 @@
 #!/bin/bash
 # Claude Code / Codex 共用播报 Hook：
-# 只取最后一条 assistant 回复，加 `{source:<name>}` 前缀后发给 ispeakd。
+# 取 last_assistant_message，加 {source:<name>} 前缀后发给 ispeakd。
+# Claude: payload.last_assistant_message (snake_case)
+# Codex:  payload["last-assistant-message"] (kebab-case)
 [[ "$ISPEAK_SKIP" == "1" ]] && exit 0
 SOURCE="${1:-claude}"
 SOCK="$HOME/.config/iSpeak/ispeak.sock"
 LOG="$HOME/.config/iSpeak/hook.log"
-STATE_FILE="$HOME/.config/iSpeak/hook.last"
-# Codex `notify` 会把 JSON 作为最后一个参数传入；
-# Claude/Claude 风格 Stop Hook 会把 JSON 写到 stdin。
 input="${2:-}"
 if [[ -z "$input" ]]; then
   input=$(cat)
@@ -18,183 +17,29 @@ input_file=$(mktemp)
 trap 'rm -f "$input_file"' EXIT
 printf "%s" "$input" > "$input_file"
-result=$(SOURCE="$SOURCE" HOOK_INPUT_FILE="$input_file" HOOK_STATE_FILE="$STATE_FILE" node <<'NODE' 2>/dev/null
+result=$(HOOK_INPUT_FILE="$input_file" node <<'NODE' 2>>"$LOG"
 const fs = require("fs");
-const crypto = require("crypto");
 (() => {
   const input = readFile(process.env.HOOK_INPUT_FILE || "");
   const payload = parseJSON(input) || {};
-  const source = process.env.SOURCE || "";
-  const stateFile = process.env.HOOK_STATE_FILE || "";
-  const result = source.startsWith("codex")
-    ? lastCodexAssistant(payload)
-    : lastClaudeAssistant(payload);
-  if (!result.text) {
-    return;
-  }
+  // Codex Stop hook 会在 agent-turn-complete 事件中重复触发，跳过
+  if (payload.type === "agent-turn-complete") return;
-  if (stateFile && result.turnId) {
-    if (isDuplicateTurn(stateFile, source, result.turnId)) {
-      return;
-    }
-    saveTurn(stateFile, source, result.turnId, result.text);
-  } else if (stateFile) {
-    saveTurn(stateFile, source, "", result.text);
-  }
+  const text = payload.last_assistant_message
+    || payload["last-assistant-message"]
+    || "";
-  process.stdout.write(result.text);
+  if (text) process.stdout.write(text);
 })();
-function lastClaudeAssistant(payload) {
-  const direct = firstString(payload.last_assistant_message, payload.message);
-  if (direct) return { text: direct, turnId: extractTurnId(payload) };
-  const transcript = firstString(payload.transcript_path, payload.transcriptPath);
-  return transcript ? lastClaudeTranscript(transcript, payload) : { text: "", turnId: extractTurnId(payload) };
-}
-function lastCodexAssistant(payload) {
-  const direct = firstString(
-    payload["last-assistant-message"],
-    payload.last_assistant_message,
-    payload.lastAssistantMessage,
-    payload.message,
-    payload.lastMessage
-  );
-  if (direct) return { text: direct, turnId: extractTurnId(payload) };
-  const transcript = firstString(
-    payload.transcript_path,
-    payload.transcriptPath,
-    payload["transcript-path"]
-  );
-  return transcript ? lastAssistantFromTranscript(transcript, "codex") : { text: "", turnId: extractTurnId(payload) };
-}
 function readFile(file) {
-  try {
-    return fs.readFileSync(file, "utf8");
-  } catch {
-    return "";
-  }
+  try { return fs.readFileSync(file, "utf8"); } catch { return ""; }
 }
 function parseJSON(text) {
-  try {
-    return JSON.parse(text);
-  } catch {
-    return null;
-  }
-}
-function firstString(...values) {
-  for (const value of values) {
-    if (typeof value === "string" && value !== "") return value;
-  }
-  return "";
-}
-function collectText(content) {
-  if (typeof content === "string") return content;
-  if (Array.isArray(content)) {
-    return content
-      .map(item => collectText(item))
-      .filter(Boolean)
-      .join(" ");
-  }
-  if (!content || typeof content !== "object") return "";
-  if (typeof content.text === "string") return content.text;
-  if (content.content) return collectText(content.content);
-  return "";
-}
-function lastClaudeTranscript(file, payload) {
-  const deadline = Date.now() + 5000;
-  let result = { text: "", turnId: extractTurnId(payload) };
-  while (Date.now() <= deadline) {
-    result = lastAssistantFromTranscript(file, "claude");
-    if (result.text) return result;
-    sleepMs(120);
-  }
-  return result;
-}
-function lastAssistantFromTranscript(file, source) {
-  let data = "";
-  try {
-    data = fs.readFileSync(file, "utf8");
-  } catch {
-    return "";
-  }
-  let last = "";
-  let turnId = "";
-  for (const line of data.split(/\r?\n/)) {
-    if (!line.trim()) continue;
-    const event = parseJSON(line);
-    if (!event) continue;
-    if (source === "claude") {
-      if (event.role === "assistant") {
-        last = collectText(event.content) || last;
-      }
-      if (event.message && event.message.role === "assistant") {
-        last = collectText(event.message.content) || last;
-      }
-    }
-    if (source === "codex" &&
-      event.type === "response_item" &&
-      event.payload &&
-      event.payload.type === "message" &&
-      event.payload.role === "assistant"
-    ) {
-      last = collectText(event.payload.content) || last;
-      turnId = turnId || extractTurnId(event) || extractTurnId(event.payload);
-    }
-  }
-  return { text: last, turnId };
-}
-function extractTurnId(payload) {
-  return firstString(
-    payload.turn_id,
-    payload.turnId,
-    payload["turn-id"]
-  );
-}
-function isDuplicateTurn(stateFile, source, turnId) {
-  const current = `${source}:${turnId}`;
-  try {
-    return fs.readFileSync(stateFile, "utf8").trim() === current;
-  } catch {
-    return false;
-  }
-}
-function saveTurn(stateFile, source, turnId, text) {
-  const current = `${source}:${turnId || textHash(text)}`;
-  try {
-    fs.mkdirSync(require("path").dirname(stateFile), { recursive: true });
-    fs.writeFileSync(stateFile, current, "utf8");
-  } catch {
-    // 去重失败不影响播报。
-  }
+  try { return JSON.parse(text); } catch { return null; }
 }
-function textHash(text) {
-  return crypto.createHash("sha1").update(text, "utf8").digest("hex");
-}
-function sleepMs(ms) {
-  Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
-}
 NODE
 )
@@ -203,31 +48,6 @@ if [[ "$ISPEAK_HOOK_PRINT_TEXT" == "1" ]]; then
   exit 0
 fi
-echo "=== $(date) ===" >> "$LOG"
-echo "SOURCE: $SOURCE" >> "$LOG"
-echo "TEXT_LEN: ${#result}" >> "$LOG"
-echo "PREVIEW: ${result:0:150}" >> "$LOG"
-# Claude Code Stop Hook 调试
-if [[ "$SOURCE" == "claude" && -n "$input" ]]; then
-  # 用 grep 提取 transcript_path
-  tp=$(echo "$input" | grep -o '"transcript_path":"[^"]*"' | head -1 | sed 's/"transcript_path":"//;s/"$//')
-  if [[ -n "$tp" ]]; then
-    echo "CLAUDE_TRANSCRIPT_PATH: $tp" >> "$LOG"
-    if [[ -f "$tp" ]]; then
-      echo "CLAUDE_TRANSCRIPT_EXISTS: yes" >> "$LOG"
-    else
-      echo "CLAUDE_TRANSCRIPT_EXISTS: no" >> "$LOG"
-    fi
-  else
-    echo "CLAUDE_TRANSCRIPT_PATH: none" >> "$LOG"
-    echo "CLAUDE_RAW: ${input:0:300}" >> "$LOG"
-  fi
-fi
 if [[ -n "$result" && -S "$SOCK" ]]; then
   printf "{source:%s}%s" "$SOURCE" "$result" | nc -U -w5 "$SOCK" 2>> "$LOG"
-  echo "SPOKE: OK" >> "$LOG"
-else
-  echo "SPOKE: SKIP" >> "$LOG"
 fi

package/docs/architecture.md CHANGED Viewed

@@ -2,179 +2,145 @@
 ## 概述
-iSpeak 是一个运行在 macOS 上的本地 TTS 播报守护进程，通过 Unix Socket 接收文本，调用火山引擎 TTS 流式 API，边合成边播放。
+iSpeak 是一个运行在 macOS 上的本地 TTS 播报守护进程，通过 Unix Socket 接收文本，调用火山引擎 TTS 流式 API，边合成边通过原生 AVAudioEngine 播放 PCM 音频。
-当前版本采用“任务仓库 + 单 transaction worker”流式链路：
-- transaction worker：领取待执行任务，SSE 每到一段音频就写入播放器 stdin
-- 播放器优先使用 `ffplay -i pipe:0`，没有 `ffplay` 时回退到完整音频 `afplay`
+核心链路：**Socket → Player (channel) → TTS SSE → AVAudioEngine**
 ## 系统架构
 ```
 ┌─────────────────────────────────────────────────────────────┐
 │                         客户端                              │
-│  ispeak (bash CLI)  ──nc -U──>  ~/.config/iSpeak/ispeak.sock │
+│         nc -U  ─────────>  ~/.config/iSpeak/ispeak.sock      │
+│         ispeak "文本"      (Unix Socket)                    │
 └─────────────────────────────────────────────────────────────┘
                                     │
                                     ▼
 ┌─────────────────────────────────────────────────────────────┐
-│                     ispeakd (Go Daemon)                    │
+│                     ispeakd (Go Daemon)                      │
 │                                                             │
-│  Socket Acceptor                                            │
-│    - net.Listener.Accept()                                  │
-│    - 每个连接读取文本并提交任务                               │
+│  Socket Acceptor (handleConnection)                         │
+│    - 读文本 → 解析 {source:xxx} → 选音色 → cleanText → 提交 │
 │                                                             │
-│  Task Engine                                                │
+│  Player (channel 驱动)                                      │
 │  ┌───────────────────────────────────────────────────────┐  │
-│  │  Task Repository (in-memory)                         │  │
-│  │  - tasks: map[uint64]*Task                           │  │
-│  │  - pending: []uint64 (FIFO)                          │  │
+│  │  chan job (buffer=1)                                  │  │
+│  │  Submit: drain 旧消息 → 入队最新                      │  │
+│  │  loop:   for j := range ch → play(j, player)          │  │
 │  └───────────────────────────────────────────────────────┘  │
 │             │                                               │
 │             ▼                                               │
-│  Transaction Worker (single)                               │
-│  - pending -> running                                      │
-│  - 调用 TTS 流式接口（失败直接删除，不重试）                  │
-│  - SSE audio chunk -> StreamPlayer.Write                    │
-│  - 播放完成后删除任务；失败直接删除任务                       │
-│                                                             │
+│  AVAudioEngine (cgo, 单实例复用)                            │
+│    - PCM 48kHz 单声道 int16 → float32                       │
+│    - 流式 scheduleBuffer + pending 计数 + cond 同步         │
+│    - 关闭时补齐残留字节                                     │
 └─────────────────────────────────────────────────────────────┘
 ```
 ## 核心数据结构
-### Task
+### job
 ```go
-type Task struct {
-    ID     uint64     // 任务 ID（递增）
-    Text   string     // 过滤后的待执行文本
-    Status TaskStatus // 当前状态
-    Voice  VoiceInfo  // 任务音色快照
-    Cfg    Config     // 任务配置快照（提交时）
+type job struct {
+    text   string    // cleanText 清洗后的文本
+    voice  VoiceInfo // 音色快照
+    source string    // 来源: "claude" / "codex" / "default"
+    cfg    Config    // 配置快照
 }
 ```
-### TaskStatus
-```go
-const (
-    TaskStatusPending TaskStatus = iota // 待执行
-    TaskStatusRunning                   // 合成播放事务执行中
-)
-```
-说明：
-- 终态不持久化。任务成功/失败后都会从仓库删除。
-- 不保留 `failed/canceled/completed` 常驻状态，历史通过日志追踪。
-### TaskEngine
+### Player
 ```go
-type TaskEngine struct {
-    mu sync.Mutex
-    nextID       uint64
-    tasks        map[uint64]*Task
-    latestID     uint64
-    pending      []uint64
-    wake chan struct{}
-    synthesizeStreamFn func(ctx context.Context, cfg Config, text string, voice *VoiceInfo, onAudio func([]byte) error) error
-    newStreamPlayerFn  func() (StreamPlayer, error)
+type Player struct {
+    ch chan job   // buffer=1，串行播报
 }
 ```
-### 播放器接口
+单 goroutine 消费 channel，一个 AVAudioEngine 实例复用。新消息到达时 drain 旧消息，不打断正在播放的。
+### StreamPlayer
 ```go
 type StreamPlayer interface {
     Write(audio []byte) error
     CloseAndWait() error
-    Abort() error
 }
 ```
-## 状态机与逻辑
-### 状态流转
-```
-pending -> running -> delete
-```
+## 消息流程
-### 任务提交（核心规则）
+### 1. Socket 接收
-`Submit(cleanedText, voice, cfg)` 原子执行：
-1. 删除所有 `pending` 任务
-2. 不打断当前 `running` 事务
-3. 创建新任务（`pending`）
-4. 唤醒 transaction worker
+`handleConnection()`:
+1. `bufio.Scanner` 读取完整文本（最大 1MB）
+2. `extractVoicePrefix` 解析 `{source:claude}` 前缀，匹配 SourceVoices
+3. 未匹配到 → fallback 到 DefaultVoice
+4. `cleanText()` 过滤文本噪音（markdown/code/URL/path/UUID 等）
+5. `player.Submit(文本, 音色, 来源, 配置)`
-策略说明：
-- 未开始的旧任务直接删除
-- 已领取但过期的旧任务在事务执行前跳过
-- 正在合成/播放的任务自然结束
+### 2. 调度与去重
-### Transaction worker 规则
+`Submit()`:
+- 非阻塞 drain channel 中旧消息：`select { case <-ch: default: }`
+- 新消息入队
-1. FIFO 领取 `pending` 任务并置 `running`
-2. 启动 `StreamPlayer`
-3. 调用 TTS 流式接口，SSE 每解析出一个音频 chunk 就写入播放器
-4. TTS 结束后关闭播放器 stdin 并等待播放结束
-5. 成功：删除任务
-6. 失败：删除任务，不重试
+策略：**新消息丢弃旧排队消息，不打断正在播放的**
-## 消息流程
+### 3. 流式合成与播放
-### 1. 接收并清洗消息
+`play()`:
+1. HTTP POST 火山引擎 `/api/v3/tts/unidirectional/sse`
+2. SSE 流式解析 → base64 解码 → PCM int16 数据
+3. 每块 PCM 立即写入 AVAudioEngine 播放
+4. **合成失败**：只记日志，播放器正常继续
+5. **播放器写入失败**：返回 error，loop 层重建 AVAudioEngine
-`handleConnection()`：
-- 读取 socket 文本
-- 解析 `{source:xxx}` 音色前缀
-- `cleanText()` 生成语音友好的文本
-- 将“过滤后文本”提交给 `TaskEngine.Submit`
+## SSE 解析
-`cleanText()` 只影响 TTS 播报，不改变屏幕显示内容。当前清洗规则：
+`parseSSEStream()`:
+- 逐行读取，累积 `data:` 行
+- 空行触发 flush → `processEvent()` 解析 JSON
+- 兼容非标准直出（无 `data:` 前缀的裸 JSON）
+- `extractAudioBase64` 递归提取：顶层 `data/audio/audio_data` → 嵌套 `data/result/payload`
+- 错误码检查：`code` 不为 0 且不为 20000000 时返回 error
+- 整条流无音频块 → 返回 `"no audio data"`
-- Markdown 格式符号：标题、加粗、反引号、引用符
-- Markdown 表格整块：表头、分隔线、表格内容
-- 代码块、artifact、HTML 页面源码
-- Markdown 链接 URL，仅保留链接标题
-- 绝对路径简化为“路径”
-- 长 commit hash、UUID、长 ID
-- 明显文件列表、模型分片列表、下载清单
-- 下载进度、速度、进度条、ANSI 控制符等终端噪声
+## 配置热加载
-清洗目标是保留适合听的内容：结论、成功/失败状态、下一步动作、关键错误原因。
+`loadConfig()`:
+- mtime 缓存：路径相同 + 修改时间未变 → 直接返回缓存
+- 校验失败 → 用上一次有效配置兜底
+- 文件不存在 → fallback 环境变量 `IAGENT_TTS_API_KEY` / `IAGENT_TTS_ENDPOINT`
-### 2. 流式合成播放阶段
+## 稳定性设计
-- transaction worker 领取任务
-- HTTP POST 火山引擎 TTS 接口
-- 解析 SSE 流并 base64 解码音频 chunk
-- 优先将 chunk 写入 `ffplay` stdin 实时播放
-- 没有 `ffplay` 时缓存完整音频，结束后写临时 MP3 并用 `afplay` 播放
-- 删除任务与临时文件
+- **panic recover**: loop goroutine 崩溃后 `go p.loop()` 自动重启
+- **播放器重建**: 写入失败时关闭旧实例并创建新的 AVAudioEngine
+- **新消息优先**: channel buffer=1 + drain，旧排队消息自动丢弃
+- **配置热加载**: 每次连接重新读取，mtime 缓存避免频繁 I/O
+- **HTTP 复用**: 全局 `ttsHTTPClient`，30s 超时，连接池复用
+- **日志轮转**: lumberjack，10MB/份，保留 3 份，压缩归档
+- **优雅退出**: SIGINT/SIGTERM 触发 listener.Close()
-## 并发与一致性
+## 清洗规则
-- 单引擎锁 `mu` 保护任务仓库与 FIFO 队列
-- 单 transaction worker，保证播报顺序稳定
-- `wake` 为缓冲 1 的唤醒信号，防止重复唤醒堆积
-- FIFO 保证未开始任务公平顺序
+`cleanText()` 过滤顺序（先跨行块再行内符号）：
-## 失败与成本策略
+1. 跳过代码块 (` ```...``` `)
+2. 跳过 artifact (`<artifact>...</artifact>`)
+3. 跳过 Markdown 表格（分隔线 + 表头 + 内容行）
+4. 跳过 HTML 源码行、进度噪声行
+5. 行内清洗：ANSI 转义 → 链接 URL → 绝对路径 → UUID → commit hash → markdown 符号 → HTML 标签
-- 新任务到达时只清理 `pending`，不打断当前任务
-- 流式合成/播放失败：直接删除任务，不重试，避免重复播报
-- 只保留最新消息优先播报，降低 TTS 成本
+保留适合听的内容：结论、状态、下一步动作、关键错误原因。
 ## 文件布局
 ```
 ~/.config/iSpeak/
-├── config.json      # API Key、音色配置
+├── config.json      # API Key、音色映射
 ├── ispeak.sock      # Unix Socket
 ├── ispeak.log       # 日志（lumberjack 轮转）
 └── hook-speak.sh    # Claude/Codex Hook
@@ -183,10 +149,18 @@ pending -> running -> delete
 └── com.ispeak.plist # launchd 服务配置
 ```
-## 稳定性设计
+## 来源 & 音色映射
+Hook 传入 `{source:claude}` 前缀，ispeakd 解析后匹配 `config.json` 中的 `sourceVoices`：
+```json
+{
+  "defaultVoice": { "voice_type": "zh_female_mizai_uranus_bigtts", "resourceId": "seed-tts-2.0" },
+  "sourceVoices": {
+    "claude": { "voice_type": "zh_female_tianmeitaozi_uranus_bigtts", "resourceId": "seed-tts-2.0" },
+    "codex": { "voice_type": "zh_female_shuangkuaisisi_uranus_bigtts", "resourceId": "seed-tts-2.0" }
+  }
+}
+```
-- 关键 worker 使用 `panic recover`
-- 配置热更新（每次连接重新加载配置）
-- 播放器子进程命令协议，保证“播完再删任务”
-- 日志轮转（10MB/份，保留 3 份）
-- 进程级 temp 目录，退出时自动清理
+日志区分来源：`TTS [claude]: 文本` / `TTS [codex]: 文本` / `TTS [default]: 文本`

package/docs/hook-text-extraction.md CHANGED Viewed

@@ -1,274 +1,126 @@
 # Hook 文本提取链路
-本文记录 Claude Code / Codex CLI 在 Hook 中拿到“最后一条 assistant 回复”的实际方式。`hook-speak.sh` 的目标只做两件事：取最后一条 assistant 回复，发给 iSpeak socket。
+`hook-speak.sh` 只做一件事：从 Hook JSON 里取 assistant 回复文本，发给 iSpeak socket。当前 51 行。
-## 结论
+## 提取逻辑
-推荐优先级：
-1. **Codex `notify`**：从脚本第二个参数 `$2` 读取 JSON，取 `last-assistant-message`（kebab-case）。
-2. **Codex Stop Hook**：从 stdin 读取 JSON，取 `last_assistant_message`（snake_case）。
-3. **Claude Code Stop Hook**：从 stdin 读取 JSON，只读 `transcript_path`（官方无 direct 字段）。
-不扫描 `~/.codex/sessions`。没有 direct 字段也没有 `transcript_path` 时，本次不播报。
-## Codex CLI：notify
-当前本机版本：
+```js
+// Codex 遗留 notify（agent-turn-complete）与现代 Stop Hook 重复触发，跳过
+if (payload.type === "agent-turn-complete") return;
-```text
-codex-cli 0.130.0
+const text = payload.last_assistant_message     // Claude Stop / Codex Stop (snake_case)
+  || payload["last-assistant-message"]            // Codex notify (kebab-case)
+  || "";
 ```
-Codex CLI 的 `notify = [...]` 是 legacy notify 机制。官方源码里会把通知 JSON 追加成命令的最后一个 argv 参数，不写 stdin。
+不再需要 transcript 轮询、去重、状态文件、`payload.message` fallback。
-配置示例：
+## 输入来源
-```toml
-notify = ["/Users/你的用户名/.config/iSpeak/hook-speak.sh", "codex"]
-```
+| 来源 | 传参方式 | 字段名 | 处理 |
+|------|---------|--------|------|
+| Claude Code Stop Hook | stdin | `last_assistant_message` | 提取并播报 |
+| Codex Stop Hook | stdin | `last_assistant_message` | 提取并播报 |
+| Codex 遗留 notify | `$2` (argv) | `last-assistant-message` | 跳过（`agent-turn-complete`） |
-脚本实际收到：
+脚本统一处理 stdin 和 argv：
 ```bash
-$1 = "codex"
-$2 = '{"type":"agent-turn-complete",...,"last-assistant-message":"..."}'
-stdin = empty
+input="${2:-}"          # 遗留 notify 走 $2
+if [[ -z "$input" ]]; then
+  input=$(cat)          # Stop Hook 走 stdin
+fi
 ```
-核心字段：
+## Codex Stop Hook（现代）
+stdin JSON：
 ```json
 {
-  "type": "agent-turn-complete",
-  "thread-id": "...",
-  "turn-id": "...",
-  "cwd": "...",
-  "input-messages": ["..."],
-  "last-assistant-message": "最后一条 assistant 回复"
+  "turn_id": "...",
+  "transcript_path": "...",
+  "last_assistant_message": "最后一条 assistant 回复"
 }
 ```
-所以 Codex `notify` 的正确读取方式是：
+源码：`codex-rs/hooks/src/events/stop.rs` — `StopCommandInput` struct 包含 `last_assistant_message`。
-```bash
-input="${2:-}"
-```
+## Codex 遗留 notify（跳过）
-然后解析：
+Codex 有两套通知机制同时触发：
-```js
-payload["last-assistant-message"]
-```
+| 机制 | 事件 | 触发时机 |
+|------|------|---------|
+| 现代 Stop Hook | `stop` | agent 回合结束 |
+| 遗留 notify | `agent-turn-complete` | agent 回合结束 |
-源码依据：`codex-rs/hooks/src/legacy_notify.rs`（https://github.com/openai/codex，2026-05-11）。该文件把 `last_assistant_message` 序列化为 kebab-case 的 `last-assistant-message`，并在执行命令前 `command.arg(notify_payload)`。
+两套系统都包含 `last_assistant_message`，导致重复播报。现代 Stop Hook 已覆盖需求，遗留 notify 通过 `payload.type === "agent-turn-complete"` 跳过。
-## Codex CLI：Stop Hook
+源码：`codex-rs/hooks/src/legacy_notify.rs` — 向后兼容，JSON 通过 `command.arg()` 传入，字段序列化为 kebab-case。
-Codex 也支持 Claude 风格 Hook。Stop Hook 的输入 JSON 写入 stdin。
+## 触发时间点
-配置示例：
+Hook 在 AI **回复完成**时触发，每个回合一次。Claude Code 和 Codex 均使用 `Stop` 事件：
-```json
-{
-  "hooks": {
-    "Stop": [
-      {
-        "hooks": [
-          {
-            "type": "command",
-            "command": "bash $HOME/.config/iSpeak/hook-speak.sh codex",
-            "timeout": 30
-          }
-        ]
-      }
-    ]
-  }
-}
 ```
-脚本实际收到：
-```bash
-$1 = "codex"
-$2 = empty
-stdin = '{"hook_event_name":"Stop",...,"last_assistant_message":"..."}'
+用户发送消息 → AI 生成回复 → 回复结束 → Hook 触发 → 提取文本 → 发送 socket → TTS 播报
 ```
-核心字段（源码 `StopCommandInput` struct）：
-```rust
-struct StopCommandInput {
-    session_id: String,
-    turn_id: String,
-    transcript_path: NullableString,
-    cwd: String,
-    hook_event_name: String,
-    model: String,
-    permission_mode: String,
-    stop_hook_active: bool,
-    last_assistant_message: NullableString,  // ← Codex 有此字段
-}
-```
+从 Hook 触发到 TTS 首字延迟通常 < 500ms（取决于文本长度和网络）。
-对应 JSON：
+## 来源 & 音色
-```json
-{
-  "session_id": "...",
-  "turn_id": "...",
-  "transcript_path": "...",
-  "cwd": "...",
-  "hook_event_name": "Stop",
-  "model": "...",
-  "permission_mode": "bypassPermissions",
-  "stop_hook_active": false,
-  "last_assistant_message": "最后一条 assistant 回复"
-}
-```
+Hook 调用时传入来源名称（`$1`），对应 `config.json` 中的音色映射：
-源码依据：
+```bash
+# ~/.claude/settings.json — Claude Code
+"command": "bash ~/.config/iSpeak/hook-speak.sh claude"
-- `codex-rs/hooks/src/events/stop.rs`（https://github.com/openai/codex，2026-05-11）：构造 `StopCommandInput`，包含 `last_assistant_message` 和 `transcript_path`。
-- `codex-rs/hooks/src/engine/command_runner.rs`（同上）：Hook 命令通过 stdin 接收 `input_json`。
+# ~/.codex/hooks.json — Codex
+"command": "bash /Users/admin/.config/iSpeak/hook-speak.sh codex"
+```
-## Codex Transcript
+文本加上 `{source:claude}` 或 `{source:codex}` 前缀发往 socket，`ispeakd` 解析后选择对应音色。无前缀则用 `defaultVoice`。
-Codex 的 transcript/session 文件是 JSONL。实际 assistant 回复形态：
+音色映射示例（`~/.config/iSpeak/config.json`）：
 ```json
 {
-  "type": "response_item",
-  "payload": {
-    "type": "message",
-    "role": "assistant",
-    "content": [
-      {
-        "type": "output_text",
-        "text": "最后一条 assistant 回复"
-      }
-    ]
+  "defaultVoice": { "voice_type": "zh_female_mizai_uranus_bigtts" },
+  "sourceVoices": {
+    "claude": { "voice_type": "zh_female_tianmeitaozi_uranus_bigtts" },
+    "codex": { "voice_type": "zh_female_shuangkuaisisi_uranus_bigtts" }
   }
 }
 ```
-提取规则：
+日志中也会区分来源：
-```js
-event.type === "response_item" &&
-event.payload?.type === "message" &&
-event.payload?.role === "assistant"
 ```
-然后拼接：
-```js
-event.payload.content[].text
+TTS [claude]: 飞哥好。           → tianmeitaozi 音色
+TTS [codex]: 飞哥，你好。        → shuangkuaisisi 音色
+TTS [default]: 直接文本          → mizai 音色
 ```
-## Claude Code：Stop Hook
-> **来源**：[Claude Code Hooks Reference](https://code.claude.com/docs/en/hooks.md)，更新时间：2026-05-11
+## Claude Code Stop Hook
-Claude Code 官方 Stop Hook **没有 `last_assistant_message` 字段**。
-根据官方文档，Stop Hook 的 Common Input Fields 为：
+stdin JSON（实测，2026-05）：
 ```json
 {
-  "session_id": "abc123",
-  "transcript_path": "/home/user/.claude/projects/.../transcript.jsonl",
-  "cwd": "/home/user/my-project",
-  "permission_mode": "default",
+  "session_id": "...",
+  "transcript_path": "/Users/admin/.claude/projects/.../xxx.jsonl",
+  "cwd": "...",
   "hook_event_name": "Stop",
-  "effort": {
-    "level": "medium"
-  }
-}
-```
-子 agent 上下文中额外字段：
-```json
-{
-  "agent_id": "subagent_xyz",
-  "agent_type": "Explore"
+  "last_assistant_message": "最后一条 assistant 回复"
 }
 ```
-**结论**：Claude Code Stop Hook 官方设计只提供 `transcript_path`，没有直接内嵌 `last_assistant_message`。旧版本脚本的 `last_assistant_message` / `message` fallback 实际上**从未被官方文档支持**。
-Claude transcript 常见 assistant 形态：
-```json
-{"role":"assistant","content":[{"type":"text","text":"..."}]}
-```
-或：
-```json
-{"message":{"role":"assistant","content":[{"type":"text","text":"..."}]}}
-```
-## 当前脚本策略
-`configs/hook-speak.sh` 当前入口：
-```bash
-input="${2:-}"
-if [[ -z "$input" ]]; then
-  input=$(cat)
-fi
-```
-含义：
-- Codex `notify`：读 `$2`
-- Claude / Codex Stop Hook：读 stdin
-- 如果 Codex 的 `notify` 和 `Stop` 同时启用，脚本会按 `turn_id` 去重，避免同一回合播两次
-Codex 文本字段优先级（源码确认）：
-```js
-payload["last-assistant-message"]  // notify: kebab-case
-payload.last_assistant_message      // Stop Hook: snake_case
-payload.lastAssistantMessage
-payload.message
-payload.lastMessage
-payload.transcript_path
-payload.transcriptPath
-payload["transcript-path"]
-```
-Claude Code 文本字段优先级（官方文档）：
-```js
-payload.transcript_path  // 官方支持的唯一方式
-```
-> **注**：Claude Code Stop Hook 官方 payload 中**没有 `last_assistant_message` 字段**，这是与 Codex 的本质区别。
-## 为什么不能只读 stdin
-因为 Codex `notify` 不走 stdin。只读 stdin 会导致：
-```text
-TEXT_LEN: 0
-SPOKE: SKIP
-```
-正确做法是先读 `$2`，再读 stdin；不扫历史 session。
-## Claude Code TEXT_LEN: 0 的根因
-当 Claude Code Stop Hook 触发但 `TEXT_LEN: 0` 时：
-1. **官方字段不存在**：Claude Code Stop Hook 官方 payload 中**没有 `last_assistant_message` 字段**，只有 `transcript_path`
-2. **transcript 文件可能晚一点才写完**：Hook 触发时文件虽已存在，但最后一条 assistant 文本还没落盘
-3. **结果**：如果只读一次，`hook-speak.sh` 可能拿到空串，本次不播报
-当前脚本对 Claude transcript 做了很短的轮询，等最后一条 assistant 文本真正出现再播，避免这个时序窗。
+Claude Code 官方文档只列出 `transcript_path`，但实际 payload **包含 `last_assistant_message`**（实测确认）。直接用 direct 字段，无需读 transcript。
-这是 **Claude Code 与 Codex 的设计差异**，非 bug。Codex CLI（无论 notify 还是 Stop Hook）都提供 `last_assistant_message`，而 Claude Code 官方只提供 `transcript_path`。
+## 历史演进
-解决方案：从 `transcript_path` 读取并解析为最终一条 assistant 回复，并在 Claude 路径上补一个短轮询。
+- v1（250 行）：transcript 轮询 + turn_id 去重 + state file + text hash。复杂度高，`session_id` 做去重 key 导致同一 session 只播第一条。
+- v2（53 行）：省略去重和 transcript 轮询，但 `payload.message` 回退太宽泛，且 Codex 重复触发未处理。
+- v3（51 行）：统一提取，移除 `payload.message`，过滤 `agent-turn-complete` 解决 Codex 双重通知导致的重复播报。

package/main.go CHANGED Viewed

@@ -40,7 +40,6 @@ var errAlreadyRunning = errors.New("iSpeak already running")
 type StreamPlayer interface {
 	Write(audio []byte) error
 	CloseAndWait() error
-	Abort() error
 }
 // 最简单的播放器：channel 队列，串行播报
@@ -49,45 +48,75 @@ type Player struct {
 }
 type job struct {
-	text  string
-	voice VoiceInfo
-	cfg   Config
+	text   string
+	voice  VoiceInfo
+	source string
+	cfg    Config
 }
 func NewPlayer() *Player {
-	p := &Player{ch: make(chan job, 256)}
+	p := &Player{ch: make(chan job, 1)}
 	go p.loop()
 	return p
 }
-func (p *Player) Submit(text string, voice VoiceInfo, cfg Config) {
-	log.Printf("TTS: %s", text)
-	p.ch <- job{text, voice, cfg}
+func (p *Player) Submit(text string, voice VoiceInfo, source string, cfg Config) {
+	log.Printf("TTS [%s]: %s", source, text)
+	// 丢弃队列中的旧消息，只保留最新
+	select {
+	case <-p.ch:
+	default:
+	}
+	p.ch <- job{text, voice, source, cfg}
 }
 func (p *Player) loop() {
+	defer func() {
+		if r := recover(); r != nil {
+			log.Printf("Player loop 崩溃: %v，重启中", r)
+			go p.loop()
+		}
+	}()
+	player, err := newDefaultStreamPlayer()
+	if err != nil {
+		log.Printf("启动播放器失败: %v", err)
+		return
+	}
+	defer player.CloseAndWait()
 	for j := range p.ch {
-		player, err := newDefaultStreamPlayer()
-		if err != nil {
-			log.Printf("启动播放器失败: %v", err)
-			continue
+		if err := p.play(j, player); err != nil {
+			log.Printf("播放器异常，重建: %v", err)
+			player.CloseAndWait()
+			player, err = newDefaultStreamPlayer()
+			if err != nil {
+				log.Printf("重建播放器失败: %v", err)
+				return
+			}
 		}
-		p.play(j, player)
-		_ = player.CloseAndWait()
 	}
 }
-func (p *Player) play(j job, player StreamPlayer) {
+func (p *Player) play(j job, player StreamPlayer) error {
 	startedAt := time.Now()
+	var writeErr error
 	onAudio := func(audio []byte) error {
-		return player.Write(audio)
+		if err := player.Write(audio); err != nil {
+			writeErr = err
+			return err
+		}
+		return nil
 	}
 	if err := synthesizeStream(context.Background(), j.cfg, j.text, &j.voice, onAudio); err != nil {
+		if writeErr != nil {
+			return writeErr
+		}
 		log.Printf("TTS 合成失败: %v", err)
-		return
 	}
 	log.Printf("TTS: 完成 elapsed=%s", time.Since(startedAt).Round(time.Millisecond))
+	return nil
 }
 // 音色信息
@@ -231,7 +260,7 @@ func synthesizeStream(ctx context.Context, cfg Config, text string, voice *Voice
 		return fmt.Errorf("http request: %w", err)
 	}
 	if resp.StatusCode != 200 {
-		io.Copy(io.Discard, resp.Body) // 消费 body 以释放连接
+		io.Copy(io.Discard, resp.Body)
 		resp.Body.Close()
 		return fmt.Errorf("http status %d", resp.StatusCode)
 	}
@@ -240,28 +269,6 @@ func synthesizeStream(ctx context.Context, cfg Config, text string, voice *Voice
 	return parseSSEStream(resp.Body, onAudio)
 }
-// 解析 SSE 流，提取 base64 音频数据
-func parseSSE(r io.Reader) ([]byte, error) {
-	var chunks [][]byte
-	if err := parseSSEStream(r, func(audio []byte) error {
-		chunk := append([]byte(nil), audio...)
-		chunks = append(chunks, chunk)
-		return nil
-	}); err != nil {
-		return nil, err
-	}
-	total := 0
-	for _, c := range chunks {
-		total += len(c)
-	}
-	result := make([]byte, 0, total)
-	for _, c := range chunks {
-		result = append(result, c...)
-	}
-	return result, nil
-}
 func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
 	audioChunks := 0
 	reader := bufio.NewReaderSize(r, 64*1024)
@@ -552,7 +559,7 @@ func handleConnection(conn net.Conn, player *Player) {
 		return
 	}
-	voice, content := extractVoicePrefix(text, cfg)
+	source, voice, content := extractVoicePrefix(text, cfg)
 	if voice == nil {
 		voice = cfg.DefaultVoice
 	}
@@ -566,22 +573,24 @@ func handleConnection(conn net.Conn, player *Player) {
 		return
 	}
-	player.Submit(cleaned, *voice, cfg)
+	player.Submit(cleaned, *voice, source, cfg)
 }
-// 解析消息中的音色前缀，返回 VoiceInfo
-func extractVoicePrefix(text string, cfg Config) (voice *VoiceInfo, content string) {
+// 解析消息中的音色前缀，返回 (来源, 音色, 内容)
+func extractVoicePrefix(text string, cfg Config) (source string, voice *VoiceInfo, content string) {
 	// 格式: {source:claude}文本
 	const prefix = "{source:"
 	if strings.HasPrefix(text, prefix) {
 		if end := strings.Index(text, "}"); end > len(prefix) {
-			if v, ok := cfg.SourceVoices[text[len(prefix):end]]; ok {
+			source = text[len(prefix):end]
+			if v, ok := cfg.SourceVoices[source]; ok {
 				voice = v
 			}
 			content = text[end+1:]
 			return
 		}
 	}
+	source = "default"
 	content = text
 	return
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@xdfnet/ispeak",
-  "version": "1.6.15",
+  "version": "1.7.0",
   "description": "Local macOS TTS daemon for AI coding assistants, powered by Volcengine streaming TTS.",
   "license": "MIT",
   "homepage": "https://github.com/xdfnet/iSpeak#readme",

package/scripts/ispeak CHANGED Viewed

@@ -2,7 +2,7 @@
 # ispeak — iSpeak 控制命令
 set -euo pipefail
-VERSION="1.6.9"
+VERSION="1.7.0"
 SOCK="$HOME/.config/iSpeak/ispeak.sock"
 PLIST="$HOME/Library/LaunchAgents/com.ispeak.plist"
 LEGACY_PLIST="$HOME/Library/LaunchAgents/com.iSpeak.plist"