npm - @xdfnet/ispeak - Versions diffs - 1.6.4 → 1.6.6 - Mend

@xdfnet/ispeak 1.6.4 → 1.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/Docs/ARCHITECTURE.md +1 -1
package/Docs/HOOK_TEXT_EXTRACTION.md +240 -0
package/README.md +11 -6
package/clean_text.go +153 -0
package/configs/config.example.json +1 -1
package/configs/hook-speak.sh +119 -105
package/main.go +65 -205
package/npm/postinstall.js +41 -2
package/package.json +4 -2
package/scripts/ispeak +1 -1

package/Docs/ARCHITECTURE.md CHANGED Viewed

@@ -177,7 +177,7 @@ pending -> running -> delete
 ├── config.json      # API Key、音色配置
 ├── ispeak.sock      # Unix Socket
 ├── ispeak.log       # 日志（lumberjack 轮转）
-└── hook-speak.sh    # Claude/Codex Stop Hook
+└── hook-speak.sh    # Claude/Codex Hook
 ~/Library/LaunchAgents/
 └── com.iSpeak.plist # launchd 服务配置

package/Docs/HOOK_TEXT_EXTRACTION.md ADDED Viewed

@@ -0,0 +1,240 @@
+# Hook 文本提取链路
+本文记录 Claude Code / Codex CLI 在 Hook 中拿到“最后一条 assistant 回复”的实际方式。`hook-speak.sh` 的目标只做两件事：取最后一条 assistant 回复，发给 iSpeak socket。
+## 结论
+推荐优先级：
+1. **Codex `notify`**：从脚本第二个参数 `$2` 读取 JSON，取 `last-assistant-message`。
+2. **Claude / Codex Stop Hook**：从 stdin 读取 JSON，优先取 `last_assistant_message`。
+3. **明确 transcript**：如果没有直接字段，只读取 payload 里明确传入的 `transcript_path`。
+不扫描 `~/.codex/sessions`。既没有直接字段、也没有 `transcript_path` 时，本次不播报。
+## Codex CLI：notify
+当前本机版本：
+```text
+codex-cli 0.130.0
+```
+Codex CLI 的 `notify = [...]` 是 legacy notify 机制。官方源码里会把通知 JSON 追加成命令的最后一个 argv 参数，不写 stdin。
+配置示例：
+```toml
+notify = ["/Users/你的用户名/.config/iSpeak/hook-speak.sh", "codex"]
+```
+脚本实际收到：
+```bash
+$1 = "codex"
+$2 = '{"type":"agent-turn-complete",...,"last-assistant-message":"..."}'
+stdin = empty
+```
+核心字段：
+```json
+{
+  "type": "agent-turn-complete",
+  "thread-id": "...",
+  "turn-id": "...",
+  "cwd": "...",
+  "input-messages": ["..."],
+  "last-assistant-message": "最后一条 assistant 回复"
+}
+```
+所以 Codex `notify` 的正确读取方式是：
+```bash
+input="${2:-}"
+```
+然后解析：
+```js
+payload["last-assistant-message"]
+```
+源码依据：`codex-rs/hooks/src/legacy_notify.rs`。该文件把 `last_assistant_message` 序列化为 kebab-case 的 `last-assistant-message`，并在执行命令前 `command.arg(notify_payload)`。
+## Codex CLI：Stop Hook
+Codex 也支持 Claude 风格 Hook。Stop Hook 的输入 JSON 写入 stdin。
+配置示例：
+```json
+{
+  "hooks": {
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash $HOME/.config/iSpeak/hook-speak.sh codex",
+            "timeout": 30
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+脚本实际收到：
+```bash
+$1 = "codex"
+$2 = empty
+stdin = '{"hook_event_name":"Stop",...,"last_assistant_message":"..."}'
+```
+核心字段：
+```json
+{
+  "session_id": "...",
+  "turn_id": "...",
+  "transcript_path": "...",
+  "cwd": "...",
+  "hook_event_name": "Stop",
+  "model": "...",
+  "permission_mode": "bypassPermissions",
+  "stop_hook_active": false,
+  "last_assistant_message": "最后一条 assistant 回复"
+}
+```
+源码依据：
+- `codex-rs/hooks/src/events/stop.rs`：构造 `StopCommandInput`，包含 `last_assistant_message` 和 `transcript_path`。
+- `codex-rs/hooks/schema/generated/stop.command.input.schema.json`：Stop stdin schema。
+- `codex-rs/hooks/src/engine/command_runner.rs`：Hook 命令通过 stdin 接收 `input_json`。
+## Codex Transcript
+Codex 的 transcript/session 文件是 JSONL。实际 assistant 回复形态：
+```json
+{
+  "type": "response_item",
+  "payload": {
+    "type": "message",
+    "role": "assistant",
+    "content": [
+      {
+        "type": "output_text",
+        "text": "最后一条 assistant 回复"
+      }
+    ]
+  }
+}
+```
+提取规则：
+```js
+event.type === "response_item" &&
+event.payload?.type === "message" &&
+event.payload?.role === "assistant"
+```
+然后拼接：
+```js
+event.payload.content[].text
+```
+## Claude Code：Stop Hook
+Claude Code 官方 Stop Hook 通过 stdin 传 JSON，核心字段是：
+```json
+{
+  "session_id": "...",
+  "transcript_path": "...",
+  "hook_event_name": "Stop",
+  "stop_hook_active": false
+}
+```
+有些版本或场景可能直接提供：
+```json
+{
+  "last_assistant_message": "最后一条 assistant 回复"
+}
+```
+所以 Claude 的读取顺序是：
+1. `last_assistant_message`
+2. `message`
+3. `transcript_path`
+Claude transcript 常见 assistant 形态：
+```json
+{"role":"assistant","content":[{"type":"text","text":"..."}]}
+```
+或：
+```json
+{"message":{"role":"assistant","content":[{"type":"text","text":"..."}]}}
+```
+## 当前脚本策略
+`configs/hook-speak.sh` 当前入口：
+```bash
+input="${2:-}"
+if [[ -z "$input" ]]; then
+  input=$(cat)
+fi
+```
+含义：
+- Codex `notify`：读 `$2`
+- Claude / Codex Stop Hook：读 stdin
+Codex 文本字段优先级：
+```js
+payload["last-assistant-message"]
+payload.last_assistant_message
+payload.lastAssistantMessage
+payload.message
+payload.lastMessage
+payload.transcript_path
+payload.transcriptPath
+payload["transcript-path"]
+```
+Claude 文本字段优先级：
+```js
+payload.last_assistant_message
+payload.message
+payload.transcript_path
+payload.transcriptPath
+```
+## 为什么不能只读 stdin
+因为 Codex `notify` 不走 stdin。只读 stdin 会导致：
+```text
+TEXT_LEN: 0
+SPOKE: SKIP
+```
+正确做法是先读 `$2`，再读 stdin；不扫历史 session。

package/README.md CHANGED Viewed

@@ -1,13 +1,13 @@
 # iSpeak
-![Version](https://img.shields.io/badge/version-1.6.3-blue)
+![Version](https://img.shields.io/badge/version-1.6.6-blue)
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![Go Version](https://img.shields.io/badge/Go-1.26-blue)](https://golang.org/dl/)
 ![Platform](https://img.shields.io/badge/platform-macOS-green)
 iSpeak 让 AI 编程助手开口说话。你写代码，它播结果——眼睛休息，耳朵来听。
-适合 Claude Code 或 Codex 常驻后台的开发者。AI 完成任务后自动播报；你发新消息时，旧播报立即中断，不花冤枉钱。
+适合 Claude Code 或 Codex 常驻后台的开发者。AI 完成任务后自动播报；你发新消息时，未开始的旧播报会被丢弃，不花冤枉钱。
 ## 效果示例
@@ -104,8 +104,7 @@ pending → running → delete
 - Markdown 链接：只保留链接标题，不播 URL
 - 绝对路径：简化为“路径”
 - 长 commit hash、UUID、长 ID：不播
-- 明显文件列表：如模型分片、代码文件列表、下载文件清单
-- 下载进度和终端噪声：百分比、速度、进度条、ANSI 控制符
+- 下载进度噪声：速度、ETA、预计剩余时间、ANSI 控制符
 保留优先级：结论、成功/失败状态、需要用户操作的下一步、关键错误原因。
@@ -131,7 +130,7 @@ ispeak-codex "消息"    # Codex 专属音色
 ```json
 {
   "apiKey": "你的火山引擎 API Key",
-  "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional",
+  "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse",
   "defaultVoice": {
     "voice_type": "zh_female_mizai_uranus_bigtts",
     "resourceId": "seed-tts-2.0"
@@ -173,7 +172,13 @@ ispeak-codex "消息"    # Codex 专属音色
 ### Codex
-在 `~/.codex/hooks.json` 中添加 Stop Hook：
+推荐在 `~/.codex/config.toml` 中添加回合结束通知：
+```toml
+notify = ["bash", "/Users/你的用户名/.config/iSpeak/hook-speak.sh", "codex"]
+```
+如果你启用了 Codex hooks，也可以在 `~/.codex/hooks.json` 中添加 Stop Hook：
 ```json
 {

package/clean_text.go ADDED Viewed

@@ -0,0 +1,153 @@
+package main
+import (
+	"regexp"
+	"strings"
+)
+var (
+	markdownLinkRe     = regexp.MustCompile(`\[[^\]]+\]\(([^)]*)\)`)
+	absolutePathRe     = regexp.MustCompile(`/(?:Users|private|tmp|var|opt|usr|bin|sbin|etc|Library|Applications)/\S+`)
+	commitHashRe       = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)
+	uuidRe             = regexp.MustCompile(`\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b`)
+	urlRe              = regexp.MustCompile(`https?://\S+`)
+	ansiEscapeRe       = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z]`)
+	multiSpaceRe       = regexp.MustCompile(`\s+`)
+	markdownListRe     = regexp.MustCompile(`^\s*(?:[-*+]\s+|\d+[.)]\s+)`)
+	htmlTagRe          = regexp.MustCompile(`<[^>]+>`)
+	codeFenceStartRe   = regexp.MustCompile("^```")
+	artifactStartRe    = regexp.MustCompile(`(?i)^<artifact\b`)
+	htmlDocumentLineRe = regexp.MustCompile(`(?i)^<!doctype html|^<html\b|^<head\b|^<body\b|^<style\b|^</`)
+	speedNoiseRe       = regexp.MustCompile(`(?i)\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s`)
+	etaNoiseRe         = regexp.MustCompile(`(?i)\bETA\b|预计剩余|剩余时间`)
+)
+// 过滤格式符号，保留自然朗读文本。
+// 顺序很重要：先跳过跨行块结构，再跳过整行噪声，最后清理行内符号。
+func cleanText(text string) string {
+	var lines []string
+	rawLines := strings.Split(text, "\n")
+	inCodeBlock := false
+	inArtifact := false
+	inMarkdownTable := false
+	for i := 0; i < len(rawLines); i++ {
+		line := rawLines[i]
+		line = strings.TrimSpace(line)
+		if line == "" {
+			inMarkdownTable = false
+			continue
+		}
+		if codeFenceStartRe.MatchString(line) {
+			inCodeBlock = !inCodeBlock
+			continue
+		}
+		if inCodeBlock {
+			continue
+		}
+		if artifactStartRe.MatchString(line) {
+			inArtifact = !strings.Contains(strings.ToLower(line), "</artifact>")
+			continue
+		}
+		if inArtifact {
+			if strings.Contains(strings.ToLower(line), "</artifact>") {
+				inArtifact = false
+			}
+			continue
+		}
+		if isMarkdownTableSeparator(line) {
+			if len(lines) > 0 && isMarkdownTableRow(strings.TrimSpace(rawLines[i-1])) {
+				lines = lines[:len(lines)-1]
+			}
+			inMarkdownTable = true
+			continue
+		}
+		if inMarkdownTable {
+			if isMarkdownTableRow(line) {
+				continue
+			}
+			inMarkdownTable = false
+		}
+		if shouldSkipSpeechLine(line) {
+			continue
+		}
+		cleaned := cleanSpeechLine(line)
+		if cleaned != "" {
+			lines = append(lines, cleaned)
+		}
+	}
+	return strings.Join(lines, "，")
+}
+func shouldSkipSpeechLine(line string) bool {
+	if isMarkdownTableSeparator(line) {
+		return true
+	}
+	if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
+		return true
+	}
+	if htmlDocumentLineRe.MatchString(line) {
+		return true
+	}
+	if isProgressNoiseLine(line) {
+		return true
+	}
+	if isMostlyTableRow(line) {
+		return true
+	}
+	return false
+}
+func isMarkdownTableSeparator(line string) bool {
+	line = strings.TrimSpace(line)
+	return strings.Contains(line, "|") && strings.Trim(line, "|-: ") == ""
+}
+func isMarkdownTableRow(line string) bool {
+	line = strings.TrimSpace(line)
+	return strings.Count(line, "|") >= 2
+}
+func cleanSpeechLine(line string) string {
+	// Markdown 链接必须在 URL 删除前处理，否则会丢掉链接标题。
+	line = ansiEscapeRe.ReplaceAllString(line, "")
+	line = markdownListRe.ReplaceAllString(line, "")
+	line = markdownLinkRe.ReplaceAllStringFunc(line, func(match string) string {
+		if end := strings.Index(match, "]"); end > 1 {
+			return match[1:end]
+		}
+		return ""
+	})
+	line = urlRe.ReplaceAllString(line, "")
+	line = absolutePathRe.ReplaceAllString(line, " 路径 ")
+	// UUID 必须在短 hash 前处理，避免先删短片段后破坏 UUID 识别。
+	line = uuidRe.ReplaceAllString(line, "")
+	line = commitHashRe.ReplaceAllString(line, "")
+	line = htmlTagRe.ReplaceAllString(line, "")
+	line = strings.NewReplacer(
+		"**", "",
+		"*", "",
+		"`", "",
+		"#", "",
+		">", "",
+		"✅", "",
+		"❌", "",
+		"✓", "",
+		"✗", "",
+		"→", "到",
+	).Replace(line)
+	line = strings.Trim(line, " \t-:|")
+	line = multiSpaceRe.ReplaceAllString(line, " ")
+	return strings.TrimSpace(line)
+}
+func isMostlyTableRow(line string) bool {
+	if !strings.Contains(line, "|") {
+		return false
+	}
+	return strings.Count(line, "|") >= 2 && len([]rune(line)) > 40
+}
+func isProgressNoiseLine(line string) bool {
+	return speedNoiseRe.MatchString(line) || etaNoiseRe.MatchString(line)
+}

package/configs/config.example.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "apiKey": "your-api-key",
-  "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional",
+  "endpoint": "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse",
   "defaultVoice": {
     "voice_type": "zh_female_mizai_uranus_bigtts",
     "resourceId": "seed-tts-2.0"

package/configs/hook-speak.sh CHANGED Viewed

@@ -1,130 +1,144 @@
 #!/bin/bash
-# Stop Hook: 从 transcript 文件中提取本次会话所有 Claude 回复文本
-# iAgent 调用 Claude 时设 ISPEAK_SKIP=1，此时跳过（iAgent 自己播）
+# Claude Code / Codex 共用播报 Hook：
+# 只取最后一条 assistant 回复，加 `{source:<name>}` 前缀后发给 ispeakd。
 [[ "$ISPEAK_SKIP" == "1" ]] && exit 0
-# 来源参数: claude 或 codex
 SOURCE="${1:-claude}"
 SOCK="$HOME/.config/iSpeak/ispeak.sock"
 LOG="$HOME/.config/iSpeak/hook.log"
-input=$(cat)
-json_value() {
-  local key="$1"
-  if command -v node >/dev/null 2>&1; then
-    printf "%s" "$input" | node -e '
-      const key = process.argv[1];
-      let input = "";
-      process.stdin.setEncoding("utf8");
-      process.stdin.on("data", chunk => input += chunk);
-      process.stdin.on("end", () => {
-        try {
-          const value = JSON.parse(input)[key];
-          if (typeof value === "string") process.stdout.write(value);
-        } catch (_) {}
-      });
-    ' "$key"
-    return
-  fi
-  printf "%s" "$input" | sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\"\([^\"]*\)\".*/\1/p"
+# Codex `notify` 会把 JSON 作为最后一个参数传入；
+# Claude Code 以及 Codex 的 Claude 风格 Stop Hook 会把 JSON 写到 stdin。
+input="${2:-}"
+if [[ -z "$input" ]]; then
+  input=$(cat)
+fi
+input_file=$(mktemp)
+trap 'rm -f "$input_file"' EXIT
+printf "%s" "$input" > "$input_file"
+text=$(SOURCE="$SOURCE" HOOK_INPUT_FILE="$input_file" node <<'NODE' 2>/dev/null
+const fs = require("fs");
+{
+  const input = readFile(process.env.HOOK_INPUT_FILE || "");
+  const payload = parseJSON(input) || {};
+  const source = process.env.SOURCE || "";
+  const text = source.startsWith("codex")
+    ? lastCodexAssistant(payload)
+    : lastClaudeAssistant(payload);
+  if (text) process.stdout.write(text);
 }
-extract_recent_assistant_text() {
-  local transcript="$1"
-  local cutoff="$2"
-  if command -v node >/dev/null 2>&1; then
-    node -e '
-      const fs = require("fs");
-      const file = process.argv[1];
-      const cutoff = Number(process.argv[2]);
-      const out = [];
-      function collectText(content) {
-        if (typeof content === "string") {
-          out.push(content);
-          return;
-        }
-        if (!Array.isArray(content)) return;
-        for (const item of content) {
-          if (item && typeof item.text === "string") out.push(item.text);
-        }
-      }
+function lastClaudeAssistant(payload) {
+  const direct = firstString(payload.last_assistant_message, payload.message);
+  if (direct) return direct;
-      for (const line of fs.readFileSync(file, "utf8").split(/\r?\n/)) {
-        if (!line.trim()) continue;
-        try {
-          const event = JSON.parse(line);
-          if (typeof event.timestamp === "number" && event.timestamp < cutoff) continue;
-          if (event.role === "assistant") collectText(event.content);
-          if (event.message && event.message.role === "assistant") collectText(event.message.content);
-        } catch (_) {}
-      }
-      process.stdout.write([...new Set(out.filter(Boolean))].join(" "));
-    ' "$transcript" "$cutoff" 2>/dev/null
-    return
-  fi
-  awk -v cutoff="$cutoff" '
-    {
-      if (match($0, /"timestamp"[[:space:]]*:[[:space:]]*[0-9]+/)) {
-        ts = substr($0, RSTART, RLENGTH)
-        gsub(/[^0-9]/, "", ts)
-        ts = int(ts)
-        if (ts < cutoff) next
-      }
+  const transcript = firstString(payload.transcript_path, payload.transcriptPath);
+  return transcript ? lastAssistantFromTranscript(transcript, "claude") : "";
+}
-      if (match($0, /"role"[[:space:]]*:[[:space:]]*"assistant"/)) {
-        if (match($0, /"content"[[:space:]]*:[[:space:]]*\[/)) {
-          gsub(/[^{]*\[/, "", $0)
-          gsub(/\].*/, "", $0)
-          while (match($0, /"text"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
-            t = substr($0, RSTART, RLENGTH)
-            gsub(/"text"[[:space:]]*:[[:space:]]*"/, "", t)
-            gsub(/"$/, "", t)
-            if (t != "") print t
-            $0 = substr($0, RSTART + RLENGTH)
-          }
-        } else if (match($0, /"content"[[:space:]]*:[[:space:]]*"[^"]*"/)) {
-          t = substr($0, RSTART, RLENGTH)
-          gsub(/"content"[[:space:]]*:[[:space:]]*"/, "", t)
-          gsub(/"$/, "", t)
-          if (t != "") print t
-        }
-      }
-    }
-  ' "$transcript" 2>/dev/null | sort -u | tr '\n' ' '
+function lastCodexAssistant(payload) {
+  const direct = firstString(
+    payload["last-assistant-message"],
+    payload.last_assistant_message,
+    payload.lastAssistantMessage,
+    payload.message,
+    payload.lastMessage
+  );
+  if (direct) return direct;
+  const transcript = firstString(
+    payload.transcript_path,
+    payload.transcriptPath,
+    payload["transcript-path"]
+  );
+  return transcript ? lastAssistantFromTranscript(transcript, "codex") : "";
+}
+function readFile(file) {
+  try {
+    return fs.readFileSync(file, "utf8");
+  } catch {
+    return "";
+  }
 }
-# 从 stdin JSON 提取 transcript 路径和最后一条消息
-transcript=$(json_value "transcript_path")
-last_msg=$(json_value "last_assistant_message")
+function parseJSON(text) {
+  try {
+    return JSON.parse(text);
+  } catch {
+    return null;
+  }
+}
+function firstString(...values) {
+  for (const value of values) {
+    if (typeof value === "string" && value !== "") return value;
+  }
+  return "";
+}
+function collectText(content) {
+  if (typeof content === "string") return content;
+  if (!Array.isArray(content)) return "";
+  return content
+    .map(item => item && typeof item.text === "string" ? item.text : "")
+    .filter(Boolean)
+    .join(" ");
+}
-all_text="$last_msg"
+function lastAssistantFromTranscript(file, source) {
+  let data = "";
+  try {
+    data = fs.readFileSync(file, "utf8");
+  } catch {
+    return "";
+  }
+  let last = "";
+  for (const line of data.split(/\r?\n/)) {
+    if (!line.trim()) continue;
+    const event = parseJSON(line);
+    if (!event) continue;
+    if (source === "claude") {
+      if (event.role === "assistant") {
+        last = collectText(event.content) || last;
+      }
+      if (event.message && event.message.role === "assistant") {
+        last = collectText(event.message.content) || last;
+      }
+    }
-# 如果有 transcript 文件，提取最近 30 秒内的所有 assistant 消息
-if [[ -n "$transcript" && -f "$transcript" ]]; then
-  # 计算 30 秒前的时间戳（毫秒）
-  cutoff=$(($(date +%s) * 1000 - 30000))
+    if (source === "codex" &&
+      event.type === "response_item" &&
+      event.payload &&
+      event.payload.type === "message" &&
+      event.payload.role === "assistant"
+    ) {
+      last = collectText(event.payload.content) || last;
+    }
+  }
+  return last;
+}
-  # 优先用 JSON parser，Node 不存在时回退到简易 awk。
-  extra=$(extract_recent_assistant_text "$transcript" "$cutoff")
+NODE
+)
-  if [[ -n "$extra" ]]; then
-    all_text="$extra"
-  fi
+if [[ "$ISPEAK_HOOK_PRINT_TEXT" == "1" ]]; then
+  printf "%s" "$text"
+  exit 0
 fi
 echo "=== $(date) ===" >> "$LOG"
 echo "SOURCE: $SOURCE" >> "$LOG"
-echo "TEXT_LEN: ${#all_text}" >> "$LOG"
-echo "PREVIEW: ${all_text:0:150}" >> "$LOG"
+echo "TEXT_LEN: ${#text}" >> "$LOG"
+echo "PREVIEW: ${text:0:150}" >> "$LOG"
-if [[ -n "$all_text" && -S "$SOCK" ]]; then
-  printf "{source:%s}%s" "$SOURCE" "$all_text" | nc -U -w5 "$SOCK" 2>> "$LOG"
+if [[ -n "$text" && -S "$SOCK" ]]; then
+  printf "{source:%s}%s" "$SOURCE" "$text" | nc -U -w5 "$SOCK" 2>> "$LOG"
   echo "SPOKE: OK" >> "$LOG"
 else
   echo "SPOKE: SKIP" >> "$LOG"

package/main.go CHANGED Viewed

@@ -17,7 +17,6 @@ import (
 	"os/exec"
 	"os/signal"
 	"path/filepath"
-	"regexp"
 	"strings"
 	"sync"
 	"syscall"
@@ -43,25 +42,6 @@ var tempDir string
 var errAlreadyRunning = errors.New("iSpeak already running")
-var (
-	markdownLinkRe     = regexp.MustCompile(`\[[^\]]+\]\(([^)]*)\)`)
-	absolutePathRe     = regexp.MustCompile(`/(?:Users|private|tmp|var|opt|usr|bin|sbin|etc|Library|Applications)/\S+`)
-	commitHashRe       = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`)
-	uuidRe             = regexp.MustCompile(`\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b`)
-	urlRe              = regexp.MustCompile(`https?://\S+`)
-	ansiEscapeRe       = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z]`)
-	multiSpaceRe       = regexp.MustCompile(`\s+`)
-	markdownListRe     = regexp.MustCompile(`^\s*(?:[-*+]\s+|\d+[.)]\s+)`)
-	htmlTagRe          = regexp.MustCompile(`<[^>]+>`)
-	codeFenceStartRe   = regexp.MustCompile("^```")
-	artifactStartRe    = regexp.MustCompile(`(?i)^<artifact\b`)
-	htmlDocumentLineRe = regexp.MustCompile(`(?i)^<!doctype html|^<html\b|^<head\b|^<body\b|^<style\b|^</`)
-	progressNoiseRe    = regexp.MustCompile(`(?i)(^\s*\d{1,3}%\s*$|\d{1,3}%.*\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s|\bETA\b|^\s*[-=]{3,}\s*$)`)
-	speedNoiseRe       = regexp.MustCompile(`(?i)\d+(?:\.\d+)?\s*(?:kb|mb|gb)/s`)
-	etaNoiseRe         = regexp.MustCompile(`(?i)\bETA\b|预计剩余|剩余时间`)
-	fileListNoiseRe    = regexp.MustCompile(`(?i)\.(?:go|js|ts|tsx|jsx|json|md|yaml|yml|toml|sum|mod|lock|html|css|sh|plist|safetensors|mp3|wav|png|jpg|jpeg|pdf|docx)\b`)
-)
 type StreamPlayer interface {
 	Write(audio []byte) error
 	CloseAndWait() error
@@ -463,7 +443,7 @@ func loadConfig() Config {
 	// 回退到环境变量
 	return Config{
 		APIKey:   envOrDefault("IAGENT_TTS_API_KEY", ""),
-		Endpoint: envOrDefault("IAGENT_TTS_ENDPOINT", "https://openspeech.bytedance.com/api/v3/tts/unidirectional"),
+		Endpoint: envOrDefault("IAGENT_TTS_ENDPOINT", "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse"),
 	}
 }
@@ -590,8 +570,7 @@ func parseSSE(r io.Reader) ([]byte, error) {
 func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
 	audioChunks := 0
-	scanner := bufio.NewScanner(r)
-	scanner.Buffer(make([]byte, 256*1024), 256*1024)
+	reader := bufio.NewReaderSize(r, 64*1024)
 	var dataLines []string
@@ -608,40 +587,49 @@ func parseSSEStream(r io.Reader, onAudio func([]byte) error) error {
 		return err
 	}
-	for scanner.Scan() {
-		line := strings.TrimSpace(scanner.Text())
+	for {
+		rawLine, err := reader.ReadString('\n')
+		if err != nil && len(rawLine) == 0 {
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("read sse: %w", err)
+		}
+		line := strings.TrimSpace(rawLine)
 		if line == "" {
 			if err := flush(); err != nil {
 				return err
 			}
-			continue
-		}
-		if strings.HasPrefix(line, ":") || strings.HasPrefix(line, "event:") ||
+		} else if strings.HasPrefix(line, ":") || strings.HasPrefix(line, "event:") ||
 			strings.HasPrefix(line, "id:") || strings.HasPrefix(line, "retry:") {
-			continue
-		}
-		if strings.HasPrefix(line, "data:") {
+			// SSE metadata, ignored.
+		} else if strings.HasPrefix(line, "data:") {
 			dataLines = append(dataLines, strings.TrimPrefix(line, "data:"))
-			continue
-		}
-		// 非标准 JSON 直出
-		if err := flush(); err != nil {
-			return err
-		}
-		ok, err := processEvent(line, onAudio)
-		if ok {
-			audioChunks++
+		} else {
+			// 非标准 JSON 直出
+			if err := flush(); err != nil {
+				return err
+			}
+			ok, err := processEvent(line, onAudio)
+			if ok {
+				audioChunks++
+			}
+			if err != nil {
+				return err
+			}
 		}
 		if err != nil {
-			return err
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("read sse: %w", err)
 		}
 	}
 	if err := flush(); err != nil {
 		return err
 	}
-	if err := scanner.Err(); err != nil {
-		return fmt.Errorf("scan: %w", err)
-	}
 	if audioChunks == 0 {
 		return fmt.Errorf("no audio data")
@@ -661,6 +649,10 @@ func processEvent(payload string, onAudio func([]byte) error) (bool, error) {
 		return false, nil
 	}
+	if err := sseEventError(event); err != nil {
+		return false, err
+	}
 	if b64 := extractAudioBase64(event); b64 != "" {
 		data, err := base64.StdEncoding.DecodeString(b64)
 		if err != nil {
@@ -675,6 +667,35 @@ func processEvent(payload string, onAudio func([]byte) error) (bool, error) {
 	return false, nil
 }
+func sseEventError(event map[string]any) error {
+	codeValue, ok := event["code"]
+	if !ok {
+		return nil
+	}
+	var code int64
+	switch v := codeValue.(type) {
+	case float64:
+		code = int64(v)
+	case int:
+		code = int64(v)
+	case int64:
+		code = v
+	default:
+		return nil
+	}
+	if code == 0 || code == 20000000 {
+		return nil
+	}
+	message, _ := event["message"].(string)
+	if message == "" {
+		message = "unknown error"
+	}
+	return fmt.Errorf("tts sse error: code=%d message=%s", code, message)
+}
 func extractAudioBase64(event map[string]any) string {
 	for _, key := range []string{"data", "audio", "audio_data"} {
 		if v, ok := event[key].(string); ok && v != "" {
@@ -691,167 +712,6 @@ func extractAudioBase64(event map[string]any) string {
 	return ""
 }
-// 过滤格式符号，保留自然朗读文本。
-// 顺序很重要：先跳过跨行块结构，再跳过整行噪声，最后清理行内符号。
-func cleanText(text string) string {
-	var lines []string
-	rawLines := strings.Split(text, "\n")
-	inCodeBlock := false
-	inArtifact := false
-	inMarkdownTable := false
-	for i := 0; i < len(rawLines); i++ {
-		line := rawLines[i]
-		line = strings.TrimSpace(line)
-		if line == "" {
-			inMarkdownTable = false
-			continue
-		}
-		if codeFenceStartRe.MatchString(line) {
-			inCodeBlock = !inCodeBlock
-			continue
-		}
-		if inCodeBlock {
-			continue
-		}
-		if artifactStartRe.MatchString(line) {
-			inArtifact = !strings.Contains(strings.ToLower(line), "</artifact>")
-			continue
-		}
-		if inArtifact {
-			if strings.Contains(strings.ToLower(line), "</artifact>") {
-				inArtifact = false
-			}
-			continue
-		}
-		if isMarkdownTableSeparator(line) {
-			if len(lines) > 0 && isMarkdownTableRow(strings.TrimSpace(rawLines[i-1])) {
-				lines = lines[:len(lines)-1]
-			}
-			inMarkdownTable = true
-			continue
-		}
-		if inMarkdownTable {
-			if isMarkdownTableRow(line) {
-				continue
-			}
-			inMarkdownTable = false
-		}
-		if shouldSkipSpeechLine(line) {
-			continue
-		}
-		cleaned := cleanSpeechLine(line)
-		if cleaned != "" {
-			lines = append(lines, cleaned)
-		}
-	}
-	return strings.Join(lines, "，")
-}
-func shouldSkipSpeechLine(line string) bool {
-	if isMarkdownTableSeparator(line) {
-		return true
-	}
-	if strings.HasPrefix(line, "---") && strings.Count(line, "-") > 3 {
-		return true
-	}
-	if htmlDocumentLineRe.MatchString(line) {
-		return true
-	}
-	if isProgressNoiseLine(line) {
-		return true
-	}
-	if isMostlyTableRow(line) {
-		return true
-	}
-	if isMostlyFileListLine(line) {
-		return true
-	}
-	return false
-}
-func isMarkdownTableSeparator(line string) bool {
-	line = strings.TrimSpace(line)
-	return strings.Contains(line, "|") && strings.Trim(line, "|-: ") == ""
-}
-func isMarkdownTableRow(line string) bool {
-	line = strings.TrimSpace(line)
-	return strings.Count(line, "|") >= 2
-}
-func cleanSpeechLine(line string) string {
-	// Markdown 链接必须在 URL 删除前处理，否则会丢掉链接标题。
-	line = ansiEscapeRe.ReplaceAllString(line, "")
-	line = markdownListRe.ReplaceAllString(line, "")
-	line = markdownLinkRe.ReplaceAllStringFunc(line, func(match string) string {
-		if end := strings.Index(match, "]"); end > 1 {
-			return match[1:end]
-		}
-		return ""
-	})
-	line = urlRe.ReplaceAllString(line, "")
-	line = absolutePathRe.ReplaceAllString(line, " 路径 ")
-	// UUID 必须在短 hash 前处理，避免先删短片段后破坏 UUID 识别。
-	line = uuidRe.ReplaceAllString(line, "")
-	line = commitHashRe.ReplaceAllString(line, "")
-	line = htmlTagRe.ReplaceAllString(line, "")
-	line = strings.NewReplacer(
-		"**", "",
-		"*", "",
-		"`", "",
-		"#", "",
-		">", "",
-		"✅", "",
-		"❌", "",
-		"✓", "",
-		"✗", "",
-		"→", "到",
-	).Replace(line)
-	line = strings.Trim(line, " \t-:|")
-	line = multiSpaceRe.ReplaceAllString(line, " ")
-	return strings.TrimSpace(line)
-}
-func isMostlyTableRow(line string) bool {
-	if !strings.Contains(line, "|") {
-		return false
-	}
-	return strings.Count(line, "|") >= 2 && len([]rune(line)) > 40
-}
-func isProgressNoiseLine(line string) bool {
-	if !progressNoiseRe.MatchString(line) {
-		return false
-	}
-	if speedNoiseRe.MatchString(line) || etaNoiseRe.MatchString(line) {
-		return true
-	}
-	return !containsCJK(line)
-}
-func isMostlyFileListLine(line string) bool {
-	if !fileListNoiseRe.MatchString(line) {
-		return false
-	}
-	if containsCJK(line) {
-		return false
-	}
-	if strings.Contains(line, ".safetensors") {
-		return true
-	}
-	return strings.Count(line, ".") >= 2 || strings.Contains(line, "/") || strings.Contains(line, " - ")
-}
-func containsCJK(s string) bool {
-	for _, r := range s {
-		if r >= '\u4e00' && r <= '\u9fff' {
-			return true
-		}
-	}
-	return false
-}
 func main() {
 	log.SetFlags(log.Ltime | log.Lshortfile)

package/npm/postinstall.js CHANGED Viewed

@@ -61,6 +61,43 @@ function copyIfMissing(src, dst, mode) {
   console.log(`配置文件已创建: ${dst}`);
 }
+function migrateDefaultEndpoint(configPath) {
+  if (!fs.existsSync(configPath)) {
+    return;
+  }
+  const oldEndpoint = "https://openspeech.bytedance.com/api/v3/tts/unidirectional";
+  const newEndpoint = "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse";
+  let config;
+  try {
+    config = JSON.parse(fs.readFileSync(configPath, "utf8"));
+  } catch (_) {
+    return;
+  }
+  if (config.endpoint !== oldEndpoint) {
+    return;
+  }
+  fs.copyFileSync(configPath, `${configPath}.bak`);
+  config.endpoint = newEndpoint;
+  fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`);
+  console.log(`配置 endpoint 已迁移到 SSE，旧配置备份: ${configPath}.bak`);
+}
+function installHook(src, dst) {
+  if (fs.existsSync(dst)) {
+    try {
+      if (fs.readFileSync(src, "utf8") !== fs.readFileSync(dst, "utf8")) {
+        fs.copyFileSync(dst, `${dst}.bak`);
+        console.log(`旧 Hook 已备份: ${dst}.bak`);
+      }
+    } catch (_) {
+      fs.copyFileSync(dst, `${dst}.bak`);
+      console.log(`旧 Hook 已备份: ${dst}.bak`);
+    }
+  }
+  copyExecutable(src, dst);
+  console.log(`Hook 脚本已安装: ${dst}`);
+}
 function sleep(ms) {
   Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
 }
@@ -106,8 +143,10 @@ function main() {
   symlinkForce(cliPath, path.join(binDir, "ispeak-claude"));
   symlinkForce(cliPath, path.join(binDir, "ispeak-codex"));
-  copyIfMissing(path.join(root, "configs", "config.example.json"), path.join(configDir, "config.json"));
-  copyIfMissing(path.join(root, "configs", "hook-speak.sh"), path.join(configDir, "hook-speak.sh"), 0o755);
+  const configPath = path.join(configDir, "config.json");
+  copyIfMissing(path.join(root, "configs", "config.example.json"), configPath);
+  migrateDefaultEndpoint(configPath);
+  installHook(path.join(root, "configs", "hook-speak.sh"), path.join(configDir, "hook-speak.sh"));
   const plist = fs
     .readFileSync(path.join(root, "configs", "com.iSpeak.plist"), "utf8")

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@xdfnet/ispeak",
-  "version": "1.6.4",
+  "version": "1.6.6",
   "description": "Local macOS TTS daemon for AI coding assistants, powered by Volcengine streaming TTS.",
   "license": "MIT",
   "homepage": "https://github.com/xdfnet/iSpeak#readme",
@@ -26,14 +26,16 @@
   "scripts": {
     "build": "go build -ldflags=\"-s -w\" -o build/ispeakd .",
     "test": "go test ./...",
+    "prepublishOnly": "make test",
     "postinstall": "node npm/postinstall.js",
     "pack:dry-run": "npm pack --dry-run"
   },
   "files": [
     "main.go",
+    "clean_text.go",
     "go.mod",
     "go.sum",
-    "scripts/",
+    "scripts/ispeak",
     "configs/",
     "npm/",
     "Docs/",

package/scripts/ispeak CHANGED Viewed

@@ -2,7 +2,7 @@
 # ispeak — iSpeak 控制命令
 set -euo pipefail
-VERSION="1.6.3"
+VERSION="1.6.6"
 SOCK="$HOME/.config/iSpeak/ispeak.sock"
 PLIST="$HOME/Library/LaunchAgents/com.iSpeak.plist"
 CMD_NAME="$(basename "$0")"