@wu529778790/open-im 1.11.4-beta.2 → 1.11.4-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,11 +4,12 @@
4
4
  * Uses POST + JSON body + Bearer token auth (iLink protocol).
5
5
  */
6
6
  import { randomBytes } from 'node:crypto';
7
+ import { readFileSync } from 'node:fs';
7
8
  import { createLogger } from '../logger.js';
8
- import { splitLongContent, toReplyPlainText } from '../shared/utils.js';
9
- import { MAX_CLAWBOT_MESSAGE_LENGTH } from '../constants.js';
9
+ import { toReplyPlainText } from '../shared/utils.js';
10
10
  import { getChannelState } from './client.js';
11
11
  import { getActiveChatId, getClawbotContextToken } from '../shared/active-chats.js';
12
+ import { textToSpeech, getTTSConfig } from '../shared/tts.js';
12
13
  const log = createLogger('ClawBotSender');
13
14
  let apiUrl = 'https://ilinkai.weixin.qq.com';
14
15
  let apiToken = '';
@@ -89,24 +90,80 @@ async function postMessage(chatId, text, contextToken) {
89
90
  return false;
90
91
  }
91
92
  }
93
/**
 * Send a voice message to a ClawBot chat.
 *
 * Reads the audio file at `audioPath`, base64-encodes it and posts it to the
 * `/ilink/bot/sendmessage` endpoint as a single voice item.
 *
 * @param chatId - Recipient id, sent as `to_user_id`.
 * @param audioPath - Path of the audio file to send.
 * @param contextToken - Optional context token; when null/undefined, the token
 *   cached for `chatId` is used instead.
 * @returns `true` when the server accepted the message; `false` when the
 *   channel is not connected, no context token is available, the HTTP status
 *   is not 2xx, the server reports a non-zero `ret`, or reading/sending fails
 *   (the failure is logged).
 */
async function postVoiceMessage(chatId, audioPath, contextToken) {
  if (getChannelState() !== 'connected') {
    log.warn('ClawBot not connected, cannot send voice message');
    return false;
  }
  const token = contextToken ?? getCachedContextToken(chatId);
  if (!token) {
    log.warn(`ClawBot no context_token for chatId=${chatId}, cannot send voice`);
    return false;
  }
  try {
    // The audio is embedded in the request as a base64 data URI.
    // NOTE(review): confirm the endpoint accepts a data URI in `cdn_url`.
    const audioBase64 = readFileSync(audioPath).toString('base64');
    const body = JSON.stringify({
      msg: {
        from_user_id: '',
        to_user_id: chatId,
        client_id: generateClientId(),
        message_type: 2, // BOT
        message_state: 2, // FINISH
        item_list: [{
          type: 3, // VOICE
          voice_item: {
            media: { cdn_url: `data:audio/mp3;base64,${audioBase64}` },
          },
        }],
        context_token: token,
      },
      base_info: { channel_version: '0.1.0' },
    });
    const res = await fetch(`${apiUrl}/ilink/bot/sendmessage`, {
      method: 'POST',
      headers: buildHeaders(),
      body,
    });
    // A missing `ret` field is treated as success below, so an HTTP-level
    // failure must be rejected here before the body is interpreted.
    if (!res.ok) {
      log.error(`ClawBot voice message failed: HTTP ${res.status} ${res.statusText}`);
      return false;
    }
    const data = await res.json();
    const ok = data.ret === 0 || data.ret === undefined;
    if (!ok) {
      log.error(`ClawBot voice message failed: ret=${data.ret} errcode=${data.errcode} errmsg=${data.errmsg}`);
      return false;
    }
    return true;
  }
  catch (err) {
    log.error('ClawBot voice message error:', err);
    return false;
  }
}
92
146
  /**
93
147
  * Send text reply to a ClawBot chat, splitting long messages automatically.
94
148
  */
95
149
  export async function sendTextReply(chatId, text, contextToken) {
96
150
  const plainText = toReplyPlainText(text);
97
- const parts = splitLongContent(plainText, MAX_CLAWBOT_MESSAGE_LENGTH);
98
- if (parts.length === 1) {
99
- log.info(`Sending ClawBot reply to chatId=${chatId}, len=${plainText.length}`);
100
- await postMessage(chatId, plainText, contextToken);
101
- return;
102
- }
103
- log.info(`Sending ClawBot reply in ${parts.length} parts to chatId=${chatId}, totalLen=${plainText.length}`);
104
- for (let i = 0; i < parts.length; i++) {
105
- const partText = i === 0
106
- ? `${parts[i]}\n\n_(1/${parts.length})_`
107
- : `_(续 ${i + 1}/${parts.length})_\n\n${parts[i]}`;
108
- await postMessage(chatId, partText, contextToken);
109
- log.info(`ClawBot part ${i + 1}/${parts.length} sent`);
151
+ // 发送文字消息
152
+ log.info(`Sending ClawBot reply to chatId=${chatId}, len=${plainText.length}`);
153
+ await postMessage(chatId, plainText, contextToken);
154
+ // 如果 TTS 启用,同时发送语音消息
155
+ const ttsConfig = getTTSConfig();
156
+ if (ttsConfig.enabled && plainText.length > 10) {
157
+ try {
158
+ const audioPath = await textToSpeech(plainText);
159
+ if (audioPath) {
160
+ await postVoiceMessage(chatId, audioPath, contextToken);
161
+ log.info(`Voice message sent to chatId=${chatId}`);
162
+ }
163
+ }
164
+ catch (err) {
165
+ log.warn('Failed to send voice message:', err);
166
+ }
110
167
  }
111
168
  }
112
169
  /**
package/dist/index.js CHANGED
@@ -104,6 +104,9 @@ const PLATFORM_MODULES = {
104
104
  const pc = config.platforms.clawbot;
105
105
  if (pc?.apiUrl && pc?.apiToken) {
106
106
  initClawBotSender(pc.apiUrl, pc.apiToken);
107
+ // 初始化 TTS
108
+ const { initTTS } = await import('./shared/tts.js');
109
+ initTTS({ enabled: true, voice: 'zh-CN-XiaoxiaoNeural' });
107
110
  }
108
111
  const handle = setupClawbotHandlers(config, sessionManager);
109
112
  await initClawbot(config, handle.handleEvent);
@@ -0,0 +1,22 @@
1
+ /**
2
+ * TTS (Text-to-Speech) 模块
3
+ * 使用 edge-tts-node 调用微软 Edge TTS 服务
4
+ */
5
+ /** TTS configuration. */
6
+ export interface TTSConfig {
7
+ enabled: boolean; // when false, textToSpeech() resolves to null without synthesising
8
+ voice?: string; // voice name handed to the TTS client; the default is 'zh-CN-XiaoxiaoNeural'
9
+ }
10
+ /**
11
+ * Initialize TTS: `cfg` is merged over the default configuration, and a TTS client is created when the result is enabled.
12
+ */
13
+ export declare function initTTS(cfg?: Partial<TTSConfig>): void;
14
+ /**
15
+ * Get the active TTS configuration.
16
+ */
17
+ export declare function getTTSConfig(): TTSConfig;
18
+ /**
19
+ * Convert text to speech and write the audio to a file.
20
+ * @returns Path of the generated audio file, or `null` when TTS is disabled, not initialized, or synthesis failed.
21
+ */
22
+ export declare function textToSpeech(text: string): Promise<string | null>;
@@ -0,0 +1,72 @@
1
+ /**
2
+ * TTS (Text-to-Speech) 模块
3
+ * 使用 edge-tts-node 调用微软 Edge TTS 服务
4
+ */
5
+ import { MsEdgeTTS, OUTPUT_FORMAT } from 'edge-tts-node';
6
+ import { createLogger } from '../logger.js';
7
+ import { mkdirSync, existsSync } from 'node:fs';
8
+ import { join } from 'node:path';
9
+ import { APP_HOME } from '../constants.js';
10
+ import { randomBytes } from 'node:crypto';
11
+ const log = createLogger('TTS');
12
/**
 * Default TTS settings: disabled, voice 'zh-CN-XiaoxiaoNeural'.
 * Frozen so that no caller can alter the defaults by accident.
 */
const DEFAULT_TTS_CONFIG = Object.freeze({
  enabled: false,
  voice: 'zh-CN-XiaoxiaoNeural',
});
/**
 * Active configuration. Starts as a private COPY of the defaults: the object
 * is handed out by getTTSConfig(), so it must not alias DEFAULT_TTS_CONFIG.
 */
let config = { ...DEFAULT_TTS_CONFIG };
/** TTS client instance; null until initTTS() is called with TTS enabled. */
let tts = null;
19
/**
 * Initialise TTS.
 *
 * Merges `cfg` over the defaults and stores the result as the active
 * configuration. When the result is enabled a new `MsEdgeTTS` client is
 * created; otherwise any previously created client is dropped.
 *
 * @param cfg - Optional overrides. Omitted or `undefined` fields keep their
 *   default value.
 */
export function initTTS(cfg) {
  const overrides = cfg ?? {};
  config = {
    ...DEFAULT_TTS_CONFIG,
    ...overrides,
    // A plain spread lets an explicit `undefined` erase the default, which
    // would later hand an undefined voice to the TTS client.
    enabled: overrides.enabled ?? DEFAULT_TTS_CONFIG.enabled,
    voice: overrides.voice ?? DEFAULT_TTS_CONFIG.voice,
  };
  if (!config.enabled) {
    // Do not keep a stale client around after TTS has been switched off.
    tts = null;
    return;
  }
  tts = new MsEdgeTTS({ enableLogger: false });
  log.info(`TTS enabled, voice: ${config.voice}`);
}
29
+ /**
30
+ * Return the active TTS configuration. The module-level object itself is returned, not a copy.
31
+ */
32
+ export function getTTSConfig() {
33
+ return config;
34
+ }
35
/**
 * Convert text to speech and write the result to an MP3 file.
 *
 * Markdown is stripped first (fenced code blocks are replaced by a short
 * spoken placeholder), the remaining text is capped at 5000 characters, and
 * the audio is synthesised with the configured voice into
 * `<APP_HOME>/audio/tts-<random hex>.mp3`.
 *
 * @param text - Text to speak; may contain Markdown.
 * @returns Path of the generated audio file, or `null` when TTS is disabled
 *   or not initialised, nothing speakable remains after clean-up, or
 *   synthesis fails (the error is logged).
 */
export async function textToSpeech(text) {
  if (!config.enabled || !tts) {
    return null;
  }
  try {
    const maxSpokenLength = 5000;
    // Strip Markdown BEFORE truncating: code blocks must not count towards
    // the limit, and a fence cut in half by truncation would no longer match
    // the code-block pattern and would be read aloud.
    const cleanText = text
      .replace(/```[\s\S]*?```/g, '代码块已省略') // fenced code block -> placeholder
      .replace(/`[^`]+`/g, (match) => match.slice(1, -1)) // inline code
      .replace(/\*\*[^*]+\*\*/g, (match) => match.slice(2, -2)) // bold
      .replace(/\*[^*]+\*/g, (match) => match.slice(1, -1)) // italic
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // link -> link text
      // Heading markers only at the start of a line, so a '#' inside a
      // sentence (e.g. "C# is") is left alone.
      .replace(/^ {0,3}#{1,6}[ \t]+/gm, '')
      .replace(/\n{3,}/g, '\n\n'); // collapse runs of blank lines
    if (cleanText.trim() === '') {
      return null;
    }
    let spokenText = cleanText;
    if (cleanText.length > maxSpokenLength) {
      let cut = maxSpokenLength;
      // Do not cut between the two halves of a surrogate pair.
      const lastUnit = cleanText.charCodeAt(cut - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cut -= 1;
      }
      spokenText = cleanText.slice(0, cut) + '...';
    }
    // `recursive: true` makes mkdirSync a no-op when the directory exists.
    const audioDir = join(APP_HOME, 'audio');
    mkdirSync(audioDir, { recursive: true });
    const audioPath = join(audioDir, `tts-${randomBytes(8).toString('hex')}.mp3`);
    await tts.setMetadata(config.voice, OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3);
    await tts.toFile(audioPath, spokenText);
    log.info(`TTS generated: ${audioPath}`);
    return audioPath;
  }
  catch (err) {
    log.error('TTS failed:', err);
    return null;
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wu529778790/open-im",
3
- "version": "1.11.4-beta.2",
3
+ "version": "1.11.4-beta.3",
4
4
  "description": "Your AI coding assistant, in every chat app. Multi-platform IM bridge for Claude Code, Codex, and CodeBuddy.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -58,6 +58,7 @@
58
58
  "@sentry/node": "^10.58.0",
59
59
  "centrifuge": "^5.5.3",
60
60
  "dingtalk-stream": "^2.1.4",
61
+ "edge-tts-node": "^1.5.7",
61
62
  "prompts": "^2.4.2",
62
63
  "telegraf": "^4.16.3",
63
64
  "ws": "^8.20.0"