@downcity/plugins 1.0.56 → 1.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. package/bin/BuiltinPlugins.d.ts.map +1 -1
  2. package/bin/BuiltinPlugins.js +0 -4
  3. package/bin/BuiltinPlugins.js.map +1 -1
  4. package/bin/asr/Plugin.d.ts +67 -7
  5. package/bin/asr/Plugin.d.ts.map +1 -1
  6. package/bin/asr/Plugin.js +229 -461
  7. package/bin/asr/Plugin.js.map +1 -1
  8. package/bin/asr/types/AsrPlugin.d.ts +114 -0
  9. package/bin/asr/types/AsrPlugin.d.ts.map +1 -0
  10. package/bin/asr/types/AsrPlugin.js +10 -0
  11. package/bin/asr/types/AsrPlugin.js.map +1 -0
  12. package/bin/image/ImagePlugin.d.ts +1 -1
  13. package/bin/image/ImagePlugin.d.ts.map +1 -1
  14. package/bin/image/ImagePlugin.js +23 -5
  15. package/bin/image/ImagePlugin.js.map +1 -1
  16. package/bin/index.d.ts +2 -0
  17. package/bin/index.d.ts.map +1 -1
  18. package/bin/tts/Plugin.d.ts +53 -6
  19. package/bin/tts/Plugin.d.ts.map +1 -1
  20. package/bin/tts/Plugin.js +197 -474
  21. package/bin/tts/Plugin.js.map +1 -1
  22. package/bin/tts/types/TtsPlugin.d.ts +63 -102
  23. package/bin/tts/types/TtsPlugin.d.ts.map +1 -1
  24. package/bin/tts/types/TtsPlugin.js +4 -3
  25. package/bin/tts/types/TtsPlugin.js.map +1 -1
  26. package/bin/web/PROMPT.d.ts +1 -1
  27. package/bin/web/PROMPT.d.ts.map +1 -1
  28. package/bin/web/PROMPT.js +1 -1
  29. package/bin/web/PROMPT.js.map +1 -1
  30. package/bin/web/Plugin.d.ts +66 -5
  31. package/bin/web/Plugin.d.ts.map +1 -1
  32. package/bin/web/Plugin.js +126 -450
  33. package/bin/web/Plugin.js.map +1 -1
  34. package/bin/web/WebPromptAssets.d.ts +1 -9
  35. package/bin/web/WebPromptAssets.d.ts.map +1 -1
  36. package/bin/web/WebPromptAssets.js +1 -11
  37. package/bin/web/WebPromptAssets.js.map +1 -1
  38. package/bin/web/runtime/Install.d.ts +19 -0
  39. package/bin/web/runtime/Install.d.ts.map +1 -0
  40. package/bin/web/runtime/Install.js +178 -0
  41. package/bin/web/runtime/Install.js.map +1 -0
  42. package/bin/web/types/WebPlugin.d.ts +37 -109
  43. package/bin/web/types/WebPlugin.d.ts.map +1 -1
  44. package/bin/web/types/WebPlugin.js +5 -7
  45. package/bin/web/types/WebPlugin.js.map +1 -1
  46. package/package.json +2 -2
  47. package/src/BuiltinPlugins.ts +0 -4
  48. package/src/asr/Plugin.ts +264 -483
  49. package/src/asr/types/AsrPlugin.ts +118 -0
  50. package/src/image/ImagePlugin.ts +23 -5
  51. package/src/index.ts +12 -0
  52. package/src/tts/Plugin.ts +225 -492
  53. package/src/tts/types/TtsPlugin.ts +67 -102
  54. package/src/web/PROMPT.ts +1 -1
  55. package/src/web/PROMPT.ts.txt +32 -6
  56. package/src/web/Plugin.ts +119 -453
  57. package/src/web/WebPromptAssets.ts +1 -13
  58. package/src/web/runtime/Install.ts +241 -0
  59. package/src/web/types/WebPlugin.ts +37 -113
  60. package/bin/asr/Config.d.ts +0 -43
  61. package/bin/asr/Config.d.ts.map +0 -1
  62. package/bin/asr/Config.js +0 -107
  63. package/bin/asr/Config.js.map +0 -1
  64. package/bin/asr/Dependency.d.ts +0 -77
  65. package/bin/asr/Dependency.d.ts.map +0 -1
  66. package/bin/asr/Dependency.js +0 -238
  67. package/bin/asr/Dependency.js.map +0 -1
  68. package/bin/asr/InboundAugment.d.ts +0 -17
  69. package/bin/asr/InboundAugment.d.ts.map +0 -1
  70. package/bin/asr/InboundAugment.js +0 -47
  71. package/bin/asr/InboundAugment.js.map +0 -1
  72. package/bin/asr/ModelCatalog.d.ts +0 -29
  73. package/bin/asr/ModelCatalog.d.ts.map +0 -1
  74. package/bin/asr/ModelCatalog.js +0 -25
  75. package/bin/asr/ModelCatalog.js.map +0 -1
  76. package/bin/tts/Dependency.d.ts +0 -90
  77. package/bin/tts/Dependency.d.ts.map +0 -1
  78. package/bin/tts/Dependency.js +0 -344
  79. package/bin/tts/Dependency.js.map +0 -1
  80. package/bin/tts/PluginSupport.d.ts +0 -25
  81. package/bin/tts/PluginSupport.d.ts.map +0 -1
  82. package/bin/tts/PluginSupport.js +0 -72
  83. package/bin/tts/PluginSupport.js.map +0 -1
  84. package/bin/tts/runtime/Catalog.d.ts +0 -21
  85. package/bin/tts/runtime/Catalog.d.ts.map +0 -1
  86. package/bin/tts/runtime/Catalog.js +0 -90
  87. package/bin/tts/runtime/Catalog.js.map +0 -1
  88. package/bin/tts/runtime/DependencyInstaller.d.ts +0 -143
  89. package/bin/tts/runtime/DependencyInstaller.d.ts.map +0 -1
  90. package/bin/tts/runtime/DependencyInstaller.js +0 -261
  91. package/bin/tts/runtime/DependencyInstaller.js.map +0 -1
  92. package/bin/tts/runtime/Installer.d.ts +0 -89
  93. package/bin/tts/runtime/Installer.d.ts.map +0 -1
  94. package/bin/tts/runtime/Installer.js +0 -188
  95. package/bin/tts/runtime/Installer.js.map +0 -1
  96. package/bin/tts/runtime/Paths.d.ts +0 -20
  97. package/bin/tts/runtime/Paths.d.ts.map +0 -1
  98. package/bin/tts/runtime/Paths.js +0 -32
  99. package/bin/tts/runtime/Paths.js.map +0 -1
  100. package/bin/tts/runtime/Synthesizer.d.ts +0 -44
  101. package/bin/tts/runtime/Synthesizer.d.ts.map +0 -1
  102. package/bin/tts/runtime/Synthesizer.js +0 -363
  103. package/bin/tts/runtime/Synthesizer.js.map +0 -1
  104. package/bin/tts/types/Tts.d.ts +0 -91
  105. package/bin/tts/types/Tts.d.ts.map +0 -1
  106. package/bin/tts/types/Tts.js +0 -9
  107. package/bin/tts/types/Tts.js.map +0 -1
  108. package/bin/voice/Config.d.ts +0 -43
  109. package/bin/voice/Config.d.ts.map +0 -1
  110. package/bin/voice/Config.js +0 -104
  111. package/bin/voice/Config.js.map +0 -1
  112. package/bin/voice/Dependency.d.ts +0 -77
  113. package/bin/voice/Dependency.d.ts.map +0 -1
  114. package/bin/voice/Dependency.js +0 -237
  115. package/bin/voice/Dependency.js.map +0 -1
  116. package/bin/voice/InboundAugment.d.ts +0 -17
  117. package/bin/voice/InboundAugment.d.ts.map +0 -1
  118. package/bin/voice/InboundAugment.js +0 -47
  119. package/bin/voice/InboundAugment.js.map +0 -1
  120. package/bin/voice/ModelCatalog.d.ts +0 -29
  121. package/bin/voice/ModelCatalog.d.ts.map +0 -1
  122. package/bin/voice/ModelCatalog.js +0 -25
  123. package/bin/voice/ModelCatalog.js.map +0 -1
  124. package/bin/voice/runtime/Catalog.d.ts +0 -18
  125. package/bin/voice/runtime/Catalog.d.ts.map +0 -1
  126. package/bin/voice/runtime/Catalog.js +0 -61
  127. package/bin/voice/runtime/Catalog.js.map +0 -1
  128. package/bin/voice/runtime/DependencyInstaller.d.ts +0 -145
  129. package/bin/voice/runtime/DependencyInstaller.d.ts.map +0 -1
  130. package/bin/voice/runtime/DependencyInstaller.js +0 -309
  131. package/bin/voice/runtime/DependencyInstaller.js.map +0 -1
  132. package/bin/voice/runtime/Installer.d.ts +0 -94
  133. package/bin/voice/runtime/Installer.d.ts.map +0 -1
  134. package/bin/voice/runtime/Installer.js +0 -200
  135. package/bin/voice/runtime/Installer.js.map +0 -1
  136. package/bin/voice/runtime/Paths.d.ts +0 -8
  137. package/bin/voice/runtime/Paths.d.ts.map +0 -1
  138. package/bin/voice/runtime/Paths.js +0 -26
  139. package/bin/voice/runtime/Paths.js.map +0 -1
  140. package/bin/voice/runtime/Transcriber.d.ts +0 -57
  141. package/bin/voice/runtime/Transcriber.d.ts.map +0 -1
  142. package/bin/voice/runtime/Transcriber.js +0 -329
  143. package/bin/voice/runtime/Transcriber.js.map +0 -1
  144. package/bin/voice/types/Voice.d.ts +0 -58
  145. package/bin/voice/types/Voice.d.ts.map +0 -1
  146. package/bin/voice/types/Voice.js +0 -9
  147. package/bin/voice/types/Voice.js.map +0 -1
  148. package/bin/voice/types/VoicePlugin.d.ts +0 -190
  149. package/bin/voice/types/VoicePlugin.d.ts.map +0 -1
  150. package/bin/voice/types/VoicePlugin.js +0 -9
  151. package/bin/voice/types/VoicePlugin.js.map +0 -1
  152. package/bin/web/Dependency.d.ts +0 -10
  153. package/bin/web/Dependency.d.ts.map +0 -1
  154. package/bin/web/Dependency.js +0 -10
  155. package/bin/web/Dependency.js.map +0 -1
  156. package/bin/web/PROMPT.agent-browser.d.ts +0 -7
  157. package/bin/web/PROMPT.agent-browser.d.ts.map +0 -1
  158. package/bin/web/PROMPT.agent-browser.js +0 -8
  159. package/bin/web/PROMPT.agent-browser.js.map +0 -1
  160. package/bin/web/PROMPT.web-access.d.ts +0 -7
  161. package/bin/web/PROMPT.web-access.d.ts.map +0 -1
  162. package/bin/web/PROMPT.web-access.js +0 -8
  163. package/bin/web/PROMPT.web-access.js.map +0 -1
  164. package/bin/web/runtime/Config.d.ts +0 -21
  165. package/bin/web/runtime/Config.d.ts.map +0 -1
  166. package/bin/web/runtime/Config.js +0 -79
  167. package/bin/web/runtime/Config.js.map +0 -1
  168. package/bin/web/runtime/Source.d.ts +0 -29
  169. package/bin/web/runtime/Source.d.ts.map +0 -1
  170. package/bin/web/runtime/Source.js +0 -209
  171. package/bin/web/runtime/Source.js.map +0 -1
  172. package/src/asr/Config.ts +0 -138
  173. package/src/asr/Dependency.ts +0 -336
  174. package/src/asr/InboundAugment.ts +0 -59
  175. package/src/asr/ModelCatalog.ts +0 -43
  176. package/src/tts/Dependency.ts +0 -473
  177. package/src/tts/PluginSupport.ts +0 -85
  178. package/src/tts/runtime/Catalog.ts +0 -97
  179. package/src/tts/runtime/DependencyInstaller.ts +0 -436
  180. package/src/tts/runtime/Installer.ts +0 -297
  181. package/src/tts/runtime/Paths.ts +0 -39
  182. package/src/tts/runtime/Synthesizer.ts +0 -480
  183. package/src/tts/types/Tts.ts +0 -99
  184. package/src/voice/Config.ts +0 -135
  185. package/src/voice/Dependency.ts +0 -329
  186. package/src/voice/InboundAugment.ts +0 -59
  187. package/src/voice/ModelCatalog.ts +0 -43
  188. package/src/voice/runtime/Catalog.ts +0 -68
  189. package/src/voice/runtime/DependencyInstaller.ts +0 -505
  190. package/src/voice/runtime/Installer.ts +0 -324
  191. package/src/voice/runtime/Paths.ts +0 -26
  192. package/src/voice/runtime/Transcriber.ts +0 -467
  193. package/src/voice/types/Voice.ts +0 -68
  194. package/src/voice/types/VoicePlugin.ts +0 -194
  195. package/src/web/Dependency.ts +0 -17
  196. package/src/web/PROMPT.agent-browser.ts +0 -9
  197. package/src/web/PROMPT.agent-browser.ts.txt +0 -17
  198. package/src/web/PROMPT.web-access.ts +0 -9
  199. package/src/web/PROMPT.web-access.ts.txt +0 -13
  200. package/src/web/runtime/Config.ts +0 -105
  201. package/src/web/runtime/Source.ts +0 -257
@@ -1,39 +0,0 @@
1
- /**
2
- * TTS 模型目录路径辅助。
3
- *
4
- * 关键点(中文)
5
- * - 统一把相对路径、`~` 与默认目录解析成绝对路径。
6
- */
7
-
8
- import os from "node:os";
9
- import path from "node:path";
10
-
11
/**
 * Expand a leading `~` in a path to the current user's home directory.
 *
 * - Empty or whitespace-only input yields an empty string.
 * - `~` alone yields the home directory.
 * - `~/rest` is joined onto the home directory; on platforms whose path
 *   separator is `\` the `~\rest` spelling is accepted as well.
 * - Anything else (including `~user/...`) is returned trimmed and unchanged.
 *
 * @param inputPath Raw path string.
 * @returns The trimmed path with a leading `~` expanded.
 */
function expandHomePath(inputPath: string): string {
  const raw = String(inputPath || "").trim();
  if (!raw || raw[0] !== "~") return raw;
  if (raw === "~") return os.homedir();
  // Only treat `~\` as a home prefix where `\` really is the separator;
  // on POSIX a backslash is an ordinary file-name character.
  const hasHomePrefix =
    raw.startsWith("~/") || (path.sep === "\\" && raw.startsWith("~\\"));
  return hasHomePrefix ? path.join(os.homedir(), raw.slice(2)) : raw;
}
20
-
21
/**
 * Resolve the TTS models root directory to an absolute path.
 *
 * Falls back to `~/.downcity/models/tts` when no directory is configured.
 * A leading `~` is expanded via `expandHomePath`; relative paths are resolved
 * against the project root.
 */
export function resolveTtsModelsRootDir(input: {
  /**
   * Project root directory.
   */
  projectRoot: string;
  /**
   * Models directory explicitly configured by the user (optional).
   */
  modelsDir?: string;
}): string {
  const defaultDir = path.join(os.homedir(), ".downcity", "models", "tts");
  const configured = String(input.modelsDir || defaultDir).trim();
  const expanded = expandHomePath(configured);
  if (!expanded) {
    // A whitespace-only setting trims down to nothing: use the default.
    return path.resolve(defaultDir);
  }
  return path.isAbsolute(expanded)
    ? path.resolve(expanded)
    : path.resolve(input.projectRoot, expanded);
}
@@ -1,480 +0,0 @@
1
- /**
2
- * TTS 语音合成 runtime。
3
- *
4
- * 关键点(中文)
5
- * - 读取本地模型目录,不再依赖 console 模型池。
6
- * - 根据模型族选择对应 Python runner,并把输出落到本地文件。
7
- */
8
-
9
- import { execFile as execFileCb } from "node:child_process";
10
- import path from "node:path";
11
- import { promisify } from "node:util";
12
- import fs from "fs-extra";
13
- import type { TtsPluginConfig, TtsSynthesizeInput } from "@/tts/types/TtsPlugin.js";
14
- import type { TtsAudioFormat, TtsModelId } from "@/tts/types/Tts.js";
15
- import type { PluginCommandContext } from "@downcity/agent/internal/plugin/types/Plugin.js";
16
- import { renderChatMessageFileTag } from "@downcity/agent/internal/executor/messages/ChatMessageMarkup.js";
17
- import { getTtsModelCatalogItem, resolveTtsModelId } from "@/tts/runtime/Catalog.js";
18
- import { resolveTtsModelsRootDir } from "@/tts/runtime/Paths.js";
19
-
20
- const execFileAsync = promisify(execFileCb); // Promise-returning wrapper around child_process.execFile.
21
- const DEFAULT_TTS_TIMEOUT_MS = 300_000; // Fallback runner timeout in milliseconds, returned by normalizeTimeoutMs for non-numeric input.
22
-
23
/**
 * Coerce any value to a trimmed string.
 *
 * Every falsy value (`null`, `undefined`, `""`, `0`, `false`, `NaN`) becomes
 * the empty string; everything else goes through `String()`.
 */
function normalizeText(value: unknown): string {
  const text = value ? String(value) : "";
  return text.trim();
}
26
-
27
/**
 * Normalize a requested audio format.
 *
 * Only a case-insensitive `"flac"` selects FLAC; every other value yields `"wav"`.
 */
function normalizeFormat(value: unknown): TtsAudioFormat {
  const requested = normalizeText(value).toLowerCase();
  if (requested === "flac") {
    return "flac";
  }
  return "wav";
}
30
-
31
/**
 * Normalize a speech-speed multiplier.
 *
 * Non-numeric or non-finite input yields `1`. Finite numbers are clamped to
 * the range [0.5, 2] and rounded to two decimal places.
 */
function normalizeSpeed(value: unknown): number {
  // Number.isFinite already rejects NaN and both infinities.
  const usable = typeof value === "number" && Number.isFinite(value);
  if (!usable) {
    return 1;
  }
  const clamped = Math.min(2, Math.max(0.5, value));
  return Number(clamped.toFixed(2));
}
38
-
39
/**
 * Normalize a runner timeout in milliseconds.
 *
 * Non-numeric or non-finite input yields `DEFAULT_TTS_TIMEOUT_MS`. Finite
 * numbers are clamped to [5_000, 900_000] and floored to an integer.
 */
function normalizeTimeoutMs(value: unknown): number {
  const usable = typeof value === "number" && Number.isFinite(value);
  if (!usable) {
    return DEFAULT_TTS_TIMEOUT_MS;
  }
  const bounded = Math.min(900_000, Math.max(5_000, value));
  return Math.floor(bounded);
}
47
-
48
/**
 * Turn an arbitrary string into a safe file-name stem.
 *
 * Runs of characters outside `[A-Za-z0-9_-]` become a single `-`, leading and
 * trailing dashes are removed, and the result is cut to 48 characters.
 * Falls back to `"tts"` when nothing is left.
 */
function sanitizeFileStem(value: string): string {
  const dashed = value.replace(/[^A-Za-z0-9_-]+/g, "-");
  const collapsed = dashed.replace(/-+/g, "-");
  const trimmed = collapsed.replace(/^-+|-+$/g, "");
  const stem = trimmed.slice(0, 48);
  return stem === "" ? "tts" : stem;
}
55
-
56
/**
 * Express `targetPath` relative to `projectRoot`, using `/` separators.
 *
 * @param projectRoot Directory the result is made relative to.
 * @param targetPath Path to convert.
 * @returns The relative path, or `null` when the target is the root itself
 *   or lies outside the root.
 */
function toProjectRelativePath(projectRoot: string, targetPath: string): string | null {
  const relative = path.relative(projectRoot, targetPath);
  if (!relative) return null;
  // path.relative can return an absolute path (e.g. a different Windows drive).
  if (path.isAbsolute(relative)) return null;
  // Only a real parent-directory segment escapes the root; an entry merely
  // named "..something" is still inside it.
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  if (escapesRoot) return null;
  return relative.split(path.sep).join("/");
}
63
-
64
/**
 * Decide where the synthesized audio file is written.
 *
 * - No `output`: a generated file name under `<cache dir>/tts`.
 * - `output` with a file extension: used as the file path as-is.
 * - `output` without an extension: treated as a directory that receives a
 *   generated `<timestamp>-<model stem>.<format>` file.
 *
 * Relative `output` values are resolved against the context root path.
 *
 * @throws Error when the resulting file is not inside the project root.
 */
function resolveOutputTarget(params: {
  context: PluginCommandContext;
  format: TtsAudioFormat;
  output?: string;
  modelId: string;
}): { absPath: string; relativePath: string } {
  const projectRoot = params.context.rootPath;
  const requested = normalizeText(params.output);
  const cacheTarget = path.join(params.context.paths.getCacheDirPath(), "tts");

  let target = cacheTarget;
  if (requested) {
    target = path.isAbsolute(requested)
      ? path.normalize(requested)
      : path.resolve(projectRoot, requested);
  }

  // Any extension on the target marks it as an explicit file path.
  let absPath = target;
  if (path.extname(target) === "") {
    const generatedName = `${Date.now()}-${sanitizeFileStem(params.modelId)}.${params.format}`;
    absPath = path.join(target, generatedName);
  }

  const relativePath = toProjectRelativePath(projectRoot, absPath);
  if (!relativePath) {
    throw new Error(`TTS output must stay inside project root: ${absPath}`);
  }
  return { absPath, relativePath };
}
95
-
96
/**
 * Return the last line of `value` that is non-empty after trimming,
 * or `""` when there is none.
 */
function pickLastNonEmptyLine(value: string): string {
  const rawLines = String(value || "").split(/\r?\n/);
  for (let index = rawLines.length - 1; index >= 0; index -= 1) {
    const candidate = rawLines[index].trim();
    if (candidate) {
      return candidate;
    }
  }
  return "";
}
103
-
104
/**
 * Decide whether a stderr line is a known, ignorable Python inference notice.
 *
 * Key points
 * - Some third-party libraries print warnings / info to stderr even though
 *   audio generation is unaffected.
 * - Only known non-fatal notices are matched, so real errors are not
 *   silently swallowed. Blank lines count as ignorable.
 */
function isIgnorablePythonStderrLine(value: string): boolean {
  const line = normalizeText(value);
  if (!line) {
    return true;
  }
  const knownNotices = [
    /^Setting `pad_token_id` to `eos_token_id`/u,
    /^`torch_dtype` is deprecated!/u,
    /^Warning: flash-attn is not installed\./u,
  ];
  return knownNotices.some((pattern) => pattern.test(line));
}
119
-
120
/**
 * Extract the stderr text that should be surfaced as unexpected output.
 *
 * Key points
 * - Blank lines and known warnings are skipped; anything left is treated as
 *   meaningful.
 * - Returns the last remaining line (or `""`), which gives callers a stable
 *   one-line summary.
 */
function pickUnexpectedPythonStderr(value: string): string {
  const rawLines = String(value || "").split(/\r?\n/);
  for (let index = rawLines.length - 1; index >= 0; index -= 1) {
    const candidate = rawLines[index].trim();
    if (candidate && !isIgnorablePythonStderrLine(candidate)) {
      return candidate;
    }
  }
  return "";
}
134
-
135
/**
 * Reduce Python stderr output to a one-line summary.
 *
 * Key points
 * - Some runners print non-fatal notices to stderr.
 * - The last non-blank line is kept so callers can return it as an
 *   explanatory note; `undefined` is returned when stderr has no content.
 */
function normalizePythonStderrSummary(value: string): string | undefined {
  const meaningful = String(value || "")
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line !== "");
  // pop() yields undefined on an empty array, which is the "no summary" result.
  return meaningful.pop();
}
152
-
153
/**
 * Guess the language of a text: `"zh"` when it contains any character in the
 * range U+3400–U+9FFF, otherwise `"en"`.
 */
function detectLanguageHint(input: string): "zh" | "en" {
  const hanRange = /[\u3400-\u9fff]/u;
  if (hanRange.test(input)) {
    return "zh";
  }
  return "en";
}
156
-
157
/**
 * Pick the Kokoro voice file (`*.pt`) to use under `<modelDir>/voices`.
 *
 * Resolution order:
 * 1. The requested voice (with `.pt` appended when missing), if that file exists.
 * 2. A language-dependent preferred voice (`zf_xiaoni.pt` first for `zh`,
 *    `af_heart.pt` first otherwise).
 * 3. The first `.pt` file found in the voices directory.
 *
 * @throws Error `kokoro voice assets are missing: <dir>` when the voices
 *   directory does not exist or contains no `.pt` file.
 */
function resolveKokoroVoicePath(params: {
  modelDir: string;
  voice?: string;
  language: string;
}): string {
  const voicesDir = path.join(params.modelDir, "voices");
  const requested = String(params.voice || "").trim();
  if (requested) {
    const fileName = requested.endsWith(".pt") ? requested : `${requested}.pt`;
    // NOTE(review): path.resolve lets an absolute or `..` voice value point
    // outside voicesDir — confirm whether that is intended.
    const requestedPath = path.resolve(voicesDir, fileName);
    if (fs.existsSync(requestedPath)) {
      return requestedPath;
    }
  }

  const preferred =
    params.language === "zh"
      ? ["zf_xiaoni.pt", "af_heart.pt"]
      : ["af_heart.pt", "zf_xiaoni.pt"];
  for (const fileName of preferred) {
    const candidate = path.join(voicesDir, fileName);
    if (fs.existsSync(candidate)) {
      return candidate;
    }
  }

  // A missing voices directory must produce the same descriptive error as an
  // empty one instead of a raw ENOENT from readdirSync.
  const entries = fs.existsSync(voicesDir)
    ? fs.readdirSync(voicesDir).filter((item) => item.endsWith(".pt"))
    : [];
  if (entries.length === 0) {
    throw new Error(`kokoro voice assets are missing: ${voicesDir}`);
  }
  return path.join(voicesDir, entries[0]);
}
190
-
191
/**
 * Build the environment for spawning the Python runner.
 *
 * Returns a copy of `process.env`. When `pythonBin` contains a path separator
 * (i.e. it is a path, not a bare command name), the directory of that
 * interpreter is prepended to the executable search path unless it is already
 * listed.
 *
 * @param pythonBin Python command or path.
 * @returns A fresh environment object; `process.env` itself is not modified.
 */
function buildPythonExecEnv(pythonBin: string): NodeJS.ProcessEnv {
  const env: NodeJS.ProcessEnv = { ...process.env };
  const raw = String(pythonBin || "").trim();
  if (!raw || (!raw.includes("/") && !raw.includes("\\"))) {
    return env;
  }

  // The spread copy is a plain, case-sensitive object. On Windows the search
  // path variable is commonly spelled "Path", so look the key up
  // case-insensitively instead of adding a second, shadowing "PATH" entry.
  const pathKey =
    process.platform === "win32"
      ? Object.keys(env).find((key) => key.toUpperCase() === "PATH") ?? "PATH"
      : "PATH";

  const pythonDir = path.dirname(path.resolve(raw));
  const segments = String(env[pathKey] || "")
    .split(path.delimiter)
    .map((item) => item.trim())
    .filter(Boolean);
  if (!segments.includes(pythonDir)) {
    env[pathKey] = [pythonDir, ...segments].join(path.delimiter);
  }
  return env;
}
208
-
209
/**
 * Run an inline Python script (`python -c <script> <args...>`) and summarize
 * its stderr.
 *
 * The child gets the environment from `buildPythonExecEnv`, the given timeout
 * and an 8 MiB output buffer limit.
 *
 * @returns An object whose `stderrSummary` is the last unexpected stderr
 *   line, or else the last non-blank stderr line, or `undefined`.
 * @throws Error `python runner failed: <detail>` when the process fails;
 *   `<detail>` is the last non-empty stderr line, else the last non-empty
 *   stdout line, else the underlying error message.
 */
async function runPythonInline(params: {
  pythonBin: string;
  script: string;
  args: string[];
  timeoutMs: number;
}): Promise<{
  /**
   * Python runner stderr summary (optional).
   */
  stderrSummary?: string;
}> {
  let stderrText: string;
  try {
    const completed = await execFileAsync(
      params.pythonBin,
      ["-c", params.script, ...params.args],
      {
        timeout: params.timeoutMs,
        maxBuffer: 8 * 1024 * 1024,
        env: buildPythonExecEnv(params.pythonBin),
      },
    );
    stderrText = String(completed.stderr || "");
  } catch (error) {
    // execFile rejections carry the captured output next to the message.
    const failure = error as {
      stdout?: string;
      stderr?: string;
      message?: string;
    };
    const detail =
      pickLastNonEmptyLine(String(failure.stderr || "")) ||
      pickLastNonEmptyLine(String(failure.stdout || "")) ||
      String(failure.message || error);
    throw new Error(`python runner failed: ${detail}`);
  }

  const unexpected = pickUnexpectedPythonStderr(stderrText);
  return {
    stderrSummary: unexpected || normalizePythonStderrSummary(stderrText),
  };
}
253
-
254
/**
 * Inline Python program for Kokoro synthesis, passed to `python -c`.
 *
 * Positional arguments: config path, model path, voice path, language code,
 * text, output path, speed. The script concatenates the audio chunks yielded
 * by the pipeline, writes them at 24000 Hz and prints the output path.
 */
const KOKORO_INLINE_SCRIPT = `import numpy as np
import soundfile as sf
import torch
from kokoro import KModel, KPipeline
config_path = __import__('sys').argv[1]
model_path = __import__('sys').argv[2]
voice_path = __import__('sys').argv[3]
lang_code = __import__('sys').argv[4]
text = __import__('sys').argv[5]
output_path = __import__('sys').argv[6]
speed = float(__import__('sys').argv[7])
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = KModel(config=config_path, model=model_path).to(device).eval()
pipeline = KPipeline(lang_code=lang_code, model=model, device=device)
chunks = []
for _, _, audio in pipeline(text, voice=voice_path, speed=speed):
    if audio is None:
        continue
    chunks.append(audio.cpu().numpy() if hasattr(audio, 'cpu') else np.asarray(audio))
if not chunks:
    raise RuntimeError('kokoro returned empty audio')
wave = np.concatenate(chunks)
sf.write(output_path, wave, 24000)
print(output_path)`;
280
-
281
/**
 * Inline Python program for Qwen3-TTS synthesis, passed to `python -c`.
 *
 * Positional arguments: model path, text, voice, language, output path, speed.
 * The script falls back to the first supported speaker / a supported language
 * when the requested one is not offered, writes the audio and prints the
 * output path.
 *
 * NOTE(review): `speed` is parsed from argv[6] but is not passed to
 * `generate_custom_voice` in this script.
 */
const QWEN3_INLINE_SCRIPT = `import soundfile as sf
import torch
from qwen_tts import Qwen3TTSModel
model_path = __import__('sys').argv[1]
text = __import__('sys').argv[2]
voice = __import__('sys').argv[3]
language = __import__('sys').argv[4]
output_path = __import__('sys').argv[5]
speed = float(__import__('sys').argv[6])
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
model = Qwen3TTSModel.from_pretrained(model_path, device_map=device, dtype=dtype)
speakers = model.get_supported_speakers() or []
speaker = voice if voice and voice in speakers else (speakers[0] if speakers else voice)
if not speaker:
    raise RuntimeError('qwen3 supported speaker list is empty')
languages = model.get_supported_languages() or []
resolved_language = language if language and language in languages else ('Auto' if 'Auto' in languages else (languages[0] if languages else 'Auto'))
wavs, sample_rate = model.generate_custom_voice(text=text, speaker=speaker, language=resolved_language, non_streaming_mode=True, do_sample=True, top_p=0.9, temperature=0.7, repetition_penalty=1.05)
wave = wavs[0] if isinstance(wavs, list) else wavs
sf.write(output_path, wave, sample_rate)
print(output_path)`;
305
-
306
/**
 * Synthesize speech with a Kokoro model by running `KOKORO_INLINE_SCRIPT`.
 *
 * Resolves the voice file first, then passes `config.json` and
 * `kokoro-v1_0.pth` from the model directory to the script. The language code
 * handed to the script is `"z"` for `zh` and `"a"` for anything else.
 */
async function runKokoroSynthesizer(params: {
  pythonBin: string;
  modelDir: string;
  text: string;
  voice?: string;
  language: string;
  outputPath: string;
  speed: number;
  timeoutMs: number;
}): Promise<{
  /**
   * Python runner stderr summary (optional).
   */
  stderrSummary?: string;
}> {
  const { modelDir, language } = params;
  const voicePath = resolveKokoroVoicePath({
    modelDir,
    voice: params.voice,
    language,
  });
  const langCode = language === "zh" ? "z" : "a";
  // Argument order must match the argv indices read by the inline script.
  const scriptArgs = [
    path.join(modelDir, "config.json"),
    path.join(modelDir, "kokoro-v1_0.pth"),
    voicePath,
    langCode,
    params.text,
    params.outputPath,
    String(params.speed),
  ];
  return runPythonInline({
    pythonBin: params.pythonBin,
    script: KOKORO_INLINE_SCRIPT,
    args: scriptArgs,
    timeoutMs: params.timeoutMs,
  });
}
343
-
344
/**
 * Synthesize speech with a Qwen3-TTS model by running `QWEN3_INLINE_SCRIPT`.
 *
 * The language passed to the script is `"Chinese"` for `zh`, `"English"` for
 * `en` and `"Auto"` for anything else.
 */
async function runQwen3Synthesizer(params: {
  pythonBin: string;
  modelDir: string;
  text: string;
  voice?: string;
  language: string;
  outputPath: string;
  speed: number;
  timeoutMs: number;
}): Promise<{
  /**
   * Python runner stderr summary (optional).
   */
  stderrSummary?: string;
}> {
  let languageName = "Auto";
  if (params.language === "zh") {
    languageName = "Chinese";
  } else if (params.language === "en") {
    languageName = "English";
  }
  // Argument order must match the argv indices read by the inline script.
  const scriptArgs = [
    params.modelDir,
    params.text,
    normalizeText(params.voice),
    languageName,
    params.outputPath,
    String(params.speed),
  ];
  return runPythonInline({
    pythonBin: params.pythonBin,
    script: QWEN3_INLINE_SCRIPT,
    args: scriptArgs,
    timeoutMs: params.timeoutMs,
  });
}
373
-
374
/**
 * Run one TTS synthesis and write the audio file.
 *
 * Per-call input values take precedence over the plugin configuration. The
 * language falls back to a guess based on the text, the Python binary to
 * `python3`. The model family selects the Kokoro or the Qwen3 runner.
 *
 * @throws Error when the text is empty, the model id is missing or
 *   unsupported, the output lies outside the project root, or the runner fails.
 */
export async function synthesizeSpeechFile(params: {
  /**
   * Current execution context.
   */
  context: PluginCommandContext;
  /**
   * Current plugin configuration.
   */
  config: TtsPluginConfig;
  /**
   * Input for this synthesis call.
   */
  input: TtsSynthesizeInput;
}): Promise<{
  /**
   * Output path relative to the project root.
   */
  outputPath: string;
  /**
   * Sendable file tag.
   */
  fileTag: string;
  /**
   * File size in bytes.
   */
  bytes: number;
  /**
   * Python runner stderr summary (optional).
   */
  stderrSummary?: string;
}> {
  const { context, config, input } = params;

  const text = normalizeText(input.text);
  if (!text) {
    throw new Error("tts synthesize requires text");
  }

  const modelId = resolveTtsModelId(normalizeText(input.modelId || config.modelId));
  if (!modelId) {
    throw new Error("tts modelId is missing");
  }

  const model = getTtsModelCatalogItem(modelId);
  if (!model) {
    throw new Error(`Unsupported tts model: ${modelId}`);
  }

  const format = normalizeFormat(input.format || config.format);
  const speed = normalizeSpeed(
    typeof input.speed === "number" ? input.speed : config.speed,
  );
  const language =
    normalizeText(input.language || config.language) || detectLanguageHint(text);
  const pythonBin = normalizeText(config.pythonBin) || "python3";
  const timeoutMs = normalizeTimeoutMs(config.timeoutMs);

  const modelsRootDir = resolveTtsModelsRootDir({
    projectRoot: context.rootPath,
    modelsDir: config.modelsDir,
  });
  const modelDir = path.join(modelsRootDir, modelId);
  const target = resolveOutputTarget({
    context,
    format,
    output: normalizeText(input.output || config.outputDir),
    modelId,
  });

  await fs.ensureDir(path.dirname(target.absPath));

  // Both runners accept the same parameter object, so only the function differs.
  const runSynthesizer =
    model.family === "kokoro" ? runKokoroSynthesizer : runQwen3Synthesizer;
  const { stderrSummary } = await runSynthesizer({
    pythonBin,
    modelDir,
    text,
    voice: input.voice || config.voice,
    language,
    outputPath: target.absPath,
    speed,
    timeoutMs,
  });

  const { size } = await fs.stat(target.absPath);
  const result: {
    outputPath: string;
    fileTag: string;
    bytes: number;
    stderrSummary?: string;
  } = {
    outputPath: target.relativePath,
    fileTag: renderChatMessageFileTag({
      type: "audio",
      path: target.relativePath,
    }),
    bytes: size,
  };
  // Only attach the summary key when there is something to report.
  if (stderrSummary) {
    result.stderrSummary = stderrSummary;
  }
  return result;
}
@@ -1,99 +0,0 @@
1
- /**
2
- * TTS 模型目录与推理族类型定义。
3
- *
4
- * 设计目标(中文)
5
- * - 统一描述 TTS 领域里的稳定模型 ID、模型族与下载清单。
6
- * - 让 plugin / runtime / console 共用同一套目录元数据。
7
- */
8
-
9
- /**
10
- * TTS provider type.
11
- *
12
- * Notes
13
- * - Only `local` is kept for now; everything goes through the local model directory and the Python runner.
14
- */
15
- export type TtsProvider = "local";
16
-
17
- /**
18
- * IDs of the built-in selectable TTS models.
19
- *
20
- * Notes
21
- * - Keep these strings stable so that later renames do not break user configuration.
22
- */
23
- export type TtsModelId =
24
- | "qwen3-tts-0.6b"
25
- | "kokoro-82m"
26
- | "qwen3-tts-1.7b";
27
-
28
- /**
29
- * Local TTS inference implementation family.
30
- *
31
- * Notes
32
- * - `qwen3`: the Qwen3-TTS series.
33
- * - `kokoro`: the Kokoro series.
34
- */
35
- export type TtsRuntimeFamily = "qwen3" | "kokoro";
36
-
37
- /**
38
- * TTS output audio format.
39
- *
40
- * Notes
41
- * - Only formats that the local runner supports reliably are kept for now.
42
- */
43
- export type TtsAudioFormat = "wav" | "flac";
44
-
45
- /**
46
- * Definition of a single HuggingFace download resource.
47
- */
48
- export interface TtsModelAsset {
49
- /**
50
- * Resource repository ID (owner/repo).
51
- */
52
- repoId: string;
53
- /**
54
- * Revision to download (usually main).
55
- */
56
- revision: string;
57
- /**
58
- * Download only these files (optional).
59
- *
60
- * Notes
61
- * - When empty, all files of the repo are downloaded.
62
- * - Meant for cases like Kokoro where only a few core files are needed.
63
- */
64
- files?: string[];
65
- /**
66
- * Target subdirectory inside the model directory to download into (optional).
67
- */
68
- targetSubdir?: string;
69
- }
70
-
71
- /**
72
- * Entry in the built-in TTS model catalog.
73
- */
74
- export interface TtsModelCatalogItem {
75
- /**
76
- * Stable model ID (the unique key for configuration and commands).
77
- */
78
- id: TtsModelId;
79
- /**
80
- * Display name for Console / CLI.
81
- */
82
- label: string;
83
- /**
84
- * Short user-facing description.
85
- */
86
- description: string;
87
- /**
88
- * Implementation family used for local inference.
89
- */
90
- family: TtsRuntimeFamily;
91
- /**
92
- * Whether to present this entry as a preferred recommendation.
93
- */
94
- recommended: boolean;
95
- /**
96
- * List of resources that must be downloaded when installing the model.
97
- */
98
- assets: TtsModelAsset[];
99
- }