@downcity/plugins 1.0.56 → 1.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. package/bin/BuiltinPlugins.d.ts.map +1 -1
  2. package/bin/BuiltinPlugins.js +0 -4
  3. package/bin/BuiltinPlugins.js.map +1 -1
  4. package/bin/asr/Plugin.d.ts +67 -7
  5. package/bin/asr/Plugin.d.ts.map +1 -1
  6. package/bin/asr/Plugin.js +229 -461
  7. package/bin/asr/Plugin.js.map +1 -1
  8. package/bin/asr/types/AsrPlugin.d.ts +114 -0
  9. package/bin/asr/types/AsrPlugin.d.ts.map +1 -0
  10. package/bin/asr/types/AsrPlugin.js +10 -0
  11. package/bin/asr/types/AsrPlugin.js.map +1 -0
  12. package/bin/image/ImagePlugin.d.ts +1 -1
  13. package/bin/image/ImagePlugin.d.ts.map +1 -1
  14. package/bin/image/ImagePlugin.js +23 -5
  15. package/bin/image/ImagePlugin.js.map +1 -1
  16. package/bin/index.d.ts +2 -0
  17. package/bin/index.d.ts.map +1 -1
  18. package/bin/tts/Plugin.d.ts +53 -6
  19. package/bin/tts/Plugin.d.ts.map +1 -1
  20. package/bin/tts/Plugin.js +197 -474
  21. package/bin/tts/Plugin.js.map +1 -1
  22. package/bin/tts/types/TtsPlugin.d.ts +63 -102
  23. package/bin/tts/types/TtsPlugin.d.ts.map +1 -1
  24. package/bin/tts/types/TtsPlugin.js +4 -3
  25. package/bin/tts/types/TtsPlugin.js.map +1 -1
  26. package/bin/web/PROMPT.d.ts +1 -1
  27. package/bin/web/PROMPT.d.ts.map +1 -1
  28. package/bin/web/PROMPT.js +1 -1
  29. package/bin/web/PROMPT.js.map +1 -1
  30. package/bin/web/Plugin.d.ts +66 -5
  31. package/bin/web/Plugin.d.ts.map +1 -1
  32. package/bin/web/Plugin.js +126 -450
  33. package/bin/web/Plugin.js.map +1 -1
  34. package/bin/web/WebPromptAssets.d.ts +1 -9
  35. package/bin/web/WebPromptAssets.d.ts.map +1 -1
  36. package/bin/web/WebPromptAssets.js +1 -11
  37. package/bin/web/WebPromptAssets.js.map +1 -1
  38. package/bin/web/runtime/Install.d.ts +19 -0
  39. package/bin/web/runtime/Install.d.ts.map +1 -0
  40. package/bin/web/runtime/Install.js +178 -0
  41. package/bin/web/runtime/Install.js.map +1 -0
  42. package/bin/web/types/WebPlugin.d.ts +37 -109
  43. package/bin/web/types/WebPlugin.d.ts.map +1 -1
  44. package/bin/web/types/WebPlugin.js +5 -7
  45. package/bin/web/types/WebPlugin.js.map +1 -1
  46. package/package.json +2 -2
  47. package/src/BuiltinPlugins.ts +0 -4
  48. package/src/asr/Plugin.ts +264 -483
  49. package/src/asr/types/AsrPlugin.ts +118 -0
  50. package/src/image/ImagePlugin.ts +23 -5
  51. package/src/index.ts +12 -0
  52. package/src/tts/Plugin.ts +225 -492
  53. package/src/tts/types/TtsPlugin.ts +67 -102
  54. package/src/web/PROMPT.ts +1 -1
  55. package/src/web/PROMPT.ts.txt +32 -6
  56. package/src/web/Plugin.ts +119 -453
  57. package/src/web/WebPromptAssets.ts +1 -13
  58. package/src/web/runtime/Install.ts +241 -0
  59. package/src/web/types/WebPlugin.ts +37 -113
  60. package/bin/asr/Config.d.ts +0 -43
  61. package/bin/asr/Config.d.ts.map +0 -1
  62. package/bin/asr/Config.js +0 -107
  63. package/bin/asr/Config.js.map +0 -1
  64. package/bin/asr/Dependency.d.ts +0 -77
  65. package/bin/asr/Dependency.d.ts.map +0 -1
  66. package/bin/asr/Dependency.js +0 -238
  67. package/bin/asr/Dependency.js.map +0 -1
  68. package/bin/asr/InboundAugment.d.ts +0 -17
  69. package/bin/asr/InboundAugment.d.ts.map +0 -1
  70. package/bin/asr/InboundAugment.js +0 -47
  71. package/bin/asr/InboundAugment.js.map +0 -1
  72. package/bin/asr/ModelCatalog.d.ts +0 -29
  73. package/bin/asr/ModelCatalog.d.ts.map +0 -1
  74. package/bin/asr/ModelCatalog.js +0 -25
  75. package/bin/asr/ModelCatalog.js.map +0 -1
  76. package/bin/tts/Dependency.d.ts +0 -90
  77. package/bin/tts/Dependency.d.ts.map +0 -1
  78. package/bin/tts/Dependency.js +0 -344
  79. package/bin/tts/Dependency.js.map +0 -1
  80. package/bin/tts/PluginSupport.d.ts +0 -25
  81. package/bin/tts/PluginSupport.d.ts.map +0 -1
  82. package/bin/tts/PluginSupport.js +0 -72
  83. package/bin/tts/PluginSupport.js.map +0 -1
  84. package/bin/tts/runtime/Catalog.d.ts +0 -21
  85. package/bin/tts/runtime/Catalog.d.ts.map +0 -1
  86. package/bin/tts/runtime/Catalog.js +0 -90
  87. package/bin/tts/runtime/Catalog.js.map +0 -1
  88. package/bin/tts/runtime/DependencyInstaller.d.ts +0 -143
  89. package/bin/tts/runtime/DependencyInstaller.d.ts.map +0 -1
  90. package/bin/tts/runtime/DependencyInstaller.js +0 -261
  91. package/bin/tts/runtime/DependencyInstaller.js.map +0 -1
  92. package/bin/tts/runtime/Installer.d.ts +0 -89
  93. package/bin/tts/runtime/Installer.d.ts.map +0 -1
  94. package/bin/tts/runtime/Installer.js +0 -188
  95. package/bin/tts/runtime/Installer.js.map +0 -1
  96. package/bin/tts/runtime/Paths.d.ts +0 -20
  97. package/bin/tts/runtime/Paths.d.ts.map +0 -1
  98. package/bin/tts/runtime/Paths.js +0 -32
  99. package/bin/tts/runtime/Paths.js.map +0 -1
  100. package/bin/tts/runtime/Synthesizer.d.ts +0 -44
  101. package/bin/tts/runtime/Synthesizer.d.ts.map +0 -1
  102. package/bin/tts/runtime/Synthesizer.js +0 -363
  103. package/bin/tts/runtime/Synthesizer.js.map +0 -1
  104. package/bin/tts/types/Tts.d.ts +0 -91
  105. package/bin/tts/types/Tts.d.ts.map +0 -1
  106. package/bin/tts/types/Tts.js +0 -9
  107. package/bin/tts/types/Tts.js.map +0 -1
  108. package/bin/voice/Config.d.ts +0 -43
  109. package/bin/voice/Config.d.ts.map +0 -1
  110. package/bin/voice/Config.js +0 -104
  111. package/bin/voice/Config.js.map +0 -1
  112. package/bin/voice/Dependency.d.ts +0 -77
  113. package/bin/voice/Dependency.d.ts.map +0 -1
  114. package/bin/voice/Dependency.js +0 -237
  115. package/bin/voice/Dependency.js.map +0 -1
  116. package/bin/voice/InboundAugment.d.ts +0 -17
  117. package/bin/voice/InboundAugment.d.ts.map +0 -1
  118. package/bin/voice/InboundAugment.js +0 -47
  119. package/bin/voice/InboundAugment.js.map +0 -1
  120. package/bin/voice/ModelCatalog.d.ts +0 -29
  121. package/bin/voice/ModelCatalog.d.ts.map +0 -1
  122. package/bin/voice/ModelCatalog.js +0 -25
  123. package/bin/voice/ModelCatalog.js.map +0 -1
  124. package/bin/voice/runtime/Catalog.d.ts +0 -18
  125. package/bin/voice/runtime/Catalog.d.ts.map +0 -1
  126. package/bin/voice/runtime/Catalog.js +0 -61
  127. package/bin/voice/runtime/Catalog.js.map +0 -1
  128. package/bin/voice/runtime/DependencyInstaller.d.ts +0 -145
  129. package/bin/voice/runtime/DependencyInstaller.d.ts.map +0 -1
  130. package/bin/voice/runtime/DependencyInstaller.js +0 -309
  131. package/bin/voice/runtime/DependencyInstaller.js.map +0 -1
  132. package/bin/voice/runtime/Installer.d.ts +0 -94
  133. package/bin/voice/runtime/Installer.d.ts.map +0 -1
  134. package/bin/voice/runtime/Installer.js +0 -200
  135. package/bin/voice/runtime/Installer.js.map +0 -1
  136. package/bin/voice/runtime/Paths.d.ts +0 -8
  137. package/bin/voice/runtime/Paths.d.ts.map +0 -1
  138. package/bin/voice/runtime/Paths.js +0 -26
  139. package/bin/voice/runtime/Paths.js.map +0 -1
  140. package/bin/voice/runtime/Transcriber.d.ts +0 -57
  141. package/bin/voice/runtime/Transcriber.d.ts.map +0 -1
  142. package/bin/voice/runtime/Transcriber.js +0 -329
  143. package/bin/voice/runtime/Transcriber.js.map +0 -1
  144. package/bin/voice/types/Voice.d.ts +0 -58
  145. package/bin/voice/types/Voice.d.ts.map +0 -1
  146. package/bin/voice/types/Voice.js +0 -9
  147. package/bin/voice/types/Voice.js.map +0 -1
  148. package/bin/voice/types/VoicePlugin.d.ts +0 -190
  149. package/bin/voice/types/VoicePlugin.d.ts.map +0 -1
  150. package/bin/voice/types/VoicePlugin.js +0 -9
  151. package/bin/voice/types/VoicePlugin.js.map +0 -1
  152. package/bin/web/Dependency.d.ts +0 -10
  153. package/bin/web/Dependency.d.ts.map +0 -1
  154. package/bin/web/Dependency.js +0 -10
  155. package/bin/web/Dependency.js.map +0 -1
  156. package/bin/web/PROMPT.agent-browser.d.ts +0 -7
  157. package/bin/web/PROMPT.agent-browser.d.ts.map +0 -1
  158. package/bin/web/PROMPT.agent-browser.js +0 -8
  159. package/bin/web/PROMPT.agent-browser.js.map +0 -1
  160. package/bin/web/PROMPT.web-access.d.ts +0 -7
  161. package/bin/web/PROMPT.web-access.d.ts.map +0 -1
  162. package/bin/web/PROMPT.web-access.js +0 -8
  163. package/bin/web/PROMPT.web-access.js.map +0 -1
  164. package/bin/web/runtime/Config.d.ts +0 -21
  165. package/bin/web/runtime/Config.d.ts.map +0 -1
  166. package/bin/web/runtime/Config.js +0 -79
  167. package/bin/web/runtime/Config.js.map +0 -1
  168. package/bin/web/runtime/Source.d.ts +0 -29
  169. package/bin/web/runtime/Source.d.ts.map +0 -1
  170. package/bin/web/runtime/Source.js +0 -209
  171. package/bin/web/runtime/Source.js.map +0 -1
  172. package/src/asr/Config.ts +0 -138
  173. package/src/asr/Dependency.ts +0 -336
  174. package/src/asr/InboundAugment.ts +0 -59
  175. package/src/asr/ModelCatalog.ts +0 -43
  176. package/src/tts/Dependency.ts +0 -473
  177. package/src/tts/PluginSupport.ts +0 -85
  178. package/src/tts/runtime/Catalog.ts +0 -97
  179. package/src/tts/runtime/DependencyInstaller.ts +0 -436
  180. package/src/tts/runtime/Installer.ts +0 -297
  181. package/src/tts/runtime/Paths.ts +0 -39
  182. package/src/tts/runtime/Synthesizer.ts +0 -480
  183. package/src/tts/types/Tts.ts +0 -99
  184. package/src/voice/Config.ts +0 -135
  185. package/src/voice/Dependency.ts +0 -329
  186. package/src/voice/InboundAugment.ts +0 -59
  187. package/src/voice/ModelCatalog.ts +0 -43
  188. package/src/voice/runtime/Catalog.ts +0 -68
  189. package/src/voice/runtime/DependencyInstaller.ts +0 -505
  190. package/src/voice/runtime/Installer.ts +0 -324
  191. package/src/voice/runtime/Paths.ts +0 -26
  192. package/src/voice/runtime/Transcriber.ts +0 -467
  193. package/src/voice/types/Voice.ts +0 -68
  194. package/src/voice/types/VoicePlugin.ts +0 -194
  195. package/src/web/Dependency.ts +0 -17
  196. package/src/web/PROMPT.agent-browser.ts +0 -9
  197. package/src/web/PROMPT.agent-browser.ts.txt +0 -17
  198. package/src/web/PROMPT.web-access.ts +0 -9
  199. package/src/web/PROMPT.web-access.ts.txt +0 -13
  200. package/src/web/runtime/Config.ts +0 -105
  201. package/src/web/runtime/Source.ts +0 -257
@@ -1,467 +0,0 @@
1
- import path from "node:path";
2
- import { exec as execWithShell, execFile as execFileCb } from "node:child_process";
3
- import { promisify } from "node:util";
4
- import fs from "fs-extra";
5
- import type {
6
- VoiceModelId,
7
- VoiceProvider,
8
- VoiceTranscribeStrategy,
9
- } from "@/voice/types/Voice.js";
10
- import { resolveVoiceModelsRootDir } from "./Paths.js";
11
- import type { VoicePluginConfig } from "@/voice/types/VoicePlugin.js";
12
- import type { PluginCommandContext } from "@downcity/agent/internal/plugin/types/Plugin.js";
13
-
14
- const execShellAsync = promisify(execWithShell); // Promise-returning form of child_process.exec (the command string is run through a shell).
15
- const execFileAsync = promisify(execFileCb); // Promise-returning form of child_process.execFile (executable plus argument array).
16
- const DEFAULT_TRANSCRIBE_TIMEOUT_MS = 120_000; // Timeout returned by normalizeTimeoutMs when the configured value is not a finite number.
17
-
18
- /**
19
- * Input parameters for a voice transcription call.
20
- */
21
- export interface VoiceTranscribeInput {
22
- /**
23
- * Runtime context (used to read the configuration, the root directory and logging).
24
- */
25
- context: PluginCommandContext;
26
- /**
27
- * Path of the audio to transcribe (absolute, or relative to the project root).
28
- */
29
- audioPath: string;
30
- /**
31
- * Language hint (optional).
32
- */
33
- language?: string;
34
- }
35
-
36
- /**
37
- * Result of a voice transcription call.
38
- */
39
- export interface VoiceTranscribeResult {
40
- /**
41
- * Transcribed text.
42
- */
43
- text: string;
44
- /**
45
- * Normalized absolute path of the audio file.
46
- */
47
- audioPath: string;
48
- /**
49
- * ID of the model used for this call.
50
- */
51
- modelId: VoiceModelId;
52
- /**
53
- * Provider used for this call.
54
- */
55
- provider: VoiceProvider;
56
- /**
57
- * Time this transcription took, in milliseconds.
58
- */
59
- elapsedMs: number;
60
- /**
61
- * Identifier of the runner that actually executed.
62
- */
63
- runner: "funasr" | "transformers-whisper" | "command";
64
- }
65
-
66
- type VoiceConfigResolved = { // Resolved settings for one transcription call, as returned by resolveVoiceConfig.
67
- config: VoicePluginConfig; // the plugin configuration object the values were read from
68
- modelId: VoiceModelId;
69
- provider: VoiceProvider;
70
- modelDir: string; // models root directory joined with modelId
71
- audioPath: string; // absolute path of the audio file
72
- timeoutMs: number; // already passed through normalizeTimeoutMs
73
- language: string; // already passed through normalizeLanguage
74
- pythonBin: string; // already passed through normalizePythonBin
75
- commandTemplate?: string; // trimmed custom command; undefined when none is configured
76
- strategy: VoiceTranscribeStrategy;
77
- };
78
-
79
- function normalizeLanguage(input?: string): string { // Returns the trimmed language hint, or "zh" when it is missing or blank.
80
- const text = String(input || "").trim();
81
- return text || "zh";
82
- }
83
-
84
- function normalizeTimeoutMs(value?: number): number { // Clamps a timeout to whole milliseconds between 1_000 and 600_000.
85
- if (!Number.isFinite(value as number)) return DEFAULT_TRANSCRIBE_TIMEOUT_MS; // undefined, NaN, Infinity and non-number values all use the default
86
- const ms = Number(value);
87
- if (ms < 1_000) return 1_000; // lower bound: one second
88
- if (ms > 600_000) return 600_000; // upper bound: ten minutes
89
- return Math.floor(ms); // drop any fractional milliseconds
90
- }
91
-
92
- function normalizePythonBin(value?: string): string { // Returns the trimmed interpreter path or name, or "python3" when it is missing or blank.
93
- const text = String(value || "").trim();
94
- return text || "python3";
95
- }
96
-
97
- function normalizeStrategy(strategy?: VoiceTranscribeStrategy): VoiceTranscribeStrategy { // Passes the three explicit strategies through; any other value becomes "auto".
98
- if (strategy === "funasr") return "funasr";
99
- if (strategy === "transformers-whisper") return "transformers-whisper";
100
- if (strategy === "command") return "command";
101
- return "auto";
102
- }
103
-
104
- function toAbsoluteAudioPath(context: PluginCommandContext, input: string): string { // Resolves the audio path to an absolute path; throws when the input is blank.
105
- const raw = String(input || "").trim();
106
- if (!raw) {
107
- throw new Error("voice transcribe requires audioPath");
108
- }
109
- if (path.isAbsolute(raw)) return path.resolve(raw); // already absolute: only normalize it
110
- return path.resolve(context.rootPath, raw); // relative paths are resolved against context.rootPath
111
- }
112
-
113
- function pickLastNonEmptyLine(value: string): string { // Returns the last non-blank line of the text, trimmed, or "" when every line is blank.
114
- const lines = String(value || "")
115
- .split(/\r?\n/)
116
- .map((line) => line.trim())
117
- .filter(Boolean);
118
- return lines.length > 0 ? lines[lines.length - 1] : "";
119
- }
120
-
121
- function shellEscapeSingle(value: string): string { // Wraps the value in single quotes; each embedded single quote is closed, emitted inside double quotes, then reopened. NOTE(review): this quoting form targets sh-style shells - behaviour under other shells is not established here, confirm.
122
- return `'${String(value).replace(/'/g, `'"'"'`)}'`;
123
- }
124
-
125
- /**
126
- * Builds the environment variables used when running python.
127
- *
128
- * Key points
129
- * - When `pythonBin` points at an interpreter inside a venv, its `bin` directory is prepended to PATH,
130
- * so that `pip` invoked from inside the python subprocess (FunASR remote code uses it) resolves to the same venv. NOTE(review): only the exact key `PATH` is read and written; if the variable is stored under another casing this would add a second key - confirm.
131
- */
132
- function buildPythonExecEnv(pythonBin: string): NodeJS.ProcessEnv {
133
- const env: NodeJS.ProcessEnv = { ...process.env }; // work on a copy so process.env itself is not modified
134
- const raw = String(pythonBin || "").trim();
135
- if (!raw || (!raw.includes("/") && !raw.includes("\\"))) {
136
- return env; // blank value or bare command name (no path separator): environment is returned unchanged
137
- }
138
- const pythonDir = path.dirname(path.resolve(raw)); // directory that contains the interpreter
139
- const currentPath = String(env.PATH || "");
140
- const segments = currentPath
141
- .split(path.delimiter)
142
- .map((item) => item.trim())
143
- .filter(Boolean);
144
- if (!segments.includes(pythonDir)) { // when the directory is already listed, PATH is left exactly as it was
145
- env.PATH = [pythonDir, ...segments].join(path.delimiter); // rebuilt from the trimmed, non-empty segments with pythonDir first
146
- }
147
- return env;
148
- }
149
-
150
- function renderCommandTemplate(template: string, values: Record<string, string>): string { // Replaces the {audioPath}, {modelDir}, {modelId} and {language} placeholders with shell-quoted values.
151
- return template.replace(/\{(audioPath|modelDir|modelId|language)\}/g, (_, key) => {
152
- const value = values[key] || ""; // a missing or empty value is rendered as an empty quoted string
153
- return shellEscapeSingle(value);
154
- });
155
- }
156
-
157
- async function runCustomCommand(params: { // Renders the command template, runs it through a shell and returns the last non-blank stdout line.
158
- template: string;
159
- values: Record<string, string>;
160
- timeoutMs: number;
161
- }): Promise<string> {
162
- const command = renderCommandTemplate(params.template, params.values);
163
- const { stdout, stderr } = await execShellAsync(command, { // a rejection from this call is not caught in this function
164
- timeout: params.timeoutMs,
165
- maxBuffer: 8 * 1024 * 1024, // 8 MiB
166
- });
167
- const text = pickLastNonEmptyLine(String(stdout || ""));
168
- if (text) return text;
169
- const err = pickLastNonEmptyLine(String(stderr || "")); // nothing usable on stdout: include the last stderr line in the error when there is one
170
- if (err) {
171
- throw new Error(`voice custom command produced no transcript: ${err}`);
172
- }
173
- throw new Error("voice custom command produced empty transcript");
174
- }
175
-
176
- async function runPythonInline(params: { // Runs an inline python script (python -c) and returns the last non-blank stdout line.
177
- pythonBin: string;
178
- script: string;
179
- args: string[];
180
- timeoutMs: number;
181
- }): Promise<string> {
182
- let stdout = "";
183
- let stderr = "";
184
- try {
185
- const output = await execFileAsync(
186
- params.pythonBin,
187
- ["-c", params.script, ...params.args], // the script source is passed after -c, followed by the extra arguments
188
- {
189
- timeout: params.timeoutMs,
190
- maxBuffer: 8 * 1024 * 1024, // 8 MiB
191
- env: buildPythonExecEnv(params.pythonBin),
192
- },
193
- );
194
- stdout = String(output.stdout || "");
195
- stderr = String(output.stderr || "");
196
- } catch (error) {
197
- const errorLike = error as { // the rejection is read for optional stdout / stderr / message fields
198
- stdout?: string;
199
- stderr?: string;
200
- message?: string;
201
- };
202
- stdout = String(errorLike.stdout || "");
203
- stderr = String(errorLike.stderr || "");
204
- const err = pickLastNonEmptyLine(stderr) || pickLastNonEmptyLine(stdout); // prefer the last stderr line, then the last stdout line
205
- if (err) {
206
- throw new Error(`python runner failed: ${err}`);
207
- }
208
- throw new Error(`python runner failed: ${String(errorLike.message || error)}`); // no captured output: fall back to the error message
209
- }
210
- const text = pickLastNonEmptyLine(stdout); // the transcript is taken to be the last non-blank line the script printed
211
- if (text) return text;
212
- const err = pickLastNonEmptyLine(stderr);
213
- if (err) {
214
- throw new Error(`python runner produced no transcript: ${err}`);
215
- }
216
- throw new Error("python runner produced empty transcript");
217
- }
218
-
219
- const FUNASR_INLINE_SCRIPT = [ // Python source, joined with newlines, that transcribes one file with FunASR and prints the text.
220
- "from funasr import AutoModel",
221
- "import sys",
222
- "model_dir = sys.argv[1]", // first extra argument: model directory
223
- "audio_path = sys.argv[2]", // second extra argument: audio file path
224
- "model = AutoModel(model=model_dir, trust_remote_code=True, disable_update=True)", // NOTE(review): trust_remote_code=True presumably lets the model directory supply code that gets executed - confirm model sources are trusted
225
- "result = model.generate(input=audio_path)",
226
- "text = ''",
227
- "if isinstance(result, list) and len(result) > 0:", // list result: use the 'text' of the first entry when it is a dict
228
- "  first = result[0]",
229
- "    if isinstance(first, dict):",
230
- "      text = str(first.get('text') or '')",
231
- "elif isinstance(result, dict):",
232
- "  text = str(result.get('text') or '')",
233
- "print(text.strip())",
234
- ].join("\n");
235
-
236
- const TRANSFORMERS_WHISPER_INLINE_SCRIPT = [ // Python source, joined with newlines, that transcribes one file with a Transformers ASR pipeline and prints the text.
237
- "from transformers import pipeline",
238
- "import sys",
239
- "model_dir = sys.argv[1]", // first extra argument: model directory
240
- "audio_path = sys.argv[2]", // second extra argument: audio file path
241
- "language = sys.argv[3] if len(sys.argv) > 3 else ''", // optional third argument: language
242
- "pipe = pipeline('automatic-speech-recognition', model=model_dir)",
243
- "kwargs = {}",
244
- "if language:", // the language is forwarded through generate_kwargs only when it is non-empty
245
- "  kwargs['generate_kwargs'] = {'language': language}",
246
- "result = pipe(audio_path, **kwargs)",
247
- "text = ''",
248
- "if isinstance(result, dict):",
249
- "  text = str(result.get('text') or '')",
250
- "else:",
251
- "  text = str(result)",
252
- "print(text.strip())",
253
- ].join("\n");
254
-
255
- async function runFunasrRunner(params: { // Runs FUNASR_INLINE_SCRIPT through runPythonInline and returns its transcript line.
256
- pythonBin: string;
257
- modelDir: string;
258
- audioPath: string;
259
- timeoutMs: number;
260
- }): Promise<string> {
261
- return runPythonInline({
262
- pythonBin: params.pythonBin,
263
- script: FUNASR_INLINE_SCRIPT,
264
- args: [params.modelDir, params.audioPath], // order matches sys.argv[1] and sys.argv[2] in the script
265
- timeoutMs: params.timeoutMs,
266
- });
267
- }
268
-
269
- async function runTransformersWhisperRunner(params: { // Runs TRANSFORMERS_WHISPER_INLINE_SCRIPT through runPythonInline and returns its transcript line.
270
- pythonBin: string;
271
- modelDir: string;
272
- audioPath: string;
273
- language: string;
274
- timeoutMs: number;
275
- }): Promise<string> {
276
- return runPythonInline({
277
- pythonBin: params.pythonBin,
278
- script: TRANSFORMERS_WHISPER_INLINE_SCRIPT,
279
- args: [params.modelDir, params.audioPath, params.language], // order matches sys.argv[1..3] in the script
280
- timeoutMs: params.timeoutMs,
281
- });
282
- }
283
-
284
- async function resolveVoiceConfig(input: VoiceTranscribeInput): Promise<VoiceConfigResolved> { // Reads plugins.asr from the context config, validates it and returns the resolved settings; throws on any invalid or missing piece.
285
- const pluginConfig = // plugins.asr is used only when it is a non-array object; otherwise null
286
- input.context.config.plugins?.asr &&
287
- typeof input.context.config.plugins.asr === "object" &&
288
- !Array.isArray(input.context.config.plugins.asr)
289
- ? input.context.config.plugins.asr
290
- : null;
291
- // The plugin must be explicitly enabled (enabled === true); otherwise transcription is refused.
292
- const enabled =
293
- pluginConfig && (pluginConfig as { enabled?: unknown }).enabled === true;
294
- if (!enabled) {
295
- throw new Error("ASR plugin is disabled. Run `town asr on` first.");
296
- }
297
- // Provider defaults to "local"; only "local" and "command" are accepted.
298
- const provider = String(
299
- (pluginConfig as { provider?: unknown } | null)?.provider || "local",
300
- ) as VoiceProvider;
301
- if (!["local", "command"].includes(provider)) {
302
- throw new Error(`Unsupported asr provider: ${provider}`);
303
- }
304
- // Model id defaults to "SenseVoiceSmall" when it is missing or blank.
305
- const modelId =
306
- String(
307
- (pluginConfig as { modelId?: unknown } | null)?.modelId || "SenseVoiceSmall",
308
- ).trim() || "SenseVoiceSmall";
309
- if (!modelId && provider === "local") { // NOTE(review): modelId always carries the "SenseVoiceSmall" fallback by this point, so this branch looks unreachable - confirm
310
- throw new Error("ASR active model is not configured. Run `town asr use <modelId>`.");
311
- }
312
- // The model directory is the models root joined with modelId; it has to exist only for the "local" provider.
313
- const modelsRootDir = resolveVoiceModelsRootDir({
314
- projectRoot: input.context.rootPath,
315
- modelsDir: String(
316
- (pluginConfig as { modelsDir?: unknown } | null)?.modelsDir || "",
317
- ).trim(),
318
- });
319
- const modelDir = path.resolve(modelsRootDir, modelId);
320
- if (provider === "local") {
321
- const modelDirExists = await fs.pathExists(modelDir);
322
- if (!modelDirExists) {
323
- throw new Error(`Voice model directory does not exist: ${modelDir}`);
324
- }
325
- }
326
- // The audio file has to exist regardless of provider.
327
- const audioPath = toAbsoluteAudioPath(input.context, input.audioPath);
328
- const audioExists = await fs.pathExists(audioPath);
329
- if (!audioExists) {
330
- throw new Error(`Audio file does not exist: ${audioPath}`);
331
- }
332
- // Strategy: the configured value when present, else "command" for the command provider and "auto" otherwise.
333
- const commandTemplate = String(
334
- (pluginConfig as { command?: unknown } | null)?.command || "",
335
- ).trim();
336
- const strategy = normalizeStrategy(
337
- ((pluginConfig as { strategy?: unknown } | null)?.strategy as
338
- | VoiceTranscribeStrategy
339
- | undefined) ||
340
- (provider === "command" ? "command" : "auto"),
341
- );
342
- // The remaining fields are normalized with the helper functions of this file.
343
- return {
344
- config: (pluginConfig as VoicePluginConfig | null) || { // NOTE(review): pluginConfig is non-null once the enabled check has passed, so this fallback object appears unused - confirm
345
- provider: "local",
346
- },
347
- modelId: modelId as VoiceModelId,
348
- provider,
349
- modelDir,
350
- audioPath,
351
- timeoutMs: normalizeTimeoutMs(
352
- (pluginConfig as { timeoutMs?: unknown } | null)?.timeoutMs as number | undefined,
353
- ),
354
- language: normalizeLanguage( // the per-call language wins over the configured one
355
- input.language ||
356
- String((pluginConfig as { language?: unknown } | null)?.language || ""),
357
- ),
358
- pythonBin: normalizePythonBin(
359
- String(
360
- (pluginConfig as { pythonBin?: unknown } | null)?.pythonBin || "",
361
- ),
362
- ),
363
- commandTemplate: commandTemplate || undefined, // an empty template is reported as undefined
364
- strategy,
365
- };
366
- }
367
-
368
- function resolveAutoRunnerOrder(modelId: VoiceModelId): Array<"funasr" | "transformers-whisper"> { // Order in which the built-in runners are attempted when the strategy is "auto".
369
- if (modelId === "whisper-large-v3-turbo") {
370
- return ["transformers-whisper", "funasr"]; // the whisper model tries the Transformers runner first
371
- }
372
- return ["funasr", "transformers-whisper"]; // every other model tries FunASR first
373
- }
374
-
375
- /**
376
- * Runs ASR audio transcription.
377
- *
378
- * Key points
379
- * - This function is the core capability the ASR plugin exposes to services such as chat.
380
- * - When the built-in runners fail, a clear error is reported so the caller can fall back to the attachment flow.
381
- */
382
- export async function transcribeVoiceAudio(
383
- input: VoiceTranscribeInput,
384
- ): Promise<VoiceTranscribeResult> {
385
- const startedAt = Date.now(); // elapsedMs is measured from here, so it includes configuration resolution
386
- const resolved = await resolveVoiceConfig(input); // errors thrown here are not caught in this function
387
-
388
- const runnerFailures: string[] = [];
389
- // Runs one runner; on any error it records "<runner>: <error>" in runnerFailures and yields null instead of throwing.
390
- const tryRunner = async (
391
- runner: "funasr" | "transformers-whisper" | "command",
392
- ): Promise<VoiceTranscribeResult | null> => {
393
- try {
394
- let text = "";
395
- if (runner === "command") {
396
- const template = String(resolved.commandTemplate || "").trim();
397
- if (!template) {
398
- throw new Error(
399
- "ASR transcribe strategy=command requires plugins.asr.command",
400
- );
401
- }
402
- text = await runCustomCommand({
403
- template,
404
- timeoutMs: resolved.timeoutMs,
405
- values: {
406
- audioPath: resolved.audioPath,
407
- modelDir: resolved.modelDir,
408
- modelId: resolved.modelId,
409
- language: resolved.language,
410
- },
411
- });
412
- } else if (runner === "funasr") {
413
- text = await runFunasrRunner({
414
- pythonBin: resolved.pythonBin,
415
- modelDir: resolved.modelDir,
416
- audioPath: resolved.audioPath,
417
- timeoutMs: resolved.timeoutMs,
418
- });
419
- } else {
420
- text = await runTransformersWhisperRunner({
421
- pythonBin: resolved.pythonBin,
422
- modelDir: resolved.modelDir,
423
- audioPath: resolved.audioPath,
424
- language: resolved.language,
425
- timeoutMs: resolved.timeoutMs,
426
- });
427
- }
428
-
429
- const normalized = String(text || "").trim();
430
- if (!normalized) {
431
- throw new Error("transcript is empty");
432
- }
433
-
434
- return {
435
- text: normalized,
436
- audioPath: resolved.audioPath,
437
- modelId: resolved.modelId,
438
- provider: resolved.provider,
439
- elapsedMs: Date.now() - startedAt,
440
- runner,
441
- };
442
- } catch (error) {
443
- runnerFailures.push(`${runner}: ${String(error)}`);
444
- return null;
445
- }
446
- };
447
- // An explicit strategy runs exactly one runner; any other strategy walks resolveAutoRunnerOrder and returns the first success.
448
- if (resolved.strategy === "command") {
449
- const result = await tryRunner("command");
450
- if (result) return result;
451
- } else if (resolved.strategy === "funasr") {
452
- const result = await tryRunner("funasr");
453
- if (result) return result;
454
- } else if (resolved.strategy === "transformers-whisper") {
455
- const result = await tryRunner("transformers-whisper");
456
- if (result) return result;
457
- } else {
458
- for (const runner of resolveAutoRunnerOrder(resolved.modelId)) {
459
- const result = await tryRunner(runner);
460
- if (result) return result;
461
- }
462
- }
463
- // Every attempted runner failed: report all collected failure messages in a single error.
464
- throw new Error(
465
- `Voice transcription failed for model "${resolved.modelId}". ${runnerFailures.join(" | ")}`,
466
- );
467
- }
@@ -1,68 +0,0 @@
1
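- /**
2
- * Type definitions for the voice model catalog and transcription strategies.
3
- *
4
- * Design goals
5
- * - Describe the stable enum-like types of the voice domain in one place.
6
- * - Let the plugin and the runtime share a single set of model and strategy definitions.
7
- */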
8
-
9
- /**
10
- * Voice provider type.
11
- *
12
- * Notes
13
- * - `local`: uses a local model directory and a Python runner.
14
- * - `command`: uses a user-defined command template.
15
- */
16
- export type VoiceProvider = "local" | "command";
17
-
18
- /**
19
- * IDs of the built-in selectable STT models.
20
- *
21
- * Notes
22
- * - Kept as stable strings so that a future rename does not invalidate user configuration.
23
- */
24
- export type VoiceModelId =
25
- | "SenseVoiceSmall"
26
- | "paraformer-zh-streaming"
27
- | "whisper-large-v3-turbo";
28
-
29
- /**
30
- * Voice transcription execution strategy.
31
- *
32
- * Notes
33
- * - `auto`: picks a built-in runner automatically based on the active model.
34
- * - `funasr`: forces the FunASR python runner.
35
- * - `transformers-whisper`: forces the Transformers Whisper python runner.
36
- * - `command`: uses the custom command template.
37
- */
38
- export type VoiceTranscribeStrategy =
39
- | "auto"
40
- | "funasr"
41
- | "transformers-whisper"
42
- | "command";
43
-
44
- /**
45
- * Entry in the built-in voice model catalog.
46
- */
47
- export interface VoiceModelCatalogItem {
48
- /**
49
- * Stable model ID (the unique key used in configuration and commands).
50
- */
51
- id: VoiceModelId;
52
- /**
53
- * Display name shown in the CLI.
54
- */
55
- label: string;
56
- /**
57
- * Model description (languages / features / use cases).
58
- */
59
- description: string;
60
- /**
61
- * HuggingFace repository ID (owner/repo).
62
- */
63
- huggingfaceRepo: string;
64
- /**
65
- * Revision to download (usually main).
66
- */
67
- revision: string;
68
- }