yuanflow-cli 0.1.47 → 0.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli.js CHANGED
@@ -629,19 +629,19 @@ function printHelp() {
629
629
  yuanflow-cli browser task-plan --platform xiaohongshu --task publish --account main --format agent-json
630
630
  yuanflow-cli ai qwen3-vl-plus --prompt "描述这张图" --image-url "https://example.com/image.png" --dry-run
631
631
  yuanflow-cli ai qwen3-vl-plus --prompt "总结这个视频画面" --video-url "https://example.com/video.mp4" --dry-run
632
- yuanflow-cli ai qwen3-vl-plus --prompt "描述本地图片" --image-file "D:\\素材\\cover.png" --dry-run
633
- yuanflow-cli ai qwen3-vl-plus --prompt "描述本地视频" --video-file "D:\\素材\\demo.mp4" --dry-run
634
- yuanflow-cli voice clone --file-transfer "D:\\voice\\sample.wav" --name demo --activate --dry-run
632
+ yuanflow-cli ai qwen3-vl-plus --prompt "描述本地图片" --image-file "<本地图片路径>" --dry-run
633
+ yuanflow-cli ai qwen3-vl-plus --prompt "描述本地视频" --video-file "<本地视频路径>" --dry-run
634
+ yuanflow-cli voice clone --file-transfer "<本地音频路径>" --name demo --activate --dry-run
635
635
  yuanflow-cli voice list --dry-run
636
636
  yuanflow-cli voice activate --voice voice_xxx --dry-run
637
- yuanflow-cli voice replicate --text "你好" --voice voice_xxx --output "D:\\voice\\replicate.mp3" --dry-run
638
- yuanflow-cli ai qwen-voice-enrollment --file "D:\\voice\\sample.wav" --name demo --activate --dry-run
639
- yuanflow-cli ai qwen3-tts-vc-realtime-2026-01-15 --text "你好" --voice voice_xxx --output "D:\\voice\\qwen.mp3" --dry-run
637
+ yuanflow-cli voice replicate --text "你好" --voice voice_xxx --output "<输出音频路径>" --dry-run
638
+ yuanflow-cli ai voice-enrollment --file "<本地音频路径>" --name demo --activate --dry-run
639
+ yuanflow-cli ai cosyvoice-v3-flash --text "你好" --voice voice_xxx --output "<输出音频路径>" --dry-run
640
640
  yuanflow-cli ai fun-asr --audio-url "https://example.com/audio.wav" --response-format verbose_json --dry-run
641
641
  yuanflow-cli ai doubao-tts voices --dry-run
642
642
  yuanflow-cli ai doubao-tts voice --voice zh_female_xiaohe_uranus_bigtts --dry-run
643
- yuanflow-cli ai doubao-tts voice-download --voice zh_female_xiaohe_uranus_bigtts --output "D:\\voice\\preview.mp3" --dry-run
644
- yuanflow-cli ai doubao-tts --text "你好" --voice zh_female_xiaohe_uranus_bigtts --output "D:\\voice\\doubao.mp3" --dry-run
643
+ yuanflow-cli ai doubao-tts voice-download --voice zh_female_xiaohe_uranus_bigtts --output "<输出音频路径>" --dry-run
644
+ yuanflow-cli ai doubao-tts --text "你好" --voice zh_female_xiaohe_uranus_bigtts --output "<输出音频路径>" --dry-run
645
645
  yuanflow-cli list douyin
646
646
 
647
647
  说明:
@@ -650,7 +650,7 @@ function printHelp() {
650
650
  qwen3-vl-plus 支持 --image-url、--video-url、--image-file、--video-file 四选一;视频建议最大 2GB、时长 2 秒到 1 小时。
651
651
  qwen3-vl-plus 本地图片/视频会先走 YuanFlow 文件中转,内部调用 /atomic/oss/temp-upload 后把 signed_url 提交给模型。
652
652
  browser 命令是自媒体平台专用浏览器自动化协议,只返回受控 profile/cookie/任务路径与执行计划,不用于普通网页搜索。
653
- 视频智能剪辑和视频制作执行链已迁移到独立项目 D:\\AI_project\\视频制作,不再由 yuanflow-cli video 命令提供;视频拆解仍由 yuanflow-skill 的“视频拆解”提供。
653
+ 视频智能剪辑和视频制作执行链已迁移到独立项目,不再由 yuanflow-cli video 命令提供;视频拆解仍由 yuanflow-skill 的“视频拆解”提供。
654
654
  需要鉴权的请求都会使用 Authorization: Bearer <token>。
655
655
  token 优先级:--token > YUANCHUANG_API_TOKEN > 本地 config.token。
656
656
  YuanFlow-main 内置环境使用时,token 由 YuanFlow-main 内置环境注入,不需要手动配置。
@@ -7,8 +7,8 @@ const AUDIO_SPEECH_PATH = '/v1/audio/speech';
7
7
  const AUDIO_VOICES_PATH = '/v1/audio/voices';
8
8
  const YUANFLOW_FILE_TRANSFER_PATH = '/atomic/oss/temp-upload';
9
9
 
10
- const MODEL_VOICE_CLONE = 'qwen-voice-enrollment';
11
- const MODEL_VOICE_REPLICATE = 'qwen3-tts-vc-realtime-2026-01-15';
10
+ const MODEL_VOICE_CLONE = 'voice-enrollment';
11
+ const MODEL_VOICE_REPLICATE = 'cosyvoice-v3-flash';
12
12
 
13
13
  export function listVoiceCommands() {
14
14
  return [
@@ -19,19 +19,21 @@ export function listVoiceCommands() {
19
19
  method: 'POST',
20
20
  apiPath: AUDIO_VOICES_PATH,
21
21
  options: [
22
- option('--file', 'file', false, '本地音频文件;与 --file-transfer、--audio-url 三选一。'),
22
+ option('--file', 'file', false, '本地音频文件;通过 multipart 直接提交给 YuanFlow API,与 --file-transfer、--audio-url 三选一。'),
23
23
  option('--file-transfer', 'fileTransfer', false, '本地音频文件;先通过 YuanFlow 文件中转生成临时 URL,再创建声音克隆。'),
24
24
  option('--audio-url', 'audioUrl', false, '公网可访问音频 URL;与 --file、--file-transfer 三选一。'),
25
25
  option('--name', 'name', false, '声音克隆展示名。'),
26
26
  option('--preferred-name', 'preferredName', false, '偏好音色名,默认跟随 --name。'),
27
- option('--text', 'text', false, '参考音频对应文本,可选。'),
28
- option('--language', 'language', false, '语言代码,可选。'),
27
+ option('--target-model', 'targetModel', false, `后续合成模型,默认 ${MODEL_VOICE_REPLICATE}。`),
28
+ option('--language-hints', 'languageHints', false, '逗号分隔的样本音频语种提示,例如 zh。'),
29
+ option('--language', 'language', false, '兼容别名;会映射为 language_hints。'),
29
30
  option('--activate', 'activate', false, '创建后设为当前默认音色。'),
30
31
  ...commonOptions(),
31
32
  ],
32
33
  requestBody: {
33
34
  model: MODEL_VOICE_CLONE,
34
- audio: '<本地音频 data URI,或通过 audio_url 传入 YuanFlow 文件中转 URL>',
35
+ target_model: MODEL_VOICE_REPLICATE,
36
+ file: '<multipart 本地音频,或通过 audio_url 传入公网音频 URL>',
35
37
  },
36
38
  returns: '返回 voice_xxx 音色对象;后续 voice replicate 可通过 --voice voice_xxx 复刻声音。',
37
39
  }),
@@ -69,8 +71,16 @@ export function listVoiceCommands() {
69
71
  option('--voice', 'voice', true, '声音克隆 ID:voice_xxx;也可传 default 使用已激活默认音色。'),
70
72
  option('--output', 'output', true, '音频保存路径;dry-run 时可不传。'),
71
73
  option('--response-format', 'responseFormat', false, 'mp3、wav、pcm 等,默认 mp3。'),
72
- option('--speed', 'speed', false, '语速控制。'),
74
+ option('--rate', 'rate', false, '语速控制,会写入 metadata.rate。'),
75
+ option('--speed', 'speed', false, '兼容别名;会写入 metadata.rate。'),
76
+ option('--volume', 'volume', false, '音量控制,会写入 metadata.volume。'),
77
+ option('--pitch', 'pitch', false, '音调控制,会写入 metadata.pitch。'),
73
78
  option('--sample-rate', 'sampleRate', false, '采样率。'),
79
+ option('--language', 'language', false, '目标合成语言,例如 zh。'),
80
+ option('--language-hints', 'languageHints', false, '逗号分隔的目标合成语种提示,例如 zh。'),
81
+ option('--enable-ssml', 'enableSsml', false, '是否开启 SSML。'),
82
+ option('--word-timestamp-enabled', 'wordTimestampEnabled', false, '是否开启字级时间戳。'),
83
+ option('--seed', 'seed', false, '随机种子。'),
74
84
  option('--metadata', 'metadata', false, '透传给 YuanFlow API 的 metadata JSON。'),
75
85
  ...commonOptions(),
76
86
  ],
@@ -112,8 +122,10 @@ export async function runVoiceCommand({ action = 'help', options }) {
112
122
  }
113
123
 
114
124
  async function cloneVoice(options) {
115
- const body = await buildVoiceCloneBody(options);
116
- const response = await callJson(AUDIO_VOICES_PATH, options, body);
125
+ const { body, filePath } = await buildVoiceClonePayload(options);
126
+ const response = filePath
127
+ ? await callMultipartJson(AUDIO_VOICES_PATH, options, body, filePath)
128
+ : await callJson(AUDIO_VOICES_PATH, options, body);
117
129
  return result('voice clone', AUDIO_VOICES_PATH, body, response, { kind: 'voice-clone' });
118
130
  }
119
131
 
@@ -138,9 +150,9 @@ async function replicateVoice(options) {
138
150
  return result('voice replicate', AUDIO_SPEECH_PATH, body, response, { kind: 'voice-replicate' });
139
151
  }
140
152
 
141
- async function buildVoiceCloneBody(options) {
153
+ async function buildVoiceClonePayload(options) {
142
154
  if (options.json) {
143
- return JSON.parse(options.json);
155
+ return { body: JSON.parse(options.json) };
144
156
  }
145
157
  const filePath = cleanOptional(options.file);
146
158
  const fileTransferPath = cleanOptional(options.named?.['file-transfer']);
@@ -155,20 +167,24 @@ async function buildVoiceCloneBody(options) {
155
167
 
156
168
  const body = {
157
169
  model: MODEL_VOICE_CLONE,
170
+ target_model: cleanOptional(options.named?.['target-model']) || MODEL_VOICE_REPLICATE,
158
171
  ...optionalField('name', options.named?.name),
159
172
  ...optionalField('preferred_name', options.named?.['preferred-name']),
160
- ...optionalField('text', options.named?.text),
161
- ...optionalField('language', options.named?.language),
162
173
  ...optionalBooleanField('activate', options.named?.activate),
163
174
  };
175
+ const languageHints = splitList(options.named?.['language-hints'] || options.named?.language);
176
+ if (languageHints.length > 0) {
177
+ body.language_hints = languageHints;
178
+ }
164
179
  if (audioUrl) {
165
180
  body.audio_url = audioUrl;
166
181
  } else if (fileTransferPath) {
167
182
  body.audio_url = await resolveYuanFlowAudioFile(fileTransferPath, options);
168
183
  } else {
169
- body.audio = options.dryRun ? '<data URI omitted in dry-run>' : await fileToDataUri(filePath);
184
+ body.file = '<file omitted>';
185
+ return { body, filePath };
170
186
  }
171
- return body;
187
+ return { body };
172
188
  }
173
189
 
174
190
  function buildVoiceReplicateBody(options) {
@@ -187,9 +203,19 @@ function buildVoiceReplicateBody(options) {
187
203
  response_format: cleanOptional(options.named?.['response-format']) || 'mp3',
188
204
  ...optionalField('instructions', options.named?.instructions),
189
205
  };
190
- addNumber(body, 'speed', options.named?.speed);
191
206
  const metadata = parseJsonObject(options.named?.metadata);
192
207
  addNumber(metadata, 'sample_rate', options.named?.['sample-rate']);
208
+ addNumber(metadata, 'volume', options.named?.volume);
209
+ addNumber(metadata, 'rate', options.named?.rate || options.named?.speed);
210
+ addNumber(metadata, 'pitch', options.named?.pitch);
211
+ addString(metadata, 'language', options.named?.language);
212
+ addNumber(metadata, 'seed', options.named?.seed);
213
+ addBoolean(metadata, 'enable_ssml', options.named?.['enable-ssml']);
214
+ addBoolean(metadata, 'word_timestamp_enabled', options.named?.['word-timestamp-enabled']);
215
+ const hints = splitList(options.named?.['language-hints']);
216
+ if (hints.length > 0) {
217
+ metadata.language_hints = hints;
218
+ }
193
219
  if (Object.keys(metadata).length > 0) {
194
220
  body.metadata = metadata;
195
221
  }
@@ -236,6 +262,28 @@ async function callJson(apiPath, options, body) {
236
262
  return readJsonResponse(response);
237
263
  }
238
264
 
265
/**
 * POST `body` together with one local file to `apiPath` as
 * multipart/form-data and return whatever `readJsonResponse` yields.
 *
 * @param {string} apiPath - API path handed to `buildRequest`.
 * @param {object} options - CLI options handed to `buildRequest`.
 * @param {object} body - Request fields. Arrays are comma-joined, other
 *   values are stringified, and nullish values are skipped. Any `file` key
 *   is ignored because the real file part is attached from `filePath`.
 * @param {string} filePath - Local file whose bytes are uploaded as `file`.
 * @returns {Promise<object>} The prepared request when `request.dryRun` is
 *   set, otherwise the result of `readJsonResponse`.
 */
async function callMultipartJson(apiPath, options, body, filePath) {
  const request = await buildRequest(apiPath, options, 'POST', body);
  if (request.dryRun) {
    // Dry-run: return the prepared request without reading the file or
    // touching the network.
    return request;
  }

  const form = new FormData();
  for (const [key, value] of Object.entries(body ?? {})) {
    // `file` only holds a placeholder in the JSON-shaped body; nullish values
    // would otherwise be sent as the literal strings "undefined" / "null".
    if (key === 'file' || value === undefined || value === null) {
      continue;
    }
    form.set(key, Array.isArray(value) ? value.join(',') : String(value));
  }

  const fileBlob = new Blob([await readFile(filePath)], { type: inferAudioMimeType(filePath) });
  form.set('file', fileBlob, path.basename(filePath));

  // fetch() derives `multipart/form-data; boundary=...` from the FormData body
  // only when no Content-Type is supplied. buildRequest is defined outside this
  // block; if it pre-sets a Content-Type (e.g. for JSON calls; unconfirmed),
  // forwarding it would produce a body the server cannot parse, so drop it.
  const headers = new Headers(request.headers);
  headers.delete('content-type');

  const response = await fetch(request.url, {
    method: 'POST',
    headers,
    body: form,
  });
  return readJsonResponse(response);
}
286
+
239
287
  async function callGetJson(apiPath, options) {
240
288
  const request = await buildRequest(apiPath, options, 'GET');
241
289
  if (request.dryRun) {
@@ -364,11 +412,6 @@ function option(flag, name, required, label) {
364
412
  return { flag, name, required, label };
365
413
  }
366
414
 
367
- async function fileToDataUri(filePath) {
368
- const data = await readFile(filePath);
369
- return `data:${inferAudioMimeType(filePath)};base64,${data.toString('base64')}`;
370
- }
371
-
372
415
  function inferAudioMimeType(filePath) {
373
416
  switch (path.extname(filePath).toLowerCase()) {
374
417
  case '.mp3':
@@ -418,6 +461,20 @@ function addNumber(target, name, value) {
418
461
  }
419
462
  }
420
463
 
464
/**
 * Copy an optional value onto `target[name]` after passing it through
 * `cleanOptional`; leave `target` untouched when the cleaned value is
 * `undefined`.
 *
 * @param {object} target - Object that receives the field (mutated in place).
 * @param {string} name - Property name to assign.
 * @param {*} value - Raw option value.
 */
function addString(target, name, value) {
  const text = cleanOptional(value);
  if (text === undefined) {
    return;
  }
  target[name] = text;
}
470
+
471
/**
 * Assign `target[name]` from `parseBoolean(value)`; leave `target` untouched
 * when `parseBoolean` returns `undefined`.
 *
 * @param {object} target - Object that receives the field (mutated in place).
 * @param {string} name - Property name to assign.
 * @param {*} value - Raw option value.
 */
function addBoolean(target, name, value) {
  const flag = parseBoolean(value);
  if (flag === undefined) {
    return;
  }
  target[name] = flag;
}
477
+
421
478
  function parseBoolean(value) {
422
479
  const cleaned = cleanOptional(value);
423
480
  if (cleaned === undefined) {
@@ -429,6 +486,20 @@ function parseBoolean(value) {
429
486
  return ['1', 'true', 'yes', 'on'].includes(String(cleaned).toLowerCase());
430
487
  }
431
488
 
489
/**
 * Turn a list-like option into an array of trimmed, non-empty strings.
 *
 * The value is first passed through `cleanOptional`. A falsy result gives an
 * empty array, an array has each element stringified, and anything else is
 * stringified and split on commas.
 *
 * @param {*} value - Raw option value (e.g. "zh, en" or an array).
 * @returns {string[]} Trimmed entries with empty ones removed.
 */
function splitList(value) {
  const normalized = cleanOptional(value);
  if (!normalized) {
    return [];
  }
  const parts = Array.isArray(normalized)
    ? normalized.map((entry) => String(entry))
    : String(normalized).split(',');
  return parts.map((part) => part.trim()).filter(Boolean);
}
502
+
432
503
  function cleanOptional(value) {
433
504
  if (value === undefined || value === null) return undefined;
434
505
  if (typeof value === 'string') {