skillfree 0.1.29 → 0.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- const { post, postStream, get, request, getApiKey, BASE_URL, checkCredits } = require('../lib/client')
1
+ const { post, postStream, request, getApiKey, BASE_URL, checkCredits } = require('../lib/client')
2
2
  const fs = require('fs')
3
3
  const path = require('path')
4
4
 
@@ -10,35 +10,23 @@ async function downloadAndSave(url, output) {
10
10
  console.log(`✅ 已保存到 ${output}`)
11
11
  }
12
12
 
13
- // ─── 工具:封装 PCM 为 WAV ─────────────────────────────────────────────────────
14
- function pcmToWav(pcmBytes, sampleRate = 24000, channels = 1, bitsPerSample = 16) {
15
- const dataSize = pcmBytes.length
16
- const wav = Buffer.alloc(44 + dataSize)
17
- wav.write('RIFF', 0); wav.writeUInt32LE(36 + dataSize, 4); wav.write('WAVE', 8)
18
- wav.write('fmt ', 12); wav.writeUInt32LE(16, 16); wav.writeUInt16LE(1, 20)
19
- wav.writeUInt16LE(channels, 22); wav.writeUInt32LE(sampleRate, 24)
20
- wav.writeUInt32LE(sampleRate * channels * bitsPerSample / 8, 28)
21
- wav.writeUInt16LE(channels * bitsPerSample / 8, 32); wav.writeUInt16LE(bitsPerSample, 34)
22
- wav.write('data', 36); wav.writeUInt32LE(dataSize, 40)
23
- pcmBytes.copy(wav, 44)
24
- return wav
25
- }
26
-
27
13
  async function pilot(flags) {
28
- const type = flags.type || 'chat'
14
+ const type = flags.type || 'chat'
29
15
  const prompt = flags.prompt || flags.text || ''
30
16
  const output = flags.output || null
31
- const model = flags.model || null
17
+ const model = flags.model || null
32
18
 
33
- // ── 积分预检(< 100 积分时拦截并提示充值)────────────────────────────────────
19
+ // ── 余额预检(< 100 积分时拦截并提示充值)────────────────────────────────────
34
20
  await checkCredits()
35
21
 
36
22
  // ── CHAT ─────────────────────────────────────────────────────────────────────
37
23
  if (type === 'chat') {
24
+ const chatModel = model || 'claude-sonnet-4-6'
25
+
38
26
  if (!output) {
39
27
  // 流式输出
40
28
  const res = await postStream('/chat/completions', {
41
- model: model || 'DeepSeek-V3.2-Fast',
29
+ model: chatModel,
42
30
  messages: [{ role: 'user', content: prompt }],
43
31
  stream: true,
44
32
  })
@@ -66,13 +54,14 @@ async function pilot(flags) {
66
54
  }
67
55
  return
68
56
  }
57
+
69
58
  const result = await post('/chat/completions', {
70
- model: model || 'DeepSeek-V3.2-Fast',
59
+ model: chatModel,
71
60
  messages: [{ role: 'user', content: prompt }],
72
61
  })
73
62
  const text = result.choices?.[0]?.message?.content || JSON.stringify(result, null, 2)
74
- if (output) { fs.writeFileSync(output, text); console.log(`✅ 已保存到 ${output}`) }
75
- else console.log(text)
63
+ fs.writeFileSync(output, text)
64
+ console.log(`✅ 已保存到 ${output}`)
76
65
  return
77
66
  }
78
67
 
@@ -80,26 +69,6 @@ async function pilot(flags) {
80
69
  if (type === 'image') {
81
70
  const imageModel = model || 'gemini-3.1-flash-image-preview'
82
71
 
83
- // qwen-image-edit-plus:图片编辑,走 /v1/images/edits,multipart/form-data
84
- if (imageModel.startsWith('qwen-image')) {
85
- if (!flags.file) throw new Error('qwen-image-edit 需要 --file 指定原始图片路径')
86
- const imgBuf = fs.readFileSync(path.resolve(flags.file))
87
- const form = new FormData()
88
- form.append('model', imageModel)
89
- form.append('prompt', prompt)
90
- form.append('size', flags.size || '1024x1024')
91
- form.append('image', new Blob([imgBuf], { type: 'image/png' }), path.basename(flags.file))
92
- const res = await request('/images/edits', { method: 'POST', body: form })
93
- const data = await res.json()
94
- if (data.error) throw new Error(data.error.message || JSON.stringify(data.error))
95
- const url = data.data?.[0]?.url
96
- if (!url) throw new Error('未返回图像URL: ' + JSON.stringify(data).slice(0, 200))
97
- if (output) await downloadAndSave(url, output)
98
- else console.log('图像 URL:', url)
99
- return
100
- }
101
-
102
- // doubao-seedream-5.0-lite / 其他标准图像模型
103
72
  const res = await post('/images/generations', {
104
73
  model: imageModel,
105
74
  prompt,
@@ -107,8 +76,10 @@ async function pilot(flags) {
107
76
  size: flags.size || '1024x1024',
108
77
  })
109
78
  if (res.error) throw new Error(res.error.message || JSON.stringify(res.error))
79
+
110
80
  const url = res.data?.[0]?.url || res.data?.[0]?.b64_json
111
81
  if (!url) throw new Error('未返回图像数据: ' + JSON.stringify(res).slice(0, 200))
82
+
112
83
  if (output) {
113
84
  if (url.startsWith('http')) await downloadAndSave(url, output)
114
85
  else { fs.writeFileSync(output, Buffer.from(url, 'base64')); console.log(`✅ 已保存到 ${output}`) }
@@ -118,156 +89,48 @@ async function pilot(flags) {
118
89
  return
119
90
  }
120
91
 
121
- // ── TTS ───────────────────────────────────────────────────────────────────────
122
- if (type === 'tts') {
123
- const ttsModel = model || 'speech-2.6-hd'
124
- const text = prompt
92
+ // ── VIDEO(Veo 异步轮询)─────────────────────────────────────────────────────
93
+ if (type === 'video') {
94
+ if (!prompt) throw new Error('--prompt 是必需的(视频描述)')
95
+ const videoModel = model || 'veo-3.1-fast-generate-preview'
96
+ const seconds = flags.seconds || '8'
97
+ const size = flags.size || '1920x1080'
125
98
 
126
- if (ttsModel === 'speech-2.8-hd' || ttsModel === 'minimax-clone-lastversion') {
127
- // /v1/responses,返回 hex 音频
128
- const res = await request('/responses', {
129
- method: 'POST',
130
- body: JSON.stringify({
131
- model: ttsModel,
132
- input: text,
133
- stream: false,
134
- voice_setting: {
135
- voice_id: flags.voice || 'female-shaonv',
136
- speed: 1, vol: 1, pitch: 0, emotion: 'fluent',
137
- },
138
- audio_setting: { sample_rate: 32000, bitrate: 128000, format: 'mp3', channel: 1 },
139
- output_format: 'hex',
140
- }),
141
- })
142
- const data = await res.json()
143
- if (!data.data?.audio) throw new Error(JSON.stringify(data).slice(0, 200))
144
- const audioBuf = Buffer.from(data.data.audio, 'hex')
145
- if (output) { fs.writeFileSync(output, audioBuf); console.log(`✅ 已保存到 ${output}(${audioBuf.length} bytes)`) }
146
- else console.log(`✅ TTS 成功,时长约 ${(data.extra_info?.audio_length / 1000).toFixed(1)} 秒`)
99
+ console.log(`🎬 提交 Veo 视频任务(${videoModel},${size},${seconds}s)...`)
100
+ console.log('⏳ 视频生成中,预计需要 1-3 分钟...\n')
147
101
 
148
- } else if (ttsModel === 'gemini-2.5-pro-preview-tts' || ttsModel === 'gemini-2.5-flash-preview-tts') {
149
- // Gemini TTS:走 skillfree.tech 后端 /v1beta 路由
150
- // 注意:用 fetch+BASE_URL 而非 request(),因为 request() 会自动加 /v1 前缀
151
- const apiKey = getApiKey()
152
- const res = await fetch(BASE_URL + '/v1beta/models/' + ttsModel + ':generateContent', {
153
- method: 'POST',
154
- headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
155
- body: JSON.stringify({
156
- contents: [{ parts: [{ text }] }],
157
- generationConfig: {
158
- responseModalities: ['AUDIO'],
159
- speechConfig: {
160
- voiceConfig: { prebuiltVoiceConfig: { voiceName: flags.voice || 'Kore' } }
161
- }
162
- }
163
- }),
164
- })
165
- const data = await res.json()
166
- if (data.error) throw new Error(data.error.message || JSON.stringify(data.error))
167
- const pcm = Buffer.from(data.candidates[0].content.parts[0].inlineData.data, 'base64')
168
- const ext = (output || '').endsWith('.mp3') ? 'mp3' : 'wav'
169
- const finalBuf = ext === 'wav' ? pcmToWav(pcm) : pcm
170
- if (output) { fs.writeFileSync(output, finalBuf); console.log(`✅ 已保存到 ${output}(${finalBuf.length} bytes)`) }
171
- else console.log(`✅ Gemini TTS 成功,时长约 ${(pcm.length / (24000 * 2)).toFixed(1)} 秒`)
102
+ const apiKey = getApiKey()
103
+ const res = await fetch(`${BASE_URL}/v1/video/veo`, {
104
+ method: 'POST',
105
+ headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
106
+ body: JSON.stringify({ model: videoModel, input: prompt, seconds, size }),
107
+ })
172
108
 
173
- } else {
174
- // 标准 OpenAI 兼容(speech-2.6-hd 等)
175
- const res = await request('/audio/speech', {
176
- method: 'POST',
177
- body: JSON.stringify({
178
- model: ttsModel,
179
- input: text,
180
- voice: flags.voice || 'female-shaonv',
181
- }),
182
- })
183
- if (res.status !== 200) {
184
- const err = await res.json()
185
- throw new Error(err.error?.message || JSON.stringify(err))
186
- }
187
- const buf = Buffer.from(await res.arrayBuffer())
188
- if (output) { fs.writeFileSync(output, buf); console.log(`✅ 已保存到 ${output}(${buf.length} bytes)`) }
189
- else console.log(`✅ TTS 成功,${buf.length} bytes`)
109
+ if (!res.ok) {
110
+ const err = await res.json().catch(() => ({}))
111
+ throw new Error(err.error || `HTTP ${res.status}`)
190
112
  }
191
- return
192
- }
193
-
194
- // ── MUSIC ─────────────────────────────────────────────────────────────────────
195
- if (type === 'music') {
196
- const musicModel = model || 'chirp-v5'
197
-
198
- if (musicModel === 'chirp-v5') {
199
- // Suno 异步接口(路由在 /suno/,不在 /v1/ 下,需用 BASE_URL 直接请求)
200
- const apiKey = getApiKey()
201
- const res = await fetch(`${BASE_URL}/suno/submit/music`, {
202
- method: 'POST',
203
- headers: { 'Content-Type': 'application/json', 'Accept': 'application/json', 'Authorization': `Bearer ${apiKey}` },
204
- body: JSON.stringify({
205
- gpt_description_prompt: prompt,
206
- make_instrumental: true,
207
- mv: 'chirp-v5',
208
- notify_hook: '',
209
- }),
210
- })
211
- const submitData = await res.json()
212
- if (submitData.code !== 'success') throw new Error(submitData.message || JSON.stringify(submitData))
213
- const taskId = submitData.data
214
- console.log(`🎵 Suno 任务已提交,task_id: ${taskId},等待生成(约 60-90 秒)...`)
215
113
 
216
- // 轮询结果
217
- for (let i = 0; i < 15; i++) {
218
- await new Promise(r => setTimeout(r, 10000))
219
- const poll = await fetch(`${BASE_URL}/suno/fetch/${taskId}`, {
220
- headers: { 'Authorization': `Bearer ${apiKey}` }
221
- })
222
- const result = await poll.json()
223
- if (result.code !== 'success') throw new Error(result.message || JSON.stringify(result))
224
- const status = result.data?.status
225
- const progress = result.data?.progress || '0%'
226
- process.stdout.write(`\r状态: ${status} 进度: ${progress} `)
227
- if (status === 'SUCCESS') {
228
- const songs = result.data?.data || []
229
- process.stdout.write('\n')
230
- console.log(`✅ 生成成功!共 ${songs.length} 首`)
231
- songs.forEach((s, i) => {
232
- console.log(` 歌曲${i+1}: ${s.title} (${Math.floor(s.duration/60)}m${Math.floor(s.duration%60)}s)`)
233
- console.log(` 音频: ${s.audio_url}`)
234
- })
235
- // 如果指定 output,下载第一首
236
- if (output && songs[0]?.audio_url) await downloadAndSave(songs[0].audio_url, output)
237
- return
238
- }
239
- if (status === 'FAILED') throw new Error('Suno 任务失败: ' + JSON.stringify(result.data))
240
- }
241
- throw new Error(`Suno 生成超时(150s),任务仍在后台运行\n📋 稍后手动查询:skillfree task suno:${taskId}`)
114
+ const data = await res.json()
242
115
 
116
+ // 服务端已轮询完成,返回 video_base64 或 url
117
+ if (data.video_base64) {
118
+ const videoBytes = Buffer.from(data.video_base64, 'base64')
119
+ const savePath = output || `./veo_${Date.now()}.mp4`
120
+ fs.writeFileSync(savePath, videoBytes)
121
+ console.log(`✅ 视频已保存到 ${savePath}(${(videoBytes.length / 1024 / 1024).toFixed(1)} MB)`)
122
+ } else if (data.url) {
123
+ if (output) await downloadAndSave(data.url, output)
124
+ else console.log('🔗 视频链接:', data.url)
243
125
  } else {
244
- // music-2.5(MiniMax),走 /v1/responses,需要 lyrics
245
- const res = await request('/responses', {
246
- method: 'POST',
247
- body: JSON.stringify({
248
- model: 'music-2.5',
249
- input: prompt,
250
- lyrics: flags.lyrics || `[verse]\n${prompt}`,
251
- audio_setting: { sample_rate: 44100, bitrate: 256000, format: 'mp3' },
252
- output_format: 'url',
253
- stream: false,
254
- }),
255
- })
256
- const data = await res.json()
257
- if (data.error) throw new Error(data.error.message || JSON.stringify(data.error))
258
- const audioUrl = data.output?.[0]?.content?.[0]?.audio
259
- if (!audioUrl) throw new Error('未返回音频: ' + JSON.stringify(data).slice(0, 200))
260
- const duration = ((data.extra_info?.music_duration || 0) / 1000).toFixed(1)
261
- console.log(`✅ music-2.5 生成成功!时长约 ${duration} 秒`)
262
- console.log('音频 URL:', audioUrl)
263
- if (output) await downloadAndSave(audioUrl, output)
126
+ throw new Error('未返回视频数据: ' + JSON.stringify(data).slice(0, 200))
264
127
  }
128
+ return
265
129
  }
266
130
 
267
131
  // ── OCR ───────────────────────────────────────────────────────────────────────
268
132
  if (type === 'ocr') {
269
133
  let input = prompt
270
- // 如果是本地文件路径,读取为 base64
271
134
  if (flags.file && fs.existsSync(path.resolve(flags.file))) {
272
135
  input = fs.readFileSync(path.resolve(flags.file)).toString('base64')
273
136
  }
@@ -293,147 +156,7 @@ async function pilot(flags) {
293
156
  return
294
157
  }
295
158
 
296
- // ── STT ───────────────────────────────────────────────────────────────────────
297
- if (type === 'stt') {
298
- if (!flags.file) throw new Error('--file 是必需的(音频文件路径)')
299
- const audioBase64 = fs.readFileSync(path.resolve(flags.file)).toString('base64')
300
- const result = await post('/v1/audio/transcriptions', {
301
- model: 'whisper-1',
302
- file: audioBase64,
303
- filename: path.basename(flags.file),
304
- })
305
- const text = result.text || JSON.stringify(result, null, 2)
306
- if (output) { fs.writeFileSync(output, text); console.log(`✅ 已保存到 ${output}`) }
307
- else console.log(text)
308
- return
309
- }
310
-
311
- // ── EMBEDDING ─────────────────────────────────────────────────────────────────
312
- if (type === 'embedding') {
313
- // doubao-embedding-vision:走 /v1/responses(多模态),input 为数组
314
- // 简单文本 embedding 也可用
315
- const inputData = flags.file
316
- ? [{ type: 'image_url', image_url: { url: 'data:image/png;base64,' + fs.readFileSync(path.resolve(flags.file)).toString('base64') } }]
317
- : [{ type: 'text', text: prompt }]
318
- const res = await request('/embeddings', {
319
- method: 'POST',
320
- body: JSON.stringify({
321
- model: model || 'doubao-embedding-vision-251215',
322
- input: inputData,
323
- encoding_format: 'float',
324
- dimensions: 1024,
325
- sparse_embedding: { type: 'disabled' },
326
- }),
327
- })
328
- const data = await res.json()
329
- if (data.error) throw new Error(data.error.message || JSON.stringify(data.error))
330
- const embedding = data.data?.[0]?.embedding
331
- if (!embedding) throw new Error('未返回向量: ' + JSON.stringify(data).slice(0, 200))
332
- console.log(`✅ Embedding 成功!维度: ${embedding.length}`)
333
- if (output) {
334
- fs.writeFileSync(output, JSON.stringify(embedding))
335
- console.log(`已保存到 ${output}`)
336
- } else {
337
- console.log('前5维:', embedding.slice(0, 5))
338
- }
339
- return
340
- }
341
-
342
- // ── VIDEO ─────────────────────────────────────────────────────────────────────
343
- if (type === 'video') {
344
- if (!prompt) throw new Error('--prompt 是必需的(视频描述)')
345
- const videoModel = model || 'kling-v2-6-text2video'
346
-
347
- // 1. 提交任务
348
- process.stdout.write(`🎬 提交视频任务(${videoModel})...`)
349
- const res = await request('/video/generations', {
350
- method: 'POST',
351
- body: JSON.stringify({ model: videoModel, prompt }),
352
- })
353
- const submitData = await res.json()
354
- if (!submitData.task_id) throw new Error('提交失败: ' + JSON.stringify(submitData).slice(0, 200))
355
-
356
- const taskId = submitData.task_id
357
- console.log(` ✅\n📋 task_id: ${taskId}`)
358
- console.log('⏳ 视频生成中,预计需要 1-3 分钟...\n')
359
-
360
- // 2. 轮询状态(最多等 5 分钟)
361
- const maxWait = 300000
362
- const interval = 5000
363
- const start = Date.now()
364
-
365
- while (Date.now() - start < maxWait) {
366
- await new Promise(r => setTimeout(r, interval))
367
- const pollRes = await request(`/tasks/${taskId}`, { method: 'GET' })
368
- const task = await pollRes.json()
369
- const status = task.status
370
-
371
- process.stdout.write(`\r状态: ${status} 已等待: ${Math.round((Date.now() - start) / 1000)}s `)
372
-
373
- if (status === 'completed') {
374
- console.log('\n')
375
- const videoUrl = task.result_url || task.output_url
376
- if (!videoUrl) throw new Error('任务完成但未返回视频 URL: ' + JSON.stringify(task))
377
-
378
- if (output) {
379
- await downloadAndSave(videoUrl, output)
380
- } else {
381
- console.log(`✅ 视频生成完成!`)
382
- console.log(`🔗 下载链接:${videoUrl}`)
383
- console.log(`\n💡 加 --output ./video.mp4 可自动下载到本地`)
384
- }
385
- return
386
- }
387
-
388
- if (status === 'failed') {
389
- throw new Error('视频生成失败: ' + (task.error || JSON.stringify(task)))
390
- }
391
- }
392
-
393
- // 超时,给出手动查询命令
394
- console.log(`\n⏰ 等待超时(5分钟),任务仍在后台运行`)
395
- console.log(`📋 稍后可手动查询进度:`)
396
- console.log(` skillfree task ${taskId}`)
397
- return
398
- }
399
-
400
- // ── SEARCH ────────────────────────────────────────────────────────────────────
401
- if (type === 'search') {
402
- if (!prompt) throw new Error('--prompt 是必需的(搜索词)')
403
- const searchModel = model || 'tavily-search'
404
- const res = await request('/search', {
405
- method: 'POST',
406
- body: JSON.stringify({
407
- model: searchModel,
408
- query: prompt,
409
- include_answer: true,
410
- max_results: flags.maxResults || 5,
411
- }),
412
- })
413
- const data = await res.json()
414
- if (data.error) throw new Error(data.error.message || JSON.stringify(data.error))
415
-
416
- // 输出格式化结果
417
- if (data.answer) {
418
- console.log('\n📝 摘要答案:')
419
- console.log(data.answer)
420
- }
421
- if (data.results?.length) {
422
- console.log(`\n🔗 搜索结果(共 ${data.results.length} 条):`)
423
- data.results.forEach((r, i) => {
424
- console.log(`\n${i+1}. ${r.title}`)
425
- console.log(` ${r.url}`)
426
- if (r.content) console.log(` ${r.content.slice(0, 150)}...`)
427
- })
428
- }
429
- if (output) {
430
- fs.writeFileSync(output, JSON.stringify(data, null, 2))
431
- console.log(`\n✅ 完整结果已保存到 ${output}`)
432
- }
433
- return
434
- }
435
-
436
- throw new Error(`不支持的类型: ${type},可选: chat | image | tts | stt | music | ocr | video | embedding | search`)
159
+ throw new Error(`不支持的类型: ${type}\n可选: chat | image | video | ocr`)
437
160
  }
438
161
 
439
162
  module.exports = { pilot }
@@ -1,97 +1,21 @@
1
- const { run } = require('./run')
2
-
3
1
  /**
4
- * Video generation command
5
- * @param {object} params - Video parameters
6
- * @param {string} params.model - Model in "vendor/model" format
7
- * @param {string} params.prompt - Video generation prompt
8
- * @param {string} [params.output] - Output file path
9
- * @returns {Promise<object>} Video generation result
2
+ * Video generation via Veo(异步,服务端轮询完成后返回)
3
+ * @param {object} params
4
+ * @param {string} params.model - veo-3.1-fast-generate-preview | veo-3.1-generate-preview
5
+ * @param {string} params.prompt - 视频描述文本
6
+ * @param {string} [params.seconds] - 时长:4 / 6 / 8(默认 8)
7
+ * @param {string} [params.size] - 分辨率:1280x720 / 1920x1080 / 3840x2160(默认 1920x1080)
8
+ * @param {string} [params.output] - 本地保存路径(.mp4)
10
9
  */
11
10
  async function video(params) {
12
- if (!params.prompt) {
13
- throw new Error('--prompt is required for video generation')
14
- }
11
+ if (!params.prompt) throw new Error('--prompt is required for video generation')
15
12
 
16
- const [vendor] = params.model.split('/')
17
- const inputs = {}
18
-
19
- if (vendor === 'vertex') {
20
- // Vertex/Veo uses instances array format
21
- inputs.instances = [{ prompt: params.prompt }]
22
- inputs.parameters = {}
23
- } else if (vendor === 'mm') {
24
- // MM video models: t2v (text-to-video), i2v (image-to-video)
25
- inputs.prompt = params.prompt
26
- if (params.size) {
27
- // Convert "1280x720" to "1280*720" if needed
28
- inputs.size = params.size.replace('x', '*')
29
- }
30
- if (params.duration) {
31
- inputs.duration = parseInt(params.duration)
32
- }
33
- if (params.image) {
34
- // i2v mode: image-to-video
35
- inputs.image_url = params.image
36
- }
37
- } else {
38
- // MiniMax and others use 'prompt'
39
- inputs.prompt = params.prompt
13
+ return {
14
+ model: params.model || 'veo-3.1-fast-generate-preview',
15
+ input: params.prompt,
16
+ seconds: params.seconds || '8',
17
+ size: params.size || '1920x1080',
40
18
  }
41
-
42
- return run({ model: params.model, inputs, output: params.output })
43
- }
44
-
45
- /**
46
- * Multimodal understanding command (video/image/audio analysis)
47
- * @param {object} params - Multimodal parameters
48
- * @param {string} params.model - Model in "vendor/model" format (e.g., mm/qwen3-vl-plus)
49
- * @param {string} params.prompt - Text prompt/question about the media
50
- * @param {string} [params.video] - Video URL to analyze
51
- * @param {string} [params.image] - Image URL to analyze
52
- * @param {string} [params.audio] - Audio URL to analyze/transcribe
53
- * @returns {Promise<object>} Multimodal analysis result
54
- */
55
- async function multimodal(params) {
56
- if (!params.prompt) {
57
- throw new Error('--prompt is required for multimodal')
58
- }
59
- if (!params.video && !params.image && !params.audio) {
60
- throw new Error('At least one of --video, --image, or --audio is required')
61
- }
62
-
63
- const [vendor] = params.model.split('/')
64
- const inputs = {}
65
-
66
- if (vendor === 'mm') {
67
- // MM multimodal models use messages format
68
- const content = []
69
- if (params.video) {
70
- content.push({ video: params.video })
71
- if (params.fps) {
72
- content[content.length - 1].fps = parseInt(params.fps)
73
- }
74
- }
75
- if (params.image) {
76
- content.push({ image: params.image })
77
- }
78
- if (params.audio) {
79
- content.push({ audio: params.audio })
80
- }
81
- content.push({ text: params.prompt })
82
-
83
- inputs.input = {
84
- messages: [{ role: 'user', content }]
85
- }
86
- } else {
87
- // Generic format
88
- inputs.prompt = params.prompt
89
- if (params.video) inputs.video_url = params.video
90
- if (params.image) inputs.image_url = params.image
91
- if (params.audio) inputs.audio_url = params.audio
92
- }
93
-
94
- return run({ model: params.model, inputs })
95
19
  }
96
20
 
97
- module.exports = { video, multimodal }
21
+ module.exports = { video }
Binary file
@@ -1,28 +0,0 @@
1
- const { run } = require('./run')
2
-
3
- /**
4
- * Music generation command
5
- * @param {object} params - Music parameters
6
- * @param {string} params.model - Model in "vendor/model" format
7
- * @param {string} params.prompt - Music generation prompt
8
- * @param {number} [params.duration] - Duration in seconds
9
- * @param {string} [params.output] - Output file path
10
- * @returns {Promise<object>} Music generation result
11
- */
12
- async function music(params) {
13
- if (!params.prompt) {
14
- throw new Error('--prompt is required for music generation')
15
- }
16
-
17
- const inputs = {
18
- prompt: params.prompt,
19
- }
20
-
21
- if (params.duration) {
22
- inputs.duration = parseInt(params.duration)
23
- }
24
-
25
- return run({ model: params.model, inputs, output: params.output })
26
- }
27
-
28
- module.exports = { music }
@@ -1,47 +0,0 @@
1
- const fs = require('fs')
2
- const path = require('path')
3
- const { run } = require('./run')
4
-
5
- /**
6
- * Speech-to-text command
7
- * @param {object} params - STT parameters
8
- * @param {string} params.file - Local audio file path
9
- * @param {string} [params.model] - Model (default: openai/whisper-1)
10
- * @param {string} [params.prompt] - Optional prompt to guide transcription style
11
- * @param {string} [params.language] - Optional language code (e.g., "en")
12
- * @param {string} [params.output] - Optional output file path for transcript
13
- * @returns {Promise<object>} STT result with transcribed text
14
- */
15
- async function stt(params) {
16
- if (!params.file) {
17
- throw new Error('--file is required for STT (local audio file path)')
18
- }
19
-
20
- const filePath = path.resolve(params.file)
21
- if (!fs.existsSync(filePath)) {
22
- throw new Error(`Audio file not found: ${filePath}`)
23
- }
24
-
25
- const audioData = fs.readFileSync(filePath).toString('base64')
26
- const filename = path.basename(filePath)
27
-
28
- const inputs = {
29
- audio_data: audioData,
30
- filename,
31
- }
32
- if (params.prompt) inputs.prompt = params.prompt
33
- if (params.language) inputs.language = params.language
34
-
35
- const model = params.model || 'openai/whisper-1'
36
- const result = await run({ model, inputs })
37
-
38
- const text = result.text || JSON.stringify(result)
39
-
40
- if (params.output) {
41
- fs.writeFileSync(params.output, text)
42
- }
43
-
44
- return { text, ...(params.output ? { saved: params.output } : {}) }
45
- }
46
-
47
- module.exports = { stt }