@doufunao123/asset-gateway 0.14.3 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +66 -108
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -435,15 +435,10 @@ var SCHEMAS = {
435
435
  }
436
436
  },
437
437
  tts: {
438
- description: "TTS: default Qwen3-TTS (voice/language/instructions). ElevenLabs: --provider elevenlabs --voice-id <id>",
438
+ description: "Text-to-speech via MOSS-TTS-Nano (self-hosted)",
439
439
  params: {
440
440
  "--prompt": { type: "string", required: true },
441
- "--voice": { type: "string", description: "Qwen voice name or custom id", default: "Cherry" },
442
- "--voice-id": { type: "string", description: "ElevenLabs voice id (with --provider elevenlabs)" },
443
- "--language": { type: "string", default: "Auto" },
444
- "--model": { type: "string", default: "qwen3-tts-flash" },
445
- "--instructions": { type: "string", description: "Instruct-model style control" },
446
- "--provider": { type: "string", description: "qwen_tts | elevenlabs" },
441
+ "--input": { type: "string", description: "Reference audio file for voice cloning (local path or URL)" },
447
442
  "--output-dir": { type: "string", default: "." }
448
443
  }
449
444
  },
@@ -472,19 +467,15 @@ var SCHEMAS = {
472
467
  }
473
468
  },
474
469
  sprite: {
475
- description: "Generate character animation (Veo AI video + frame extraction)",
470
+ description: "Generate character animation spritesheet (AutoSprite)",
476
471
  params: {
477
472
  "--prompt": { type: "string", required: true, description: "Character description" },
478
473
  "--input": { type: "string", description: "Reference image path or URL" },
479
- "--animation-type": { type: "string", default: "walk" },
480
- "--direction": { type: "string", default: "right" },
481
- "--view": { type: "string", default: "auto", description: "Camera view: auto, side, front, back, three-quarter, none" },
482
- "--framing": { type: "string", default: "full-body", description: "Framing: full-body, waist-up, close-up, none" },
483
- "--background": { type: "string", default: "auto", description: "Background: auto, white, none, or free text" },
484
- "--duration": { type: "number", default: 2, description: "Video duration in seconds" },
485
- "--style": { type: "string" },
486
- "--output-format": { type: "string", default: "spritesheet", description: "spritesheet | gif" },
487
- "--fps": { type: "number", default: 8, description: "GIF frame rate" },
474
+ "--animation-type": { type: "string", default: "walk", description: "walk, run, idle, jump, attack, death, cast, dance, wave, interact, or custom text" },
475
+ "--style": { type: "string", description: "Art style: 16-bit, hd-pixel, isometric, retro-8bit, anime, chibi, painterly, vector" },
476
+ "--frame-count": { type: "number", default: 8, description: "Number of animation frames" },
477
+ "--frame-size": { type: "number", default: 256, description: "Frame size in pixels (square)" },
478
+ "--is-humanoid": { type: "boolean", default: true },
488
479
  "--output-dir": { type: "string", default: "." }
489
480
  }
490
481
  },
@@ -660,16 +651,8 @@ var SCHEMAS = {
660
651
  }
661
652
  },
662
653
  voice: {
663
- description: "Qwen3-TTS custom voices (clone / design / list / delete)",
654
+ description: "Qwen3-TTS voice designs (design / list / delete)",
664
655
  subcommands: {
665
- clone: {
666
- description: "Clone from audio sample",
667
- params: {
668
- "--audio": { type: "string", required: true },
669
- "--name": { type: "string", required: true },
670
- "--target-model": { type: "string" }
671
- }
672
- },
673
656
  design: {
674
657
  description: "Design voice from text",
675
658
  params: {
@@ -679,8 +662,8 @@ var SCHEMAS = {
679
662
  "--target-model": { type: "string" }
680
663
  }
681
664
  },
682
- list: { description: "List custom voices", params: { "--type": { type: "string" } } },
683
- delete: { description: "Delete by voice id", params: { "<voice-id>": { type: "string", required: true }, "--type": { type: "string" } } }
665
+ list: { description: "List designed voices", params: { "--type": { type: "string" } } },
666
+ delete: { description: "Delete designed voice", params: { "<voice-id>": { type: "string", required: true }, "--type": { type: "string" } } }
684
667
  }
685
668
  },
686
669
  upload: {
@@ -972,28 +955,23 @@ function createGenerateCommand() {
972
955
  })
973
956
  );
974
957
  command.addCommand(
975
- new Command3("tts").description(
976
- "Text-to-speech: default Qwen3-TTS; use --provider elevenlabs --voice-id for ElevenLabs"
977
- ).requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "Qwen voice name or custom voice id", "Cherry").option(
978
- "--voice-id <id>",
979
- "ElevenLabs voice_id (use with --provider elevenlabs; routes to TTS API)"
980
- ).option("--language <lang>", "Language hint: Auto, Chinese, English, Japanese, etc.", "Auto").option("--model <model>", "Model id (default: auto-detect from voice; qwen3-tts-flash for built-in voices)").option("--instructions <text>", "Natural language speaking instructions (for instruct models)").option("--provider <id>", "qwen_tts | elevenlabs | voicebox").option("--profile-id <id>", "VoiceBox profile_id (use with --provider voicebox)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
958
+ new Command3("tts").description("Text-to-speech via MOSS-TTS-Nano (self-hosted)").requiredOption("--prompt <text>", "Text to synthesize").option("--input <path>", "Reference audio file for voice cloning (local path or URL)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
981
959
  try {
982
960
  const ctx = createContext(this);
983
- const params = {
984
- voice: options.voice,
985
- language_type: options.language
986
- };
987
- if (options.instructions) params.instructions = options.instructions;
988
- if (options.voiceId) params.voice_id = options.voiceId;
989
- if (options.profileId) params.profile_id = options.profileId;
990
961
  const body = {
991
962
  asset_type: "tts",
992
- prompt: options.prompt,
993
- params
963
+ prompt: options.prompt
994
964
  };
995
- if (options.model) body.model = options.model;
996
- if (options.provider) body.provider = options.provider;
965
+ if (options.input) {
966
+ if (existsSync3(options.input)) {
967
+ const ext = extname(options.input).toLowerCase();
968
+ const mime = ext === ".wav" ? "audio/wav" : ext === ".mp3" ? "audio/mpeg" : "audio/wav";
969
+ const b64 = readFileSync2(options.input).toString("base64");
970
+ body.input_file = `data:${mime};base64,${b64}`;
971
+ } else {
972
+ body.input_file = options.input;
973
+ }
974
+ }
997
975
  const data = await ctx.client.post("/api/generate", body);
998
976
  const localPath = await saveOutput(data, "tts", options.outputDir);
999
977
  if (localPath) data.local_path = localPath;
@@ -1052,19 +1030,15 @@ function createGenerateCommand() {
1052
1030
  })
1053
1031
  );
1054
1032
  command.addCommand(
1055
- new Command3("sprite").description("Generate character animation (Veo AI video + frame extraction)").requiredOption("--prompt <text>", "Character description").option("--input <path>", "Reference image for character consistency (local path or URL)").option("--animation-type <type>", "Animation type (idle, walk, run, attack, death, jump, cast, dance, or any custom)", "walk").option("--direction <dir>", "Facing direction: front, left, right, back", "front").option("--view <view>", "Camera view angle: auto, side, front, back, three-quarter, none", "auto").option("--framing <framing>", "Framing: full-body, waist-up, close-up, none", "full-body").option("--background <bg>", "Background: auto, white, none, or free text (e.g. 'forest clearing')", "auto").option("--duration <n>", "Video duration in seconds (1-15)", "2").option("--style <style>", "Visual style (e.g. pixel art, hand-drawn, chibi)").option("--output-format <fmt>", "Output format: spritesheet or gif", "spritesheet").option("--fps <n>", "GIF frame rate", "8").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
1033
+ new Command3("sprite").description("Generate character animation spritesheet (AutoSprite)").requiredOption("--prompt <text>", "Character description").option("--input <path>", "Reference image for character consistency (local path or URL)").option("--animation-type <type>", "Animation type: walk, run, idle, jump, attack, death, cast, dance, wave, interact, or custom text", "walk").option("--style <style>", "Art style: 16-bit, hd-pixel, isometric, retro-8bit, anime, chibi, painterly, vector, or any text").option("--frame-count <n>", "Number of animation frames", "8").option("--frame-size <n>", "Frame size in pixels (square)", "256").option("--is-humanoid", "Character is humanoid (default true)", true).option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
1056
1034
  try {
1057
1035
  const ctx = createContext(this);
1058
1036
  const params = {
1059
1037
  animation_type: options.animationType,
1060
- direction: options.direction,
1061
- duration: Number(options.duration),
1062
- output_format: options.outputFormat,
1063
- fps: Number(options.fps)
1038
+ frame_count: Number(options.frameCount),
1039
+ frame_size: Number(options.frameSize),
1040
+ is_humanoid: options.isHumanoid
1064
1041
  };
1065
- if (options.view && options.view !== "auto") params.view = options.view;
1066
- if (options.framing && options.framing !== "full-body") params.framing = options.framing;
1067
- if (options.background && options.background !== "auto") params.background = options.background;
1068
1042
  if (options.style) params.style = options.style;
1069
1043
  let inputFile = options.input;
1070
1044
  if (inputFile && existsSync3(inputFile)) {
@@ -1675,33 +1649,7 @@ function createUploadCommand() {
1675
1649
  }
1676
1650
 
1677
1651
  // src/commands/voice.ts
1678
- import { existsSync as existsSync5, readFileSync as readFileSync4 } from "fs";
1679
- import { extname as extname2 } from "path";
1680
1652
  import { Command as Command9 } from "commander";
1681
- function inferAudioMime(filePath) {
1682
- const extension = extname2(filePath).toLowerCase();
1683
- const map = {
1684
- ".mp3": "audio/mpeg",
1685
- ".wav": "audio/wav",
1686
- ".pcm": "audio/pcm",
1687
- ".opus": "audio/opus",
1688
- ".ogg": "audio/ogg",
1689
- ".m4a": "audio/mp4",
1690
- ".aac": "audio/aac",
1691
- ".flac": "audio/flac"
1692
- };
1693
- return map[extension] ?? "application/octet-stream";
1694
- }
1695
- function readAudioAsBase64(filePath) {
1696
- if (!existsSync5(filePath)) {
1697
- throw configError(`Audio file not found: ${filePath}`);
1698
- }
1699
- const bytes = readFileSync4(filePath);
1700
- return {
1701
- audio_base64: bytes.toString("base64"),
1702
- audio_mime: inferAudioMime(filePath)
1703
- };
1704
- }
1705
1653
  function withVoiceType(path, type) {
1706
1654
  if (!type) {
1707
1655
  return path;
@@ -1710,26 +1658,9 @@ function withVoiceType(path, type) {
1710
1658
  return `${path}?${params.toString()}`;
1711
1659
  }
1712
1660
  function createVoiceCommand() {
1713
- const command = new Command9("voice").description("Manage Qwen3-TTS custom voices");
1661
+ const command = new Command9("voice").description("Manage Qwen3-TTS voice designs");
1714
1662
  command.addCommand(
1715
- new Command9("clone").description("Clone a voice from an audio sample").requiredOption("--audio <path>", "Reference audio file path").requiredOption("--name <name>", "Name for the cloned voice").option("--target-model <model>", "Voice cloning model, e.g. qwen3-tts-vc-2026-01-22").action(async function(options) {
1716
- try {
1717
- const ctx = createContext(this);
1718
- const audio = readAudioAsBase64(options.audio);
1719
- const body = {
1720
- ...audio,
1721
- name: options.name
1722
- };
1723
- if (options.targetModel) body.target_model = options.targetModel;
1724
- const data = await ctx.client.post("/api/voice/clone", body);
1725
- printSuccess("voice.clone", data, ctx);
1726
- } catch (error2) {
1727
- printError("voice.clone", error2);
1728
- }
1729
- })
1730
- );
1731
- command.addCommand(
1732
- new Command9("design").description("Create a synthetic voice from a text description").requiredOption("--prompt <text>", "Voice description prompt").requiredOption("--preview-text <text>", "Preview text for the generated sample").requiredOption("--name <name>", "Name for the designed voice").option("--target-model <model>", "Voice design model, e.g. qwen3-tts-vd-2026-01-26").action(async function(options) {
1663
+ new Command9("design").description("Create a synthetic voice and save the preview audio file").requiredOption("--prompt <text>", "Voice description prompt").requiredOption("--preview-text <text>", "Preview text for the generated sample").requiredOption("--name <name>", "Name for the designed voice").option("--target-model <model>", "Voice design model, e.g. qwen3-tts-vd-2026-01-26").option("--output-dir <dir>", "Directory to save preview audio", ".").action(async function(options) {
1733
1664
  try {
1734
1665
  const ctx = createContext(this);
1735
1666
  const body = {
@@ -1739,6 +1670,17 @@ function createVoiceCommand() {
1739
1670
  };
1740
1671
  if (options.targetModel) body.target_model = options.targetModel;
1741
1672
  const data = await ctx.client.post("/api/voice/design", body);
1673
+ const designData = data?.data;
1674
+ const b64Audio = designData?.preview_audio_data;
1675
+ if (b64Audio) {
1676
+ const { mkdirSync: mkdirSync5, writeFileSync: writeFileSync5 } = await import("fs");
1677
+ const { join: join5 } = await import("path");
1678
+ mkdirSync5(options.outputDir, { recursive: true });
1679
+ const buf = Buffer.from(b64Audio, "base64");
1680
+ const outPath = join5(options.outputDir, `${options.name}_preview.wav`);
1681
+ writeFileSync5(outPath, buf);
1682
+ data.preview_audio_path = outPath;
1683
+ }
1742
1684
  printSuccess("voice.design", data, ctx);
1743
1685
  } catch (error2) {
1744
1686
  printError("voice.design", error2);
@@ -1746,24 +1688,40 @@ function createVoiceCommand() {
1746
1688
  })
1747
1689
  );
1748
1690
  command.addCommand(
1749
- new Command9("create-custom").description("Design a custom voice and register it for self-hosted TTS (DashScope design \u2192 VoiceBox clone)").requiredOption("--prompt <text>", "Voice description (e.g. '\u5E74\u8F7B\u5973\u6027\uFF0C\u6E29\u6696\u4EB2\u5207\uFF0C\u6807\u51C6\u666E\u901A\u8BDD')").requiredOption("--preview-text <text>", "Sample text for voice preview").requiredOption("--name <name>", "Name for the custom voice").option("--language <lang>", "Language: zh, en, ja, ko, etc.", "zh").action(async function(options) {
1691
+ new Command9("synthesize").description("Synthesize speech using a designed voice").requiredOption("--voice <name>", "Voice name from voice design").requiredOption("--text <text>", "Text to synthesize").option("--model <model>", "TTS model (default: qwen3-tts-vd-realtime-2025-12-16)").option("--language <lang>", "Language: zh, en, ja, etc. (default: Auto)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
1750
1692
  try {
1751
1693
  const ctx = createContext(this);
1752
1694
  const body = {
1753
- voice_prompt: options.prompt,
1754
- preview_text: options.previewText,
1755
- name: options.name,
1756
- language: options.language
1695
+ voice: options.voice,
1696
+ text: options.text
1757
1697
  };
1758
- const data = await ctx.client.post("/api/voice/create-custom", body);
1759
- printSuccess("voice.create_custom", data, ctx);
1698
+ if (options.model) body.model = options.model;
1699
+ if (options.language) body.language = options.language;
1700
+ const data = await ctx.client.post("/api/voice/synthesize", body);
1701
+ const result = data.data;
1702
+ const output2 = result?.output;
1703
+ const audio = output2?.audio;
1704
+ const audioUrl = audio?.url ?? output2?.url;
1705
+ if (audioUrl && options.outputDir) {
1706
+ const { mkdirSync: mkdirSync5, writeFileSync: writeFileSync5 } = await import("fs");
1707
+ const { join: join5 } = await import("path");
1708
+ mkdirSync5(options.outputDir, { recursive: true });
1709
+ const resp = await fetch(audioUrl);
1710
+ if (resp.ok) {
1711
+ const buf = Buffer.from(await resp.arrayBuffer());
1712
+ const outPath = join5(options.outputDir, `tts_${Date.now()}.wav`);
1713
+ writeFileSync5(outPath, buf);
1714
+ data.local_path = outPath;
1715
+ }
1716
+ }
1717
+ printSuccess("voice.synthesize", data, ctx);
1760
1718
  } catch (error2) {
1761
- printError("voice.create_custom", error2);
1719
+ printError("voice.synthesize", error2);
1762
1720
  }
1763
1721
  })
1764
1722
  );
1765
1723
  command.addCommand(
1766
- new Command9("list").description("List custom cloned or designed voices").option("--type <type>", "Voice type: vc or vd").action(async function(options) {
1724
+ new Command9("list").description("List custom designed voices").option("--type <type>", "Voice type: vd").action(async function(options) {
1767
1725
  try {
1768
1726
  const ctx = createContext(this);
1769
1727
  const data = await ctx.client.get(withVoiceType("/api/voice/list", options.type));
@@ -1774,7 +1732,7 @@ function createVoiceCommand() {
1774
1732
  })
1775
1733
  );
1776
1734
  command.addCommand(
1777
- new Command9("delete").description("Delete a custom cloned or designed voice").argument("<voice-id>", "Voice ID to delete").option("--type <type>", "Voice type: vc or vd").action(async function(voiceId, options) {
1735
+ new Command9("delete").description("Delete a custom designed voice").argument("<voice-id>", "Voice ID to delete").option("--type <type>", "Voice type: vd").action(async function(voiceId, options) {
1778
1736
  try {
1779
1737
  const ctx = createContext(this);
1780
1738
  const path = withVoiceType(`/api/voice/${encodeURIComponent(voiceId)}`, options.type);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@doufunao123/asset-gateway",
3
- "version": "0.14.3",
3
+ "version": "0.16.0",
4
4
  "description": "Universal asset generation gateway CLI",
5
5
  "type": "module",
6
6
  "bin": {