@doufunao123/asset-gateway 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +22 -143
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { Command as Command10 } from "commander";
4
+ import { Command as Command9 } from "commander";
5
5
 
6
6
  // src/commands/auth.ts
7
7
  import { existsSync as existsSync2, unlinkSync } from "fs";
@@ -415,12 +415,11 @@ var SCHEMAS = {
415
415
  "--output-dir": { type: "string", default: "." }
416
416
  }
417
417
  },
418
- audio: {
419
- description: "Generate audio from a text prompt",
418
+ sfx: {
419
+ description: "Generate sound effects (impacts, footsteps, UI sounds, ambience)",
420
420
  params: {
421
421
  "--prompt": { type: "string", required: true },
422
- "--type": { type: "string", description: "bgm | sfx" },
423
- "--duration": { type: "number", description: "Seconds" },
422
+ "--duration": { type: "number", required: true, description: "Duration in seconds (1-5s for short SFX, 5-15s for ambience, max 30s)" },
424
423
  "--output-dir": { type: "string", default: "." }
425
424
  }
426
425
  },
@@ -435,10 +434,11 @@ var SCHEMAS = {
435
434
  }
436
435
  },
437
436
  tts: {
438
- description: "Text-to-speech via MOSS-TTS-Nano (self-hosted)",
437
+ description: "Text-to-speech via Gemini 3.1 Flash TTS",
439
438
  params: {
440
439
  "--prompt": { type: "string", required: true },
441
- "--input": { type: "string", description: "Reference audio file for voice cloning (local path or URL)" },
440
+ "--voice": { type: "string", description: "Prebuilt voice name (default: Kore)" },
441
+ "--speakers": { type: "string", description: `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'` },
442
442
  "--output-dir": { type: "string", default: "." }
443
443
  }
444
444
  },
@@ -650,22 +650,6 @@ var SCHEMAS = {
650
650
  }
651
651
  }
652
652
  },
653
- voice: {
654
- description: "Qwen3-TTS voice designs (design / list / delete)",
655
- subcommands: {
656
- design: {
657
- description: "Design voice from text",
658
- params: {
659
- "--prompt": { type: "string", required: true },
660
- "--preview-text": { type: "string", required: true },
661
- "--name": { type: "string", required: true },
662
- "--target-model": { type: "string" }
663
- }
664
- },
665
- list: { description: "List designed voices", params: { "--type": { type: "string" } } },
666
- delete: { description: "Delete designed voice", params: { "<voice-id>": { type: "string", required: true }, "--type": { type: "string" } } }
667
- }
668
- },
669
653
  upload: {
670
654
  description: "Upload and list gateway assets",
671
655
  subcommands: {
@@ -739,7 +723,7 @@ import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as rea
739
723
  import { dirname as dirname2, extname, join as join2 } from "path";
740
724
  import { Command as Command3 } from "commander";
741
725
  function inferExtension(assetType) {
742
- const map = { image: "png", audio: "mp3", music: "mp3", tts: "mp3", video: "mp4", model3d: "glb", text: "txt", sprite: "png", world: "spz" };
726
+ const map = { image: "png", audio: "mp3", sfx: "mp3", music: "mp3", tts: "mp3", video: "mp4", model3d: "glb", text: "txt", sprite: "png", world: "spz" };
743
727
  return map[assetType] ?? "bin";
744
728
  }
745
729
  function inferExtFromResult(result) {
@@ -909,23 +893,23 @@ function createGenerateCommand() {
909
893
  })
910
894
  );
911
895
  command.addCommand(
912
- new Command3("audio").description("Generate audio from a text prompt").requiredOption("--prompt <text>", "Audio description prompt").option("--type <type>", "Audio type: bgm or sfx").option("--duration <seconds>", "Duration in seconds").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
896
+ new Command3("sfx").description("Generate sound effects (short audio clips: impacts, footsteps, UI sounds, ambience)").requiredOption("--prompt <text>", "Sound effect description").requiredOption("--duration <seconds>", "Duration in seconds (1-5s for short SFX, 5-15s for ambience, max 30s)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
913
897
  try {
914
898
  const ctx = createContext(this);
899
+ const params = {
900
+ duration_seconds: Number(options.duration)
901
+ };
915
902
  const body = {
916
903
  asset_type: "audio",
917
- prompt: options.prompt
904
+ prompt: options.prompt,
905
+ params
918
906
  };
919
- const params = {};
920
- if (options.type) params.audio_type = options.type;
921
- if (options.duration) params.duration_seconds = Number(options.duration);
922
- if (Object.keys(params).length > 0) body.params = params;
923
907
  const data = await ctx.client.post("/api/generate", body);
924
908
  const localPath = await saveOutput(data, "audio", options.outputDir);
925
909
  if (localPath) data.local_path = localPath;
926
- printSuccess("generate.audio", data, ctx);
910
+ printSuccess("generate.sfx", data, ctx);
927
911
  } catch (error2) {
928
- printError("generate.audio", error2);
912
+ printError("generate.sfx", error2);
929
913
  }
930
914
  })
931
915
  );
@@ -955,23 +939,17 @@ function createGenerateCommand() {
955
939
  })
956
940
  );
957
941
  command.addCommand(
958
- new Command3("tts").description("Text-to-speech via MOSS-TTS-Nano (self-hosted)").requiredOption("--prompt <text>", "Text to synthesize").option("--input <path>", "Reference audio file for voice cloning (local path or URL)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
942
+ new Command3("tts").description("Text-to-speech via Gemini 3.1 Flash TTS").requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "Prebuilt voice name (default: Kore)").option("--speakers <json>", `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'`).option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
959
943
  try {
960
944
  const ctx = createContext(this);
961
945
  const body = {
962
946
  asset_type: "tts",
963
947
  prompt: options.prompt
964
948
  };
965
- if (options.input) {
966
- if (existsSync3(options.input)) {
967
- const ext = extname(options.input).toLowerCase();
968
- const mime = ext === ".wav" ? "audio/wav" : ext === ".mp3" ? "audio/mpeg" : "audio/wav";
969
- const b64 = readFileSync2(options.input).toString("base64");
970
- body.input_file = `data:${mime};base64,${b64}`;
971
- } else {
972
- body.input_file = options.input;
973
- }
974
- }
949
+ const params = {};
950
+ if (options.voice) params.voice = options.voice;
951
+ if (options.speakers) params.speakers = JSON.parse(options.speakers);
952
+ if (Object.keys(params).length > 0) body.params = params;
975
953
  const data = await ctx.client.post("/api/generate", body);
976
954
  const localPath = await saveOutput(data, "tts", options.outputDir);
977
955
  if (localPath) data.local_path = localPath;
@@ -1648,106 +1626,8 @@ function createUploadCommand() {
1648
1626
  return command;
1649
1627
  }
1650
1628
 
1651
- // src/commands/voice.ts
1652
- import { Command as Command9 } from "commander";
1653
- function withVoiceType(path, type) {
1654
- if (!type) {
1655
- return path;
1656
- }
1657
- const params = new URLSearchParams({ type });
1658
- return `${path}?${params.toString()}`;
1659
- }
1660
- function createVoiceCommand() {
1661
- const command = new Command9("voice").description("Manage Qwen3-TTS voice designs");
1662
- command.addCommand(
1663
- new Command9("design").description("Create a synthetic voice and save the preview audio file").requiredOption("--prompt <text>", "Voice description prompt").requiredOption("--preview-text <text>", "Preview text for the generated sample").requiredOption("--name <name>", "Name for the designed voice").option("--target-model <model>", "Voice design model, e.g. qwen3-tts-vd-2026-01-26").option("--output-dir <dir>", "Directory to save preview audio", ".").action(async function(options) {
1664
- try {
1665
- const ctx = createContext(this);
1666
- const body = {
1667
- voice_prompt: options.prompt,
1668
- preview_text: options.previewText,
1669
- name: options.name
1670
- };
1671
- if (options.targetModel) body.target_model = options.targetModel;
1672
- const data = await ctx.client.post("/api/voice/design", body);
1673
- const designData = data?.data;
1674
- const b64Audio = designData?.preview_audio_data;
1675
- if (b64Audio) {
1676
- const { mkdirSync: mkdirSync5, writeFileSync: writeFileSync5 } = await import("fs");
1677
- const { join: join5 } = await import("path");
1678
- mkdirSync5(options.outputDir, { recursive: true });
1679
- const buf = Buffer.from(b64Audio, "base64");
1680
- const outPath = join5(options.outputDir, `${options.name}_preview.wav`);
1681
- writeFileSync5(outPath, buf);
1682
- data.preview_audio_path = outPath;
1683
- }
1684
- printSuccess("voice.design", data, ctx);
1685
- } catch (error2) {
1686
- printError("voice.design", error2);
1687
- }
1688
- })
1689
- );
1690
- command.addCommand(
1691
- new Command9("synthesize").description("Synthesize speech using a designed voice").requiredOption("--voice <name>", "Voice name from voice design").requiredOption("--text <text>", "Text to synthesize").option("--model <model>", "TTS model (default: qwen3-tts-vd-realtime-2025-12-16)").option("--language <lang>", "Language: zh, en, ja, etc. (default: Auto)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
1692
- try {
1693
- const ctx = createContext(this);
1694
- const body = {
1695
- voice: options.voice,
1696
- text: options.text
1697
- };
1698
- if (options.model) body.model = options.model;
1699
- if (options.language) body.language = options.language;
1700
- const data = await ctx.client.post("/api/voice/synthesize", body);
1701
- const result = data.data;
1702
- const output2 = result?.output;
1703
- const audio = output2?.audio;
1704
- const audioUrl = audio?.url ?? output2?.url;
1705
- if (audioUrl && options.outputDir) {
1706
- const { mkdirSync: mkdirSync5, writeFileSync: writeFileSync5 } = await import("fs");
1707
- const { join: join5 } = await import("path");
1708
- mkdirSync5(options.outputDir, { recursive: true });
1709
- const resp = await fetch(audioUrl);
1710
- if (resp.ok) {
1711
- const buf = Buffer.from(await resp.arrayBuffer());
1712
- const outPath = join5(options.outputDir, `tts_${Date.now()}.wav`);
1713
- writeFileSync5(outPath, buf);
1714
- data.local_path = outPath;
1715
- }
1716
- }
1717
- printSuccess("voice.synthesize", data, ctx);
1718
- } catch (error2) {
1719
- printError("voice.synthesize", error2);
1720
- }
1721
- })
1722
- );
1723
- command.addCommand(
1724
- new Command9("list").description("List custom designed voices").option("--type <type>", "Voice type: vd").action(async function(options) {
1725
- try {
1726
- const ctx = createContext(this);
1727
- const data = await ctx.client.get(withVoiceType("/api/voice/list", options.type));
1728
- printSuccess("voice.list", data, ctx);
1729
- } catch (error2) {
1730
- printError("voice.list", error2);
1731
- }
1732
- })
1733
- );
1734
- command.addCommand(
1735
- new Command9("delete").description("Delete a custom designed voice").argument("<voice-id>", "Voice ID to delete").option("--type <type>", "Voice type: vd").action(async function(voiceId, options) {
1736
- try {
1737
- const ctx = createContext(this);
1738
- const path = withVoiceType(`/api/voice/${encodeURIComponent(voiceId)}`, options.type);
1739
- const data = await ctx.client.delete(path);
1740
- printSuccess("voice.delete", data, ctx);
1741
- } catch (error2) {
1742
- printError("voice.delete", error2);
1743
- }
1744
- })
1745
- );
1746
- return command;
1747
- }
1748
-
1749
1629
  // src/index.ts
1750
- var program = new Command10().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
1630
+ var program = new Command9().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
1751
1631
  "--gateway-url <url>",
1752
1632
  `Gateway URL (default: $ASSET_GATEWAY_URL, auth config, or ${DEFAULT_GATEWAY_URL})`
1753
1633
  ).option("--token <token>", "API token for authentication").option("--human", "Human-readable output instead of JSON").option("--fields <fields>", "Comma-separated list of output fields");
@@ -1757,7 +1637,6 @@ program.addCommand(createProcessCommand());
1757
1637
  program.addCommand(createProcess3dCommand());
1758
1638
  program.addCommand(createProviderCommand());
1759
1639
  program.addCommand(createUploadCommand());
1760
- program.addCommand(createVoiceCommand());
1761
1640
  program.addCommand(createJobCommand());
1762
1641
  program.addCommand(createDescribeCommand());
1763
1642
  await program.parseAsync(process.argv);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@doufunao123/asset-gateway",
3
- "version": "0.16.0",
3
+ "version": "0.17.0",
4
4
  "description": "Universal asset generation gateway CLI",
5
5
  "type": "module",
6
6
  "bin": {