@doufunao123/asset-gateway 0.21.0 → 0.22.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +17 -0
  2. package/dist/index.js +151 -9
  3. package/package.json +2 -2
package/README.md CHANGED
@@ -59,6 +59,9 @@ asset-gateway generate image --prompt "a cat" --size 1024x1024
59
59
  asset-gateway generate image --prompt "icon" --transparent --provider flux
60
60
  asset-gateway generate video --prompt "ocean waves"
61
61
  asset-gateway generate sfx --prompt "epic battle impact" --duration 3
62
+ asset-gateway generate tts --prompt "(开心)今天天气真好!" --voice 冰糖
63
+ asset-gateway voice design --voice-prompt "warm narrator" --preview-text "Welcome." --name narrator --output ./narrator.wav
64
+ asset-gateway voice clone --audio ./sample.wav --preview-text "Welcome back." --name clone --output ./clone.wav
62
65
  asset-gateway generate character --prompt "medieval knight" --format fbx --pbr
63
66
  asset-gateway generate prop --prompt "ornate treasure chest" --polycount 5000
64
67
  asset-gateway generate model --image https://example.com/ref.png --ai-model latest
@@ -86,6 +89,20 @@ asset-gateway describe
86
89
  asset-gateway describe generate
87
90
  ```
88
91
 
92
+ Programmatic TTS uses the SDK directly. MiMo voices include `冰糖`, `茉莉`, `苏打`, `白桦`, `Mia`, `Chloe`, `Milo`, and `Dean`; prompts may include audio tags such as `(开心)`, `(唱歌)`, or `[whisper]`.
93
+
94
+ ```ts
95
+ await forge.tts("(开心)今天天气真好!", { voice: "冰糖" });
96
+ ```
97
+
98
+ Voice design and clone are also available from the CLI:
99
+
100
+ ```bash
101
+ asset-gateway voice design --voice-prompt "warm documentary narrator" --preview-text "Welcome to AssetForge." --name narrator --output ./narrator.wav
102
+ asset-gateway voice clone --audio ./sample.wav --preview-text "Welcome back." --name clone --output ./clone.wav
103
+ asset-gateway voice list --type vc
104
+ ```
105
+
89
106
  ## Output
90
107
 
91
108
  JSON by default. Use `--human` for readable output, `--fields` to filter:
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { Command as Command10 } from "commander";
4
+ import { Command as Command11 } from "commander";
5
5
 
6
6
  // src/commands/auth.ts
7
7
  import { existsSync as existsSync2, unlinkSync } from "fs";
@@ -303,11 +303,11 @@ var SCHEMAS = {
303
303
  }
304
304
  },
305
305
  tts: {
306
- description: "Text-to-speech via Gemini 3.1 Flash TTS",
306
+ description: "Text-to-speech via MiMo v2.5 TTS",
307
307
  params: {
308
308
  "--prompt": { type: "string", required: true },
309
- "--voice": { type: "string", description: "Prebuilt voice name (default: Kore)" },
310
- "--speakers": { type: "string", description: `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'` },
309
+ "--voice": { type: "string", description: "MiMo prebuilt voice name (default: server config, usually Mia)" },
310
+ "--context": { type: "string", description: "Natural-language style or director instruction" },
311
311
  "--output-dir": { type: "string", default: "." }
312
312
  }
313
313
  },
@@ -513,6 +513,36 @@ var SCHEMAS = {
513
513
  health: { description: "Health check", params: { "[name]": { type: "string", required: false } } }
514
514
  }
515
515
  },
516
+ voice: {
517
+ description: "Design, clone, list, and delete MiMo voices",
518
+ subcommands: {
519
+ design: {
520
+ description: "Generate a MiMo voice from a text description",
521
+ params: {
522
+ "--voice-prompt": { type: "string", required: true },
523
+ "--preview-text": { type: "string", required: true },
524
+ "--style": { type: "string" },
525
+ "--name": { type: "string" },
526
+ "--save-as": { type: "string" },
527
+ "--output": { type: "string", description: "Write preview WAV to this path" }
528
+ }
529
+ },
530
+ clone: {
531
+ description: "Clone a MiMo voice from an mp3/wav sample",
532
+ params: {
533
+ "--audio": { type: "string", required: true, description: "Voice sample mp3/wav file or data URL" },
534
+ "--preview-text": { type: "string", required: true },
535
+ "--audio-mime": { type: "string" },
536
+ "--style": { type: "string" },
537
+ "--name": { type: "string" },
538
+ "--save-as": { type: "string" },
539
+ "--output": { type: "string", description: "Write preview WAV to this path" }
540
+ }
541
+ },
542
+ list: { description: "List saved voices", params: { "--type": { type: "string", description: "vc | vd" } } },
543
+ delete: { description: "Delete a saved voice", params: { "<voice-id>": { type: "string", required: true }, "--type": { type: "string", description: "vc | vd" } } }
544
+ }
545
+ },
516
546
  job: {
517
547
  description: "Async job history",
518
548
  subcommands: {
@@ -576,7 +606,7 @@ function inferExtension(assetType) {
576
606
  audio: "mp3",
577
607
  sfx: "mp3",
578
608
  music: "mp3",
579
- tts: "mp3",
609
+ tts: "wav",
580
610
  video: "mp4",
581
611
  model3d: "glb",
582
612
  character: "glb",
@@ -599,6 +629,8 @@ function inferExtFromResult(result) {
599
629
  "image/jpeg": "jpg",
600
630
  "video/mp4": "mp4",
601
631
  "audio/mpeg": "mp3",
632
+ "audio/wav": "wav",
633
+ "audio/x-wav": "wav",
602
634
  "model/gltf-binary": "glb",
603
635
  "model/gltf+json": "gltf",
604
636
  "model/stl": "stl",
@@ -743,9 +775,12 @@ function createGenerateCommand() {
743
775
  new Command3("video").description("Generate a video from a text prompt (or image-to-video with --input)").requiredOption("--prompt <text>", "Video description prompt").option("--provider <id>", "Provider to use").option("--input <url>", "Reference image URL for image-to-video (Grok)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
744
776
  try {
745
777
  const ctx = createContext(this);
746
- const data = await ctx.client.video(options.prompt, {
778
+ const requestOptions = {
747
779
  provider: options.provider,
748
780
  input: options.input
781
+ };
782
+ const data = await ctx.client.video(options.prompt, {
783
+ ...requestOptions
749
784
  });
750
785
  const localPath = await saveOutput(data, "video", options.outputDir);
751
786
  if (localPath) data.local_path = localPath;
@@ -841,11 +876,11 @@ function createGenerateCommand() {
841
876
  })
842
877
  );
843
878
  command.addCommand(
844
- new Command3("tts").description("Text-to-speech via Gemini 3.1 Flash TTS").requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "Prebuilt voice name (default: Kore)").option("--speakers <json>", `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'`).option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
879
+ new Command3("tts").description("Text-to-speech via MiMo v2.5 TTS").requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "MiMo prebuilt voice name (default: server config, usually Mia)").option("--context <text>", "Natural-language style or director instruction").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
845
880
  try {
846
881
  const ctx = createContext(this);
847
882
  const params = {};
848
- if (options.speakers) params.speakers = JSON.parse(options.speakers);
883
+ if (options.context) params.context = options.context;
849
884
  const data = await ctx.client.tts(options.prompt, {
850
885
  voice: options.voice,
851
886
  params: Object.keys(params).length > 0 ? toJsonObject(params) : void 0
@@ -1505,8 +1540,114 @@ async function readLocalFile(filePath) {
1505
1540
  }
1506
1541
  }
1507
1542
 
1543
+ // src/commands/voice.ts
1544
+ import { existsSync as existsSync6, mkdirSync as mkdirSync5, readFileSync as readFileSync5, writeFileSync as writeFileSync5 } from "fs";
1545
+ import { dirname as dirname3, extname as extname2 } from "path";
1546
+ import { Command as Command10 } from "commander";
1547
+ function createVoiceCommand() {
1548
+ const command = new Command10("voice").description("Design, clone, list, and delete voices");
1549
+ command.addCommand(
1550
+ new Command10("design").description("Generate a MiMo voice from a text description").requiredOption("--voice-prompt <text>", "Voice description / style prompt").requiredOption("--preview-text <text>", "Text to synthesize for preview").option("--style <text>", "Additional director/style instruction").option("--name <name>", "Save generated voice with this name").option("--save-as <name>", "Alias for --name").option("--output <path>", "Write preview WAV to this path").action(async function(options) {
1551
+ const ctx = createContext(this);
1552
+ try {
1553
+ const request = {
1554
+ voice_prompt: options.voicePrompt,
1555
+ preview_text: options.previewText,
1556
+ style: options.style,
1557
+ name: options.name,
1558
+ save_as: options.saveAs
1559
+ };
1560
+ const data = await ctx.client.voice.design(request);
1561
+ const localPath = writeWavOutput(data, options.output);
1562
+ if (localPath) data.local_path = localPath;
1563
+ printSuccess("voice.design", data, ctx);
1564
+ } catch (error2) {
1565
+ printError("voice.design", error2, ctx.human);
1566
+ }
1567
+ })
1568
+ );
1569
+ command.addCommand(
1570
+ new Command10("clone").description("Clone a MiMo voice from an mp3/wav sample").requiredOption("--audio <path-or-data-url>", "Voice sample mp3/wav file or data URL").requiredOption("--preview-text <text>", "Text to synthesize for preview").option("--audio-mime <mime>", "Audio MIME type when --audio is raw base64").option("--style <text>", "Director/style instruction").option("--name <name>", "Save cloned voice with this name").option("--save-as <name>", "Alias for --name").option("--output <path>", "Write preview WAV to this path").action(async function(options) {
1571
+ const ctx = createContext(this);
1572
+ try {
1573
+ const sample = readVoiceSample(options.audio, options.audioMime);
1574
+ const request = {
1575
+ ...sample,
1576
+ preview_text: options.previewText,
1577
+ style: options.style,
1578
+ name: options.name,
1579
+ save_as: options.saveAs
1580
+ };
1581
+ const data = await ctx.client.voice.clone(request);
1582
+ const localPath = writeWavOutput(data, options.output);
1583
+ if (localPath) data.local_path = localPath;
1584
+ printSuccess("voice.clone", data, ctx);
1585
+ } catch (error2) {
1586
+ printError("voice.clone", error2, ctx.human);
1587
+ }
1588
+ })
1589
+ );
1590
+ command.addCommand(
1591
+ new Command10("list").description("List saved voices").option("--type <type>", "Voice type: vc or vd").action(async function(options) {
1592
+ const ctx = createContext(this);
1593
+ try {
1594
+ const data = await ctx.client.voice.list({ type: options.type });
1595
+ printSuccess("voice.list", data, ctx);
1596
+ } catch (error2) {
1597
+ printError("voice.list", error2, ctx.human);
1598
+ }
1599
+ })
1600
+ );
1601
+ command.addCommand(
1602
+ new Command10("delete").description("Delete a saved voice").argument("<voice-id>", "Voice ID").option("--type <type>", "Voice type: vc or vd").action(async function(voiceId, options) {
1603
+ const ctx = createContext(this);
1604
+ try {
1605
+ const data = await ctx.client.voice.delete(voiceId, { type: options.type });
1606
+ printSuccess("voice.delete", data, ctx);
1607
+ } catch (error2) {
1608
+ printError("voice.delete", error2, ctx.human);
1609
+ }
1610
+ })
1611
+ );
1612
+ return command;
1613
+ }
1614
+ function readVoiceSample(input, audioMime) {
1615
+ if (input.startsWith("data:")) {
1616
+ return { sample_data_url: input };
1617
+ }
1618
+ if (existsSync6(input)) {
1619
+ return {
1620
+ audio_base64: readFileSync5(input).toString("base64"),
1621
+ audio_mime: audioMime ?? inferAudioMime(input)
1622
+ };
1623
+ }
1624
+ return { audio_base64: input, audio_mime: audioMime ?? "audio/wav" };
1625
+ }
1626
+ function inferAudioMime(filePath) {
1627
+ const ext = extname2(filePath).toLowerCase();
1628
+ if (ext === ".mp3") return "audio/mpeg";
1629
+ if (ext === ".wav") return "audio/wav";
1630
+ return "audio/wav";
1631
+ }
1632
+ function writeWavOutput(data, outputPath) {
1633
+ if (!outputPath) {
1634
+ return null;
1635
+ }
1636
+ const raw = data.wav_base64 ?? data.output_data;
1637
+ if (typeof raw !== "string" || !raw) {
1638
+ return null;
1639
+ }
1640
+ mkdirSync5(dirname3(outputPath), { recursive: true });
1641
+ writeFileSync5(outputPath, Buffer.from(stripDataUri2(raw), "base64"));
1642
+ return outputPath;
1643
+ }
1644
+ function stripDataUri2(data) {
1645
+ const idx = data.indexOf(";base64,");
1646
+ return idx >= 0 ? data.slice(idx + 8) : data;
1647
+ }
1648
+
1508
1649
  // src/index.ts
1509
- var program = new Command10().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
1650
+ var program = new Command11().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
1510
1651
  "--gateway-url <url>",
1511
1652
  `Gateway URL (default: $ASSET_GATEWAY_URL, auth config, or ${DEFAULT_GATEWAY_URL})`
1512
1653
  ).option("--token <token>", "API token for authentication").option("--human", "Human-readable output instead of JSON").option("--fields <fields>", "Comma-separated list of output fields");
@@ -1518,5 +1659,6 @@ program.addCommand(createProcess3dCommand());
1518
1659
  program.addCommand(createProviderCommand());
1519
1660
  program.addCommand(createUploadCommand());
1520
1661
  program.addCommand(createJobCommand());
1662
+ program.addCommand(createVoiceCommand());
1521
1663
  program.addCommand(createDescribeCommand());
1522
1664
  await program.parseAsync(process.argv);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@doufunao123/asset-gateway",
3
- "version": "0.21.0",
3
+ "version": "0.22.0",
4
4
  "description": "Universal asset generation gateway CLI",
5
5
  "type": "module",
6
6
  "bin": {
@@ -27,7 +27,7 @@
27
27
  "node": ">=20"
28
28
  },
29
29
  "dependencies": {
30
- "@doufunao123/assetforge-sdk": "^0.4.0",
30
+ "@doufunao123/assetforge-sdk": "^0.6.0",
31
31
  "commander": "^13.1.0"
32
32
  },
33
33
  "devDependencies": {