@doufunao123/asset-gateway 0.20.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +19 -0
  2. package/dist/index.js +207 -29
  3. package/package.json +2 -2
package/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  Lightweight npm CLI client for the universal asset generation gateway.
4
4
 
5
+ The CLI is a thin wrapper around `@doufunao123/assetforge-sdk`. For programmatic access, use the SDK directly.
6
+
5
7
  ## Install
6
8
 
7
9
  ```bash
@@ -57,6 +59,9 @@ asset-gateway generate image --prompt "a cat" --size 1024x1024
57
59
  asset-gateway generate image --prompt "icon" --transparent --provider flux
58
60
  asset-gateway generate video --prompt "ocean waves"
59
61
  asset-gateway generate sfx --prompt "epic battle impact" --duration 3
62
+ asset-gateway generate tts --prompt "(开心)今天天气真好!" --voice 冰糖
63
+ asset-gateway voice design --voice-prompt "warm narrator" --preview-text "Welcome." --name narrator --output ./narrator.wav
64
+ asset-gateway voice clone --audio ./sample.wav --preview-text "Welcome back." --name clone --output ./clone.wav
60
65
  asset-gateway generate character --prompt "medieval knight" --format fbx --pbr
61
66
  asset-gateway generate prop --prompt "ornate treasure chest" --polycount 5000
62
67
  asset-gateway generate model --image https://example.com/ref.png --ai-model latest
@@ -84,6 +89,20 @@ asset-gateway describe
84
89
  asset-gateway describe generate
85
90
  ```
86
91
 
92
+ Programmatic TTS uses the SDK directly. MiMo voices include `冰糖`, `茉莉`, `苏打`, `白桦`, `Mia`, `Chloe`, `Milo`, and `Dean`; prompts may include audio tags such as `(开心)`, `(唱歌)`, or `[whisper]`.
93
+
94
+ ```ts
95
+ await forge.tts("(开心)今天天气真好!", { voice: "冰糖" });
96
+ ```
97
+
98
+ Voice design and cloning are also available from the CLI:
99
+
100
+ ```bash
101
+ asset-gateway voice design --voice-prompt "warm documentary narrator" --preview-text "Welcome to AssetForge." --name narrator --output ./narrator.wav
102
+ asset-gateway voice clone --audio ./sample.wav --preview-text "Welcome back." --name clone --output ./clone.wav
103
+ asset-gateway voice list --type vc
104
+ ```
105
+
87
106
  ## Output
88
107
 
89
108
  JSON by default. Use `--human` for readable output, `--fields` to filter:
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { Command as Command10 } from "commander";
4
+ import { Command as Command11 } from "commander";
5
5
 
6
6
  // src/commands/auth.ts
7
7
  import { existsSync as existsSync2, unlinkSync } from "fs";
@@ -303,11 +303,11 @@ var SCHEMAS = {
303
303
  }
304
304
  },
305
305
  tts: {
306
- description: "Text-to-speech via Gemini 3.1 Flash TTS",
306
+ description: "Text-to-speech via MiMo v2.5 TTS",
307
307
  params: {
308
308
  "--prompt": { type: "string", required: true },
309
- "--voice": { type: "string", description: "Prebuilt voice name (default: Kore)" },
310
- "--speakers": { type: "string", description: `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'` },
309
+ "--voice": { type: "string", description: "MiMo prebuilt voice name (default: server config, usually Mia)" },
310
+ "--context": { type: "string", description: "Natural-language style or director instruction" },
311
311
  "--output-dir": { type: "string", default: "." }
312
312
  }
313
313
  },
@@ -472,6 +472,19 @@ var SCHEMAS = {
472
472
  "--output-dir": { type: "string", default: "." }
473
473
  }
474
474
  },
475
+ animate: {
476
+ description: "Apply a preset animation to a rigged character. Common presets: 0=Idle, 1=Walk, 4=Attack, 8=Dead, 14=Run, 16=RunFast, 466=Jump. 500+ presets available.",
477
+ params: {
478
+ "--task-id": { type: "string", required: true, description: "Meshy rigging task ID" },
479
+ "--action-id": {
480
+ type: "number",
481
+ required: true,
482
+ description: "Animation preset ID (see docs for full list)"
483
+ },
484
+ "--fps": { type: "number", description: "Target frame rate: 24, 25, 30, or 60" },
485
+ "--output-dir": { type: "string", default: "." }
486
+ }
487
+ },
475
488
  refine: {
476
489
  description: "Refine a Meshy preview model into a higher quality result",
477
490
  params: {
@@ -500,6 +513,36 @@ var SCHEMAS = {
500
513
  health: { description: "Health check", params: { "[name]": { type: "string", required: false } } }
501
514
  }
502
515
  },
516
+ voice: {
517
+ description: "Design, clone, list, and delete MiMo voices",
518
+ subcommands: {
519
+ design: {
520
+ description: "Generate a MiMo voice from a text description",
521
+ params: {
522
+ "--voice-prompt": { type: "string", required: true },
523
+ "--preview-text": { type: "string", required: true },
524
+ "--style": { type: "string" },
525
+ "--name": { type: "string" },
526
+ "--save-as": { type: "string" },
527
+ "--output": { type: "string", description: "Write preview WAV to this path" }
528
+ }
529
+ },
530
+ clone: {
531
+ description: "Clone a MiMo voice from an mp3/wav sample",
532
+ params: {
533
+ "--audio": { type: "string", required: true, description: "Voice sample mp3/wav file or data URL" },
534
+ "--preview-text": { type: "string", required: true },
535
+ "--audio-mime": { type: "string" },
536
+ "--style": { type: "string" },
537
+ "--name": { type: "string" },
538
+ "--save-as": { type: "string" },
539
+ "--output": { type: "string", description: "Write preview WAV to this path" }
540
+ }
541
+ },
542
+ list: { description: "List saved voices", params: { "--type": { type: "string", description: "vc | vd" } } },
543
+ delete: { description: "Delete a saved voice", params: { "<voice-id>": { type: "string", required: true }, "--type": { type: "string", description: "vc | vd" } } }
544
+ }
545
+ },
503
546
  job: {
504
547
  description: "Async job history",
505
548
  subcommands: {
@@ -563,7 +606,7 @@ function inferExtension(assetType) {
563
606
  audio: "mp3",
564
607
  sfx: "mp3",
565
608
  music: "mp3",
566
- tts: "mp3",
609
+ tts: "wav",
567
610
  video: "mp4",
568
611
  model3d: "glb",
569
612
  character: "glb",
@@ -586,6 +629,8 @@ function inferExtFromResult(result) {
586
629
  "image/jpeg": "jpg",
587
630
  "video/mp4": "mp4",
588
631
  "audio/mpeg": "mp3",
632
+ "audio/wav": "wav",
633
+ "audio/x-wav": "wav",
589
634
  "model/gltf-binary": "glb",
590
635
  "model/gltf+json": "gltf",
591
636
  "model/stl": "stl",
@@ -714,9 +759,9 @@ function createGenerateCommand() {
714
759
  size: options.size,
715
760
  transparent: options.transparent ? true : void 0,
716
761
  input: options.input,
717
- referenceImages: options.ref,
718
- editMode: options.editMode,
719
- sessionId: options.session
762
+ reference_images: options.ref,
763
+ edit_mode: options.editMode,
764
+ session_id: options.session
720
765
  });
721
766
  const localPath = await saveOutput(data, "image", options.outputDir);
722
767
  if (localPath) data.local_path = localPath;
@@ -730,9 +775,12 @@ function createGenerateCommand() {
730
775
  new Command3("video").description("Generate a video from a text prompt (or image-to-video with --input)").requiredOption("--prompt <text>", "Video description prompt").option("--provider <id>", "Provider to use").option("--input <url>", "Reference image URL for image-to-video (Grok)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
731
776
  try {
732
777
  const ctx = createContext(this);
733
- const data = await ctx.client.video(options.prompt, {
778
+ const requestOptions = {
734
779
  provider: options.provider,
735
780
  input: options.input
781
+ };
782
+ const data = await ctx.client.video(options.prompt, {
783
+ ...requestOptions
736
784
  });
737
785
  const localPath = await saveOutput(data, "video", options.outputDir);
738
786
  if (localPath) data.local_path = localPath;
@@ -816,8 +864,8 @@ function createGenerateCommand() {
816
864
  const ctx = createContext(this);
817
865
  const data = await ctx.client.music(options.prompt, {
818
866
  duration: options.duration ? Number(options.duration) : void 0,
819
- forceInstrumental: options.forceInstrumental ? true : void 0,
820
- outputFormat: options.outputFormat
867
+ force_instrumental: options.forceInstrumental ? true : void 0,
868
+ output_format: options.outputFormat
821
869
  });
822
870
  const localPath = await saveOutput(data, "music", options.outputDir);
823
871
  if (localPath) data.local_path = localPath;
@@ -828,11 +876,11 @@ function createGenerateCommand() {
828
876
  })
829
877
  );
830
878
  command.addCommand(
831
- new Command3("tts").description("Text-to-speech via Gemini 3.1 Flash TTS").requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "Prebuilt voice name (default: Kore)").option("--speakers <json>", `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'`).option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
879
+ new Command3("tts").description("Text-to-speech via MiMo v2.5 TTS").requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "MiMo prebuilt voice name (default: server config, usually Mia)").option("--context <text>", "Natural-language style or director instruction").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
832
880
  try {
833
881
  const ctx = createContext(this);
834
882
  const params = {};
835
- if (options.speakers) params.speakers = JSON.parse(options.speakers);
883
+ if (options.context) params.context = options.context;
836
884
  const data = await ctx.client.tts(options.prompt, {
837
885
  voice: options.voice,
838
886
  params: Object.keys(params).length > 0 ? toJsonObject(params) : void 0
@@ -858,12 +906,12 @@ function createGenerateCommand() {
858
906
  const data = await ctx.client.model3d(options.prompt ?? "", {
859
907
  input: toInputFile(options.image),
860
908
  format: options.format,
861
- aiModel: options.aiModel,
909
+ ai_model: options.aiModel,
862
910
  polycount: options.polycount ? Number(options.polycount) : void 0,
863
911
  pbr: options.pbr ? true : void 0,
864
- hdTexture: options.hdTexture ? true : void 0,
865
- poseMode: options.poseMode,
866
- autoSize: options.autoSize ? true : void 0,
912
+ hd_texture: options.hdTexture ? true : void 0,
913
+ pose_mode: options.poseMode,
914
+ auto_size: options.autoSize ? true : void 0,
867
915
  params: Object.keys(params).length > 0 ? toJsonObject(params) : void 0
868
916
  });
869
917
  const localPath = await saveOutput(data, "model3d", options.outputDir, options.format);
@@ -886,10 +934,10 @@ function createGenerateCommand() {
886
934
  format: options.format,
887
935
  polycount: options.polycount ? Number(options.polycount) : void 0,
888
936
  pbr: options.pbr ? true : void 0,
889
- hdTexture: options.hdTexture ? true : void 0,
890
- poseMode: options.poseMode,
891
- aiModel: options.aiModel,
892
- autoSize: options.autoSize ? true : void 0
937
+ hd_texture: options.hdTexture ? true : void 0,
938
+ pose_mode: options.poseMode,
939
+ ai_model: options.aiModel,
940
+ auto_size: options.autoSize ? true : void 0
893
941
  });
894
942
  const localPath = await saveOutput(data, "character", options.outputDir, options.format);
895
943
  if (localPath) data.local_path = localPath;
@@ -911,10 +959,10 @@ function createGenerateCommand() {
911
959
  format: options.format,
912
960
  polycount: options.polycount ? Number(options.polycount) : void 0,
913
961
  pbr: options.pbr ? true : void 0,
914
- hdTexture: options.hdTexture ? true : void 0,
915
- aiModel: options.aiModel,
916
- autoSize: options.autoSize ? true : void 0,
917
- texturePrompt: options.texturePrompt
962
+ hd_texture: options.hdTexture ? true : void 0,
963
+ ai_model: options.aiModel,
964
+ auto_size: options.autoSize ? true : void 0,
965
+ texture_prompt: options.texturePrompt
918
966
  });
919
967
  const localPath = await saveOutput(data, "prop", options.outputDir, options.format);
920
968
  if (localPath) data.local_path = localPath;
@@ -930,7 +978,7 @@ function createGenerateCommand() {
930
978
  const ctx = createContext(this);
931
979
  const data = await ctx.client.text(options.prompt, {
932
980
  model: options.model,
933
- maxTokens: options.maxTokens ? Number(options.maxTokens) : void 0
981
+ max_tokens: options.maxTokens ? Number(options.maxTokens) : void 0
934
982
  });
935
983
  const localPath = await saveOutput(data, "text", options.outputDir);
936
984
  if (localPath) data.local_path = localPath;
@@ -951,7 +999,7 @@ function createGenerateCommand() {
951
999
  };
952
1000
  const data = await ctx.client.sprite(options.prompt, {
953
1001
  input: toInputFile(options.input),
954
- animationType: options.animationType,
1002
+ animation_type: options.animationType,
955
1003
  style: options.style,
956
1004
  params: toJsonObject(params)
957
1005
  });
@@ -970,7 +1018,7 @@ function createGenerateCommand() {
970
1018
  const data = await ctx.client.world(options.prompt, {
971
1019
  input: toInputFile(options.input),
972
1020
  model: options.model,
973
- displayName: options.displayName
1021
+ display_name: options.displayName
974
1022
  });
975
1023
  const localPath = await saveOutput(data, "world", options.outputDir);
976
1024
  if (localPath) data.local_path = localPath;
@@ -1361,6 +1409,29 @@ function createProcess3dCommand() {
1361
1409
  }
1362
1410
  })
1363
1411
  );
1412
+ command.addCommand(
1413
+ new Command7("animate").description("Apply a preset animation to a rigged Meshy character").requiredOption("--task-id <id>", "Meshy rigging task ID").requiredOption("--action-id <n>", "Animation preset ID (see docs for full list)").option("--fps <n>", "Target frame rate: 24, 25, 30, or 60").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
1414
+ try {
1415
+ const ctx = createContext(this);
1416
+ const params = {
1417
+ action_id: Number(options.actionId)
1418
+ };
1419
+ if (options.fps) {
1420
+ params.fps = Number(options.fps);
1421
+ }
1422
+ const data = await ctx.client.process3d({
1423
+ task_id: options.taskId,
1424
+ operation: "animate",
1425
+ params: toJsonObject2(params)
1426
+ });
1427
+ const localPath = await saveProcess3dOutput(data, "animate", options.outputDir);
1428
+ if (localPath) data.local_path = localPath;
1429
+ printSuccess("process3d.animate", data, ctx);
1430
+ } catch (error2) {
1431
+ printError("process3d.animate", error2);
1432
+ }
1433
+ })
1434
+ );
1364
1435
  command.addCommand(
1365
1436
  new Command7("refine").description("Refine a Meshy preview model into a higher quality result").requiredOption("--task-id <id>", "Meshy preview task ID").option("--pbr", "Enable PBR materials").option("--hd-texture", "Request 4K texture output").option("--texture-prompt <text>", "Optional texture prompt for the refinement pass").option("--ai-model <name>", "Meshy model: meshy-5, meshy-6, latest").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
1366
1437
  try {
@@ -1469,8 +1540,114 @@ async function readLocalFile(filePath) {
1469
1540
  }
1470
1541
  }
1471
1542
 
1543
+ // src/commands/voice.ts
1544
+ import { existsSync as existsSync6, mkdirSync as mkdirSync5, readFileSync as readFileSync5, writeFileSync as writeFileSync5 } from "fs";
1545
+ import { dirname as dirname3, extname as extname2 } from "path";
1546
+ import { Command as Command10 } from "commander";
/**
 * Builds the `voice` command group with its `design`, `clone`, `list`, and
 * `delete` subcommands.
 *
 * Every subcommand follows the same sequence: create the CLI context from the
 * invoked command, run one `ctx.client.voice.*` request, then report the
 * result with `printSuccess` or the failure with `printError`. The context is
 * created before the `try` block, so an error thrown by `createContext` is not
 * routed through `printError`.
 *
 * @returns The configured `voice` command, ready to be added to the program.
 */
function createVoiceCommand() {
  // Adapts a request callback into a commander action handler. `this` is the
  // invoked subcommand, which is why a regular function (not an arrow) is returned.
  const asAction = (label, request) =>
    async function (...actionArgs) {
      const ctx = createContext(this);
      try {
        const data = await request(ctx, ...actionArgs);
        printSuccess(label, data, ctx);
      } catch (failure) {
        printError(label, failure, ctx.human);
      }
    };

  // Writes the preview audio when --output was given and records where it went.
  const attachPreview = (data, outputPath) => {
    const localPath = writeWavOutput(data, outputPath);
    if (localPath) data.local_path = localPath;
    return data;
  };

  const voice = new Command10("voice").description("Design, clone, list, and delete voices");

  const design = new Command10("design")
    .description("Generate a MiMo voice from a text description")
    .requiredOption("--voice-prompt <text>", "Voice description / style prompt")
    .requiredOption("--preview-text <text>", "Text to synthesize for preview")
    .option("--style <text>", "Additional director/style instruction")
    .option("--name <name>", "Save generated voice with this name")
    .option("--save-as <name>", "Alias for --name")
    .option("--output <path>", "Write preview WAV to this path")
    .action(
      asAction("voice.design", async (ctx, options) => {
        const data = await ctx.client.voice.design({
          voice_prompt: options.voicePrompt,
          preview_text: options.previewText,
          style: options.style,
          name: options.name,
          save_as: options.saveAs
        });
        return attachPreview(data, options.output);
      })
    );
  voice.addCommand(design);

  const clone = new Command10("clone")
    .description("Clone a MiMo voice from an mp3/wav sample")
    .requiredOption("--audio <path-or-data-url>", "Voice sample mp3/wav file or data URL")
    .requiredOption("--preview-text <text>", "Text to synthesize for preview")
    .option("--audio-mime <mime>", "Audio MIME type when --audio is raw base64")
    .option("--style <text>", "Director/style instruction")
    .option("--name <name>", "Save cloned voice with this name")
    .option("--save-as <name>", "Alias for --name")
    .option("--output <path>", "Write preview WAV to this path")
    .action(
      asAction("voice.clone", async (ctx, options) => {
        // Reading the sample happens inside the guarded section, so a failure
        // while loading the audio is reported through printError.
        const data = await ctx.client.voice.clone({
          ...readVoiceSample(options.audio, options.audioMime),
          preview_text: options.previewText,
          style: options.style,
          name: options.name,
          save_as: options.saveAs
        });
        return attachPreview(data, options.output);
      })
    );
  voice.addCommand(clone);

  const list = new Command10("list")
    .description("List saved voices")
    .option("--type <type>", "Voice type: vc or vd")
    .action(asAction("voice.list", (ctx, options) => ctx.client.voice.list({ type: options.type })));
  voice.addCommand(list);

  const remove = new Command10("delete")
    .description("Delete a saved voice")
    .argument("<voice-id>", "Voice ID")
    .option("--type <type>", "Voice type: vc or vd")
    .action(
      asAction("voice.delete", (ctx, voiceId, options) =>
        ctx.client.voice.delete(voiceId, { type: options.type })
      )
    );
  voice.addCommand(remove);

  return voice;
}
/**
 * Converts the `--audio` CLI value into the sample fields of a voice-clone request.
 *
 * The value is interpreted, in order, as:
 *   1. a `data:` URL     -> `{ sample_data_url }`
 *   2. an existing path  -> file contents as base64, MIME from `audioMime` or the extension
 *   3. raw base64 text   -> passed through, MIME from `audioMime` or `audio/wav`
 *
 * @param input     Value of `--audio`: data URL, file path, or raw base64.
 * @param audioMime Optional explicit MIME type; overrides any inferred value.
 * @returns The sample fields to merge into the clone request.
 * @throws {Error} When `input` is not a data URL, does not name an existing
 *   file, and contains characters that cannot occur in base64. This is
 *   typically a mistyped file path, which would otherwise be uploaded as if it
 *   were audio data.
 */
function readVoiceSample(input, audioMime) {
  if (input.startsWith("data:")) {
    return { sample_data_url: input };
  }
  if (existsSync6(input)) {
    return {
      audio_base64: readFileSync5(input).toString("base64"),
      audio_mime: audioMime ?? inferAudioMime(input)
    };
  }
  // Accept the standard and URL-safe base64 alphabets and ignore whitespace,
  // which pasted base64 often contains. A value such as "./sample.wav" fails
  // this check because "." is not a base64 character. The check is a sanity
  // filter only: an extension-less missing path can still look like base64.
  const compact = input.replace(/\s+/g, "");
  if (!/^[A-Za-z0-9+/_-]+={0,2}$/.test(compact)) {
    const shown = input.length > 80 ? `${input.slice(0, 80)}...` : input;
    throw new Error(`Audio sample file not found and value is not valid base64: ${shown}`);
  }
  return { audio_base64: input, audio_mime: audioMime ?? "audio/wav" };
}
/**
 * Guesses the audio MIME type of a voice sample from its file extension.
 *
 * @param filePath Path of the sample file; only its extension is inspected.
 * @returns `"audio/mpeg"` for `.mp3` (case-insensitive), otherwise `"audio/wav"`.
 */
function inferAudioMime(filePath) {
  // ".wav" and every unrecognised extension share the WAV default.
  const isMp3 = extname2(filePath).toLowerCase() === ".mp3";
  return isMp3 ? "audio/mpeg" : "audio/wav";
}
1632
+ function writeWavOutput(data, outputPath) {
1633
+ if (!outputPath) {
1634
+ return null;
1635
+ }
1636
+ const raw = data.wav_base64 ?? data.output_data;
1637
+ if (typeof raw !== "string" || !raw) {
1638
+ return null;
1639
+ }
1640
+ mkdirSync5(dirname3(outputPath), { recursive: true });
1641
+ writeFileSync5(outputPath, Buffer.from(stripDataUri2(raw), "base64"));
1642
+ return outputPath;
1643
+ }
/**
 * Removes a data-URI header from a base64 payload.
 *
 * @param data Either plain base64 text or a `data:<mime>;base64,<payload>` URL.
 * @returns Everything after the first `;base64,` marker, or `data` unchanged
 *   when the marker is absent.
 */
function stripDataUri2(data) {
  const marker = ";base64,";
  const markerAt = data.indexOf(marker);
  if (markerAt < 0) {
    return data;
  }
  return data.slice(markerAt + marker.length);
}
1648
+
1472
1649
  // src/index.ts
1473
- var program = new Command10().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
1650
+ var program = new Command11().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
1474
1651
  "--gateway-url <url>",
1475
1652
  `Gateway URL (default: $ASSET_GATEWAY_URL, auth config, or ${DEFAULT_GATEWAY_URL})`
1476
1653
  ).option("--token <token>", "API token for authentication").option("--human", "Human-readable output instead of JSON").option("--fields <fields>", "Comma-separated list of output fields");
@@ -1482,5 +1659,6 @@ program.addCommand(createProcess3dCommand());
1482
1659
  program.addCommand(createProviderCommand());
1483
1660
  program.addCommand(createUploadCommand());
1484
1661
  program.addCommand(createJobCommand());
1662
+ program.addCommand(createVoiceCommand());
1485
1663
  program.addCommand(createDescribeCommand());
1486
1664
  await program.parseAsync(process.argv);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@doufunao123/asset-gateway",
3
- "version": "0.20.0",
3
+ "version": "0.21.1",
4
4
  "description": "Universal asset generation gateway CLI",
5
5
  "type": "module",
6
6
  "bin": {
@@ -27,7 +27,7 @@
27
27
  "node": ">=20"
28
28
  },
29
29
  "dependencies": {
30
- "@doufunao123/assetforge-sdk": "^0.3.0",
30
+ "@doufunao123/assetforge-sdk": "^0.5.2",
31
31
  "commander": "^13.1.0"
32
32
  },
33
33
  "devDependencies": {