npm - @doufunao123/asset-gateway - Versions diffs - 0.21.0 → 0.22.0 - Mend

@doufunao123/asset-gateway 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -59,6 +59,9 @@ asset-gateway generate image --prompt "a cat" --size 1024x1024
 asset-gateway generate image --prompt "icon" --transparent --provider flux
 asset-gateway generate video --prompt "ocean waves"
 asset-gateway generate sfx --prompt "epic battle impact" --duration 3
+asset-gateway generate tts --prompt "（开心）今天天气真好！" --voice 冰糖
+asset-gateway voice design --voice-prompt "warm narrator" --preview-text "Welcome." --name narrator --output ./narrator.wav
+asset-gateway voice clone --audio ./sample.wav --preview-text "Welcome back." --name clone --output ./clone.wav
 asset-gateway generate character --prompt "medieval knight" --format fbx --pbr
 asset-gateway generate prop --prompt "ornate treasure chest" --polycount 5000
 asset-gateway generate model --image https://example.com/ref.png --ai-model latest
@@ -86,6 +89,20 @@ asset-gateway describe
 asset-gateway describe generate
 ```
+Programmatic TTS uses the SDK directly. MiMo voices include `冰糖`, `茉莉`, `苏打`, `白桦`, `Mia`, `Chloe`, `Milo`, and `Dean`; prompts may include audio tags such as `（开心）`, `(唱歌)`, or `[whisper]`.
+```ts
+await forge.tts("（开心）今天天气真好！", { voice: "冰糖" });
+```
+Voice design and clone are also available from the CLI:
+```bash
+asset-gateway voice design --voice-prompt "warm documentary narrator" --preview-text "Welcome to AssetForge." --name narrator --output ./narrator.wav
+asset-gateway voice clone --audio ./sample.wav --preview-text "Welcome back." --name clone --output ./clone.wav
+asset-gateway voice list --type vc
+```
 ## Output
 JSON by default. Use `--human` for readable output, `--fields` to filter:

package/dist/index.js CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 // src/index.ts
-import { Command as Command10 } from "commander";
+import { Command as Command11 } from "commander";
 // src/commands/auth.ts
 import { existsSync as existsSync2, unlinkSync } from "fs";
@@ -303,11 +303,11 @@ var SCHEMAS = {
         }
       },
       tts: {
-        description: "Text-to-speech via Gemini 3.1 Flash TTS",
+        description: "Text-to-speech via MiMo v2.5 TTS",
         params: {
           "--prompt": { type: "string", required: true },
-          "--voice": { type: "string", description: "Prebuilt voice name (default: Kore)" },
-          "--speakers": { type: "string", description: `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'` },
+          "--voice": { type: "string", description: "MiMo prebuilt voice name (default: server config, usually Mia)" },
+          "--context": { type: "string", description: "Natural-language style or director instruction" },
           "--output-dir": { type: "string", default: "." }
         }
       },
@@ -513,6 +513,36 @@ var SCHEMAS = {
       health: { description: "Health check", params: { "[name]": { type: "string", required: false } } }
     }
   },
+  voice: {
+    description: "Design, clone, list, and delete MiMo voices",
+    subcommands: {
+      design: {
+        description: "Generate a MiMo voice from a text description",
+        params: {
+          "--voice-prompt": { type: "string", required: true },
+          "--preview-text": { type: "string", required: true },
+          "--style": { type: "string" },
+          "--name": { type: "string" },
+          "--save-as": { type: "string" },
+          "--output": { type: "string", description: "Write preview WAV to this path" }
+        }
+      },
+      clone: {
+        description: "Clone a MiMo voice from an mp3/wav sample",
+        params: {
+          "--audio": { type: "string", required: true, description: "Voice sample mp3/wav file or data URL" },
+          "--preview-text": { type: "string", required: true },
+          "--audio-mime": { type: "string" },
+          "--style": { type: "string" },
+          "--name": { type: "string" },
+          "--save-as": { type: "string" },
+          "--output": { type: "string", description: "Write preview WAV to this path" }
+        }
+      },
+      list: { description: "List saved voices", params: { "--type": { type: "string", description: "vc | vd" } } },
+      delete: { description: "Delete a saved voice", params: { "<voice-id>": { type: "string", required: true }, "--type": { type: "string", description: "vc | vd" } } }
+    }
+  },
   job: {
     description: "Async job history",
     subcommands: {
@@ -576,7 +606,7 @@ function inferExtension(assetType) {
     audio: "mp3",
     sfx: "mp3",
     music: "mp3",
-    tts: "mp3",
+    tts: "wav",
     video: "mp4",
     model3d: "glb",
     character: "glb",
@@ -599,6 +629,8 @@ function inferExtFromResult(result) {
     "image/jpeg": "jpg",
     "video/mp4": "mp4",
     "audio/mpeg": "mp3",
+    "audio/wav": "wav",
+    "audio/x-wav": "wav",
     "model/gltf-binary": "glb",
     "model/gltf+json": "gltf",
     "model/stl": "stl",
@@ -743,9 +775,12 @@ function createGenerateCommand() {
     new Command3("video").description("Generate a video from a text prompt (or image-to-video with --input)").requiredOption("--prompt <text>", "Video description prompt").option("--provider <id>", "Provider to use").option("--input <url>", "Reference image URL for image-to-video (Grok)").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
       try {
         const ctx = createContext(this);
-        const data = await ctx.client.video(options.prompt, {
+        const requestOptions = {
           provider: options.provider,
           input: options.input
+        };
+        const data = await ctx.client.video(options.prompt, {
+          ...requestOptions
         });
         const localPath = await saveOutput(data, "video", options.outputDir);
         if (localPath) data.local_path = localPath;
@@ -841,11 +876,11 @@ function createGenerateCommand() {
     })
   );
   command.addCommand(
-    new Command3("tts").description("Text-to-speech via Gemini 3.1 Flash TTS").requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "Prebuilt voice name (default: Kore)").option("--speakers <json>", `Multi-speaker config JSON, e.g. '{"Name1":"Puck","Name2":"Kore"}'`).option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
+    new Command3("tts").description("Text-to-speech via MiMo v2.5 TTS").requiredOption("--prompt <text>", "Text to synthesize").option("--voice <name>", "MiMo prebuilt voice name (default: server config, usually Mia)").option("--context <text>", "Natural-language style or director instruction").option("--output-dir <dir>", "Directory to save output", ".").action(async function(options) {
       try {
         const ctx = createContext(this);
         const params = {};
-        if (options.speakers) params.speakers = JSON.parse(options.speakers);
+        if (options.context) params.context = options.context;
         const data = await ctx.client.tts(options.prompt, {
           voice: options.voice,
           params: Object.keys(params).length > 0 ? toJsonObject(params) : void 0
@@ -1505,8 +1540,114 @@ async function readLocalFile(filePath) {
   }
 }
+// src/commands/voice.ts
+import { existsSync as existsSync6, mkdirSync as mkdirSync5, readFileSync as readFileSync5, writeFileSync as writeFileSync5 } from "fs";
+import { dirname as dirname3, extname as extname2 } from "path";
+import { Command as Command10 } from "commander";
+function createVoiceCommand() { // Builds the "voice" command group with design, clone, list and delete subcommands.
+  const command = new Command10("voice").description("Design, clone, list, and delete voices");
+  command.addCommand(
+    new Command10("design").description("Generate a MiMo voice from a text description").requiredOption("--voice-prompt <text>", "Voice description / style prompt").requiredOption("--preview-text <text>", "Text to synthesize for preview").option("--style <text>", "Additional director/style instruction").option("--name <name>", "Save generated voice with this name").option("--save-as <name>", "Alias for --name").option("--output <path>", "Write preview WAV to this path").action(async function(options) {
+      const ctx = createContext(this); // NOTE(review): runs before the try block, so if createContext throws the error is not routed through printError.
+      try {
+        const request = { // Maps the camelCased CLI options onto the snake_case request fields.
+          voice_prompt: options.voicePrompt,
+          preview_text: options.previewText,
+          style: options.style,
+          name: options.name,
+          save_as: options.saveAs // NOTE(review): help text calls --save-as an alias for --name, yet it is forwarded as its own field; presumably reconciled downstream - confirm.
+        };
+        const data = await ctx.client.voice.design(request);
+        const localPath = writeWavOutput(data, options.output); // null when --output was omitted or the response carries no audio string.
+        if (localPath) data.local_path = localPath; // Surface the written file path in the printed result.
+        printSuccess("voice.design", data, ctx);
+      } catch (error2) {
+        printError("voice.design", error2, ctx.human);
+      }
+    })
+  );
+  command.addCommand(
+    new Command10("clone").description("Clone a MiMo voice from an mp3/wav sample").requiredOption("--audio <path-or-data-url>", "Voice sample mp3/wav file or data URL").requiredOption("--preview-text <text>", "Text to synthesize for preview").option("--audio-mime <mime>", "Audio MIME type when --audio is raw base64").option("--style <text>", "Director/style instruction").option("--name <name>", "Save cloned voice with this name").option("--save-as <name>", "Alias for --name").option("--output <path>", "Write preview WAV to this path").action(async function(options) {
+      const ctx = createContext(this); // NOTE(review): same placement as in "design" - outside the try block.
+      try {
+        const sample = readVoiceSample(options.audio, options.audioMime); // Yields either { sample_data_url } or { audio_base64, audio_mime }.
+        const request = {
+          ...sample, // Sample fields are spread first; the keys below do not overlap with them.
+          preview_text: options.previewText,
+          style: options.style,
+          name: options.name,
+          save_as: options.saveAs
+        };
+        const data = await ctx.client.voice.clone(request);
+        const localPath = writeWavOutput(data, options.output); // null when --output was omitted or the response carries no audio string.
+        if (localPath) data.local_path = localPath;
+        printSuccess("voice.clone", data, ctx);
+      } catch (error2) {
+        printError("voice.clone", error2, ctx.human);
+      }
+    })
+  );
+  command.addCommand(
+    new Command10("list").description("List saved voices").option("--type <type>", "Voice type: vc or vd").action(async function(options) {
+      const ctx = createContext(this);
+      try {
+        const data = await ctx.client.voice.list({ type: options.type }); // type is undefined when --type is not passed; the value is not validated here.
+        printSuccess("voice.list", data, ctx);
+      } catch (error2) {
+        printError("voice.list", error2, ctx.human);
+      }
+    })
+  );
+  command.addCommand(
+    new Command10("delete").description("Delete a saved voice").argument("<voice-id>", "Voice ID").option("--type <type>", "Voice type: vc or vd").action(async function(voiceId, options) {
+      const ctx = createContext(this);
+      try {
+        const data = await ctx.client.voice.delete(voiceId, { type: options.type }); // The positional <voice-id> arrives as the first action argument.
+        printSuccess("voice.delete", data, ctx);
+      } catch (error2) {
+        printError("voice.delete", error2, ctx.human);
+      }
+    })
+  );
+  return command; // Caller registers this group on the root program.
+}
+function readVoiceSample(input, audioMime) { // Turns the --audio value into request fields: a data URL, a file's contents, or raw base64.
+  if (input.startsWith("data:")) { // Data URLs are forwarded untouched; audioMime is ignored on this path.
+    return { sample_data_url: input };
+  }
+  if (existsSync6(input)) { // An existing path is read synchronously and base64-encoded.
+    return {
+      audio_base64: readFileSync5(input).toString("base64"),
+      audio_mime: audioMime ?? inferAudioMime(input) // An explicit --audio-mime wins over the extension-based guess.
+    };
+  }
+  return { audio_base64: input, audio_mime: audioMime ?? "audio/wav" }; // NOTE(review): a mistyped or missing file path also lands here and is sent verbatim as if it were base64.
+}
+function inferAudioMime(filePath) { // Guesses an audio MIME type from the file extension (case-insensitive).
+  const ext = extname2(filePath).toLowerCase();
+  if (ext === ".mp3") return "audio/mpeg";
+  if (ext === ".wav") return "audio/wav"; // NOTE(review): redundant with the fallback below, which returns the same value.
+  return "audio/wav"; // Any other or missing extension is labelled as WAV.
+}
+function writeWavOutput(data, outputPath) { // Decodes the base64 audio in the response and writes it to outputPath; returns the path, or null if nothing was written.
+  if (!outputPath) { // No --output given: nothing to write.
+    return null;
+  }
+  const raw = data.wav_base64 ?? data.output_data; // Prefer wav_base64; fall back to output_data when it is null/undefined.
+  if (typeof raw !== "string" || !raw) { // NOTE(review): an output path was requested but no audio string is present; this returns null without any warning.
+    return null;
+  }
+  mkdirSync5(dirname3(outputPath), { recursive: true }); // Create the parent directory chain if it does not exist yet.
+  writeFileSync5(outputPath, Buffer.from(stripDataUri2(raw), "base64")); // Drop any data-URI header before decoding.
+  return outputPath;
+}
+function stripDataUri2(data) { // Returns the payload after a ";base64," marker, or the input unchanged when no marker is present.
+  const idx = data.indexOf(";base64,"); // First occurrence of the marker; -1 if absent.
+  return idx >= 0 ? data.slice(idx + 8) : data; // 8 is the length of ";base64,".
+}
 // src/index.ts
-var program = new Command10().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
+var program = new Command11().name("asset-gateway").description("Universal asset generation gateway CLI").version(CLI_VERSION).option(
   "--gateway-url <url>",
   `Gateway URL (default: $ASSET_GATEWAY_URL, auth config, or ${DEFAULT_GATEWAY_URL})`
 ).option("--token <token>", "API token for authentication").option("--human", "Human-readable output instead of JSON").option("--fields <fields>", "Comma-separated list of output fields");
@@ -1518,5 +1659,6 @@ program.addCommand(createProcess3dCommand());
 program.addCommand(createProviderCommand());
 program.addCommand(createUploadCommand());
 program.addCommand(createJobCommand());
+program.addCommand(createVoiceCommand());
 program.addCommand(createDescribeCommand());
 await program.parseAsync(process.argv);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@doufunao123/asset-gateway",
-  "version": "0.21.0",
+  "version": "0.22.0",
   "description": "Universal asset generation gateway CLI",
   "type": "module",
   "bin": {
@@ -27,7 +27,7 @@
     "node": ">=20"
   },
   "dependencies": {
-    "@doufunao123/assetforge-sdk": "^0.4.0",
+    "@doufunao123/assetforge-sdk": "^0.6.0",
     "commander": "^13.1.0"
   },
   "devDependencies": {