@mux/ai 0.7.3 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ var __export = (target, all) => {
5
5
  };
6
6
 
7
7
  // package.json
8
- var version = "0.7.3";
8
+ var version = "0.7.4";
9
9
 
10
10
  // src/env.ts
11
11
  import { z } from "zod";
@@ -48,6 +48,9 @@ var EnvSchema = z.object({
48
48
  ),
49
49
  MUX_TEST_ASSET_ID_AUDIO_ONLY: optionalString("Mux test asset ID for audio-only assets.", "Mux test asset id for audio-only assets for testing"),
50
50
  MUX_TEST_ASSET_ID_VIOLENT_AUDIO_ONLY: optionalString("Mux test asset ID for audio-only assets with violent content.", "Mux test asset id for audio-only assets with violent content for testing"),
51
+ // Eval config
52
+ MUX_AI_EVAL_MODEL_SET: optionalString("Eval model selection mode.", "Choose between 'default' (provider defaults only) or 'all' (all configured models)"),
53
+ MUX_AI_EVAL_MODELS: optionalString("Comma-separated eval model pairs.", "Comma-separated provider:model pairs (e.g. 'openai:gpt-5.1,anthropic:claude-sonnet-4-5,google:gemini-3-flash-preview')"),
51
54
  // AI Providers
52
55
  OPENAI_API_KEY: optionalString("OpenAI API key for OpenAI-backed workflows.", "OpenAI API key"),
53
56
  ANTHROPIC_API_KEY: optionalString("Anthropic API key for Claude-backed workflows.", "Anthropic API key"),
@@ -808,6 +811,81 @@ var DEFAULT_EMBEDDING_MODELS = {
808
811
  openai: "text-embedding-3-small",
809
812
  google: "gemini-embedding-001"
810
813
  };
814
// Model ids grouped by provider key. Read by getAllEvalModelConfigs() to
// enumerate every provider/model combination and by parseEvalModelPair() to
// validate user-supplied "provider:model" pairs.
var LANGUAGE_MODELS = {
  openai: [
    "gpt-5.1",
    "gpt-5-mini",
  ],
  anthropic: [
    "claude-sonnet-4-5",
  ],
  google: [
    "gemini-3-flash-preview",
    "gemini-2.5-flash",
  ],
};
819
/**
 * Builds one eval model config for each entry of DEFAULT_LANGUAGE_MODELS.
 *
 * @returns {Array<{ provider: string, modelId: string }>} One
 *   `{ provider, modelId }` object per key/value pair, in the object's own
 *   enumeration order.
 */
function getDefaultEvalModelConfigs() {
  const configs = [];
  for (const [provider, modelId] of Object.entries(DEFAULT_LANGUAGE_MODELS)) {
    configs.push({ provider, modelId });
  }
  return configs;
}
822
/**
 * Expands LANGUAGE_MODELS into a flat list with one config per
 * provider/model combination.
 *
 * @returns {Array<{ provider: string, modelId: string }>} Configs ordered by
 *   provider key, then by each provider's model list order.
 */
function getAllEvalModelConfigs() {
  const configs = [];
  Object.keys(LANGUAGE_MODELS).forEach((provider) => {
    LANGUAGE_MODELS[provider].forEach((modelId) => {
      configs.push({ provider, modelId });
    });
  });
  return configs;
}
825
/**
 * Reports whether a value is exactly one of the provider ids
 * "openai", "anthropic" or "google" (strict comparison, no coercion).
 *
 * @param {unknown} value - Candidate provider id.
 * @returns {boolean} True only for one of the three provider strings.
 */
function isSupportedProvider(value) {
  switch (value) {
    case "openai":
    case "anthropic":
    case "google":
      return true;
    default:
      return false;
  }
}
828
/**
 * Parses and validates a single "provider:model" pair.
 *
 * The pair is split on its FIRST colon only. Anything after that colon is the
 * model id, so an input such as "openai:gpt-5.1:extra" is rejected as an
 * unsupported model instead of having its trailing segment silently dropped
 * (which is what `split(":", 2)` does).
 *
 * @param {string} value - Raw pair; surrounding whitespace and whitespace
 *   around the colon are ignored.
 * @returns {{ provider: string, modelId: string }} The validated pair.
 * @throws {Error} If either side of the pair is missing, the provider fails
 *   isSupportedProvider(), or the model is not listed for that provider in
 *   LANGUAGE_MODELS.
 */
function parseEvalModelPair(value) {
  const trimmed = value.trim();
  const separatorIndex = trimmed.indexOf(":");
  // No colon at all means there is no model part; treat both halves as empty
  // so the "invalid pair" error below fires.
  const provider = separatorIndex === -1 ? "" : trimmed.slice(0, separatorIndex).trim();
  const modelId = separatorIndex === -1 ? "" : trimmed.slice(separatorIndex + 1).trim();
  if (!provider || !modelId) {
    throw new Error(
      `Invalid eval model pair "${value}". Use "provider:model" (example: "openai:gpt-5.1").`
    );
  }
  if (!isSupportedProvider(provider)) {
    throw new Error(
      `Unsupported eval provider "${provider}" in "${value}". Supported providers: ${Object.keys(LANGUAGE_MODELS).join(", ")}.`
    );
  }
  const supportedModels = LANGUAGE_MODELS[provider];
  if (!supportedModels.includes(modelId)) {
    throw new Error(
      `Unsupported eval model "${modelId}" for provider "${provider}". Supported models: ${supportedModels.join(", ")}.`
    );
  }
  return { provider, modelId };
}
854
/**
 * Resolves the list of eval model configs to use.
 *
 * Explicit `modelPairs` win over `selection`. Pairs are parsed first and
 * de-duplicated afterwards on their normalized `provider:modelId`, so
 * spellings that differ only in whitespace around the colon (for example
 * "openai:gpt-5.1" and "openai: gpt-5.1") produce a single config. The first
 * occurrence of each pair determines its position in the result.
 *
 * @param {{ modelPairs?: string[], selection?: string }} [options]
 * @param {string[]} [options.modelPairs] - "provider:model" strings; blank
 *   entries are ignored.
 * @param {string} [options.selection] - "all" selects every configured model;
 *   any other value (or none) selects the provider defaults.
 * @returns {Array<{ provider: string, modelId: string }>} Resolved configs.
 * @throws {Error} Whatever parseEvalModelPair() throws for an invalid pair.
 */
function resolveEvalModelConfigs(options = {}) {
  const explicitPairs = (options.modelPairs ?? [])
    .map((value) => value.trim())
    .filter(Boolean);
  if (explicitPairs.length > 0) {
    // Keyed by the parsed pair rather than the raw text so equivalent
    // spellings collapse; Map preserves first-insertion order.
    const uniqueConfigs = new Map();
    for (const pair of explicitPairs) {
      const config = parseEvalModelPair(pair);
      const key = `${config.provider}:${config.modelId}`;
      if (!uniqueConfigs.has(key)) {
        uniqueConfigs.set(key, config);
      }
    }
    return [...uniqueConfigs.values()];
  }
  return options.selection === "all" ? getAllEvalModelConfigs() : getDefaultEvalModelConfigs();
}
866
/**
 * Reads the eval model settings from an environment object and forwards them
 * to resolveEvalModelConfigs().
 *
 * - MUX_AI_EVAL_MODEL_SET: "default" (also used when unset or blank) or "all".
 * - MUX_AI_EVAL_MODELS: comma-separated "provider:model" pairs; blank entries
 *   are dropped. When unset or blank, `modelPairs` is passed as undefined.
 *
 * @param {object} [environment=env_default] - Object carrying the two
 *   variables above.
 * @returns {*} Whatever resolveEvalModelConfigs() returns.
 * @throws {Error} If MUX_AI_EVAL_MODEL_SET is set to anything other than
 *   "default" or "all".
 */
function resolveEvalModelConfigsFromEnv(environment = env_default) {
  const selectionInput = environment.MUX_AI_EVAL_MODEL_SET?.trim();
  const pairsInput = environment.MUX_AI_EVAL_MODELS?.trim();

  // An unset or blank value means the provider defaults.
  const selection = selectionInput || "default";
  if (selection !== "default" && selection !== "all") {
    throw new Error(
      `Invalid MUX_AI_EVAL_MODEL_SET="${selectionInput}". Expected "default" or "all".`
    );
  }

  const modelPairs = pairsInput
    ? pairsInput.split(",").map((entry) => entry.trim()).filter(Boolean)
    : undefined;

  return resolveEvalModelConfigs({ selection, modelPairs });
}
888
// Eval model configs, resolved once when this top-level statement runs by
// calling resolveEvalModelConfigsFromEnv() with its default environment
// argument. That call throws on an invalid MUX_AI_EVAL_MODEL_SET value or an
// invalid MUX_AI_EVAL_MODELS pair, so a bad setting surfaces here, not at use.
+ var EVAL_MODEL_CONFIGS = resolveEvalModelConfigsFromEnv();
811
889
  function resolveLanguageModelConfig(options = {}) {
812
890
  const provider = options.provider || "openai";
813
891
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
@@ -3597,6 +3675,18 @@ function getReadyAudioStaticRendition(asset) {
3597
3675
  );
3598
3676
  }
3599
3677
  var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
3678
/**
 * Picks the most specific status string available for an asset's audio
 * static rendition.
 *
 * Precedence:
 *   1. the `status` of the `static_renditions.files` entry named "audio.m4a",
 *   2. the aggregate `static_renditions.status`,
 *   3. "requested" when `static_renditions` exists but carries no usable
 *      status, otherwise "not_requested".
 * A status is only used when it is a non-empty string.
 *
 * @param {object} asset - Asset object; `static_renditions` is optional.
 * @returns {string} The resolved status.
 */
function getAudioStaticRenditionStatus(asset) {
  const renditions = asset.static_renditions;
  const isUsableStatus = (candidate) => typeof candidate === "string" && candidate.length > 0;

  const audioFileStatus = renditions?.files?.find((file) => file.name === "audio.m4a")?.status;
  if (isUsableStatus(audioFileStatus)) {
    return audioFileStatus;
  }

  const overallStatus = renditions?.status;
  if (isUsableStatus(overallStatus)) {
    return overallStatus;
  }

  return renditions ? "requested" : "not_requested";
}
3600
3690
  async function requestStaticRenditionCreation(assetId, credentials) {
3601
3691
  "use step";
3602
3692
  const muxClient = await resolveMuxClient(credentials);
@@ -3642,7 +3732,7 @@ async function waitForAudioStaticRendition({
3642
3732
  if (hasReadyAudioStaticRendition(currentAsset)) {
3643
3733
  return currentAsset;
3644
3734
  }
3645
- const currentStatus = currentAsset.static_renditions?.status || "unknown";
3735
+ const currentStatus = getAudioStaticRenditionStatus(currentAsset);
3646
3736
  console.warn(
3647
3737
  `\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
3648
3738
  );
@@ -3668,6 +3758,7 @@ async function createElevenLabsDubbingJob({
3668
3758
  audioBuffer,
3669
3759
  assetId,
3670
3760
  elevenLabsLangCode,
3761
+ elevenLabsSourceLangCode,
3671
3762
  numSpeakers,
3672
3763
  credentials
3673
3764
  }) {
@@ -3677,8 +3768,14 @@ async function createElevenLabsDubbingJob({
3677
3768
  const formData = new FormData();
3678
3769
  formData.append("file", audioBlob);
3679
3770
  formData.append("target_lang", elevenLabsLangCode);
3771
+ if (elevenLabsSourceLangCode) {
3772
+ formData.append("source_lang", elevenLabsSourceLangCode);
3773
+ }
3680
3774
  formData.append("num_speakers", numSpeakers.toString());
3681
- formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
3775
+ formData.append(
3776
+ "name",
3777
+ `Mux Asset ${assetId} - ${elevenLabsSourceLangCode ?? "auto"} to ${elevenLabsLangCode}`
3778
+ );
3682
3779
  const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
3683
3780
  method: "POST",
3684
3781
  headers: {
@@ -3787,6 +3884,7 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
3787
3884
  "use workflow";
3788
3885
  const {
3789
3886
  provider = "elevenlabs",
3887
+ fromLanguageCode,
3790
3888
  numSpeakers = 0,
3791
3889
  // 0 = auto-detect
3792
3890
  uploadToMux = true,
@@ -3836,13 +3934,18 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
3836
3934
  }
3837
3935
  console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
3838
3936
  const elevenLabsLangCode = toISO639_3(toLanguageCode);
3839
- console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
3937
+ const normalizedFromLanguageCode = fromLanguageCode?.trim();
3938
+ const elevenLabsSourceLangCode = normalizedFromLanguageCode ? toISO639_3(normalizedFromLanguageCode) : void 0;
3939
+ console.warn(
3940
+ `\u{1F50D} Creating dubbing job for asset ${assetId}: ${elevenLabsSourceLangCode ?? "auto"} -> ${elevenLabsLangCode}`
3941
+ );
3840
3942
  let dubbingId;
3841
3943
  try {
3842
3944
  dubbingId = await createElevenLabsDubbingJob({
3843
3945
  audioBuffer,
3844
3946
  assetId,
3845
3947
  elevenLabsLangCode,
3948
+ elevenLabsSourceLangCode,
3846
3949
  numSpeakers,
3847
3950
  credentials
3848
3951
  });