noosphere 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2716,6 +2716,36 @@ var OpenAIMediaProvider = class {
2716
2716
  modalities = ["image", "video", "tts", "stt"];
2717
2717
  isLocal = false;
2718
2718
  modelsCache = null;
2719
+ voicesCache = null;
2720
+ /** Auto-fetch available TTS voices by sending an invalid voice and parsing the error. */
2721
+ async fetchVoices() {
2722
+ if (this.voicesCache) return this.voicesCache;
2723
+ try {
2724
+ const res = await fetch(`${OPENAI_API_BASE}/audio/speech`, {
2725
+ method: "POST",
2726
+ headers: {
2727
+ "Content-Type": "application/json",
2728
+ Authorization: `Bearer ${this.apiKey}`
2729
+ },
2730
+ body: JSON.stringify({ model: "tts-1", input: ".", voice: "__discover_voices__" })
2731
+ });
2732
+ if (!res.ok) {
2733
+ const data = await res.json();
2734
+ const msg = data?.error?.message ?? "";
2735
+ const shouldBe = msg.match(/Input should be ([^"]+)/);
2736
+ if (shouldBe) {
2737
+ const voiceList = shouldBe[1].match(/'([a-z]+)'/g);
2738
+ if (voiceList && voiceList.length > 0) {
2739
+ this.voicesCache = voiceList.map((v) => v.replace(/'/g, ""));
2740
+ return this.voicesCache;
2741
+ }
2742
+ }
2743
+ }
2744
+ } catch {
2745
+ }
2746
+ this.voicesCache = [];
2747
+ return this.voicesCache;
2748
+ }
2719
2749
  async ping() {
2720
2750
  try {
2721
2751
  const controller = new AbortController();
@@ -2751,6 +2781,7 @@ var OpenAIMediaProvider = class {
2751
2781
  } finally {
2752
2782
  clearTimeout(timer);
2753
2783
  }
2784
+ const voices = await this.fetchVoices();
2754
2785
  const entries = data?.data ?? [];
2755
2786
  const logo = getProviderLogo("openai");
2756
2787
  const models = [];
@@ -2766,7 +2797,7 @@ var OpenAIMediaProvider = class {
2766
2797
  cost: { price: 0, unit: "per_request" },
2767
2798
  logo,
2768
2799
  description: entry.description,
2769
- capabilities: this.getCapabilities(entry.id, mod)
2800
+ capabilities: this.getCapabilities(entry.id, mod, voices)
2770
2801
  };
2771
2802
  models.push(info);
2772
2803
  }
@@ -2911,7 +2942,7 @@ var OpenAIMediaProvider = class {
2911
2942
  }
2912
2943
  };
2913
2944
  }
2914
- getCapabilities(id, modality) {
2945
+ getCapabilities(id, modality, voices) {
2915
2946
  if (modality === "image") {
2916
2947
  return {
2917
2948
  maxWidth: id.startsWith("dall-e-3") ? 1792 : 1024,
@@ -2920,7 +2951,7 @@ var OpenAIMediaProvider = class {
2920
2951
  }
2921
2952
  if (modality === "tts") {
2922
2953
  return {
2923
- voices: ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
2954
+ voices: voices && voices.length > 0 ? voices : void 0
2924
2955
  };
2925
2956
  }
2926
2957
  if (modality === "video") {
@@ -2941,18 +2972,34 @@ var OpenAIMediaProvider = class {
2941
2972
  // src/providers/google-media.ts
2942
2973
  var GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
2943
2974
  var FETCH_TIMEOUT_MS6 = 8e3;
2944
- var GOOGLE_TTS_VOICES = [
2945
- "Aoede",
2946
- "Charon",
2947
- "Fenrir",
2948
- "Kore",
2949
- "Puck",
2950
- "Leda",
2951
- "Orus",
2952
- "Perseus",
2953
- "Zephyr",
2954
- "Callirrhoe"
2955
- ];
2975
/**
 * Discover the prebuilt TTS voice names the Google endpoint accepts.
 *
 * Posts a `generateContent` request to the `gemini-2.5-flash-preview-tts`
 * model with a deliberately invalid `voiceName` ("__discover_voices__"). When
 * the response is not OK, the comma-separated list following
 * "Allowed voice names are:" in `error.message` is returned. The list is read
 * up to the first period or line break so that sentence punctuation does not
 * end up inside the last name.
 *
 * @param {string} apiKey - API key, sent as the `key` query parameter.
 * @returns {Promise<string[]>} Trimmed voice names, or an empty array when the
 *   request succeeds unexpectedly, fails, times out, or the message has no
 *   voice list. Never rejects.
 */
async function fetchGoogleVoices(apiKey) {
  // Bound the probe with the module's fetch timeout so it cannot hang callers.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS6);

  try {
    const res = await fetch(
      `${GOOGLE_API_BASE}/models/gemini-2.5-flash-preview-tts:generateContent?key=${encodeURIComponent(apiKey)}`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          contents: [{ parts: [{ text: "." }] }],
          generationConfig: {
            response_modalities: ["AUDIO"],
            speech_config: { voiceConfig: { prebuiltVoiceConfig: { voiceName: "__discover_voices__" } } }
          }
        }),
        signal: controller.signal
      }
    );
    // Only an error response carries the list of allowed names.
    if (res.ok) return [];

    const data = await res.json();
    const rawMessage = data?.error?.message;
    const msg = typeof rawMessage === "string" ? rawMessage : "";
    const match = msg.match(/Allowed voice names are:\s*([^.\n]+)/i);
    if (!match) return [];

    return match[1]
      .split(",")
      .map((voice) => voice.trim())
      .filter(Boolean);
  } catch {
    // Best-effort discovery: network errors, timeouts and unparsable bodies
    // all mean "no voices known".
    return [];
  } finally {
    clearTimeout(timer);
  }
}
2956
3003
  function classifyGoogleModel(model) {
2957
3004
  const name = (model.name ?? "").replace("models/", "");
2958
3005
  const methods = model.supportedGenerationMethods ?? [];
@@ -2970,6 +3017,7 @@ var GoogleMediaProvider = class {
2970
3017
  modalities = ["image", "video", "tts"];
2971
3018
  isLocal = false;
2972
3019
  modelsCache = null;
3020
+ voicesCache = null;
2973
3021
  async ping() {
2974
3022
  try {
2975
3023
  const controller = new AbortController();
@@ -3004,6 +3052,9 @@ var GoogleMediaProvider = class {
3004
3052
  clearTimeout(timer);
3005
3053
  }
3006
3054
  const entries = data?.models ?? [];
3055
+ if (!this.voicesCache) {
3056
+ this.voicesCache = await fetchGoogleVoices(this.apiKey);
3057
+ }
3007
3058
  const logo = getProviderLogo("google");
3008
3059
  const models = [];
3009
3060
  for (const entry of entries) {
@@ -3020,7 +3071,7 @@ var GoogleMediaProvider = class {
3020
3071
  cost: { price: 0, unit: modality2 === "video" ? "per_video" : "per_image" },
3021
3072
  logo,
3022
3073
  description: entry.description,
3023
- capabilities: modality2 === "video" ? { maxDuration: 8, supportsStreaming: false } : modality2 === "tts" ? { voices: GOOGLE_TTS_VOICES } : void 0
3074
+ capabilities: modality2 === "video" ? { maxDuration: 8, supportsStreaming: false } : modality2 === "tts" ? { voices: this.voicesCache && this.voicesCache.length > 0 ? this.voicesCache : void 0 } : void 0
3024
3075
  };
3025
3076
  models.push(info);
3026
3077
  }