noosphere 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -470,7 +470,10 @@ declare class OpenAIMediaProvider implements NoosphereProvider {
470
470
  readonly modalities: Modality[];
471
471
  readonly isLocal = false;
472
472
  private modelsCache;
473
+ private voicesCache;
473
474
  constructor(apiKey: string);
475
+ /** Auto-fetch available TTS voices by sending an invalid voice and parsing the error. */
476
+ private fetchVoices;
474
477
  ping(): Promise<boolean>;
475
478
  listModels(modality?: Modality): Promise<ModelInfo[]>;
476
479
  image(options: ImageOptions): Promise<NoosphereResult>;
@@ -486,6 +489,7 @@ declare class GoogleMediaProvider implements NoosphereProvider {
486
489
  readonly modalities: Modality[];
487
490
  readonly isLocal = false;
488
491
  private modelsCache;
492
+ private voicesCache;
489
493
  constructor(apiKey: string);
490
494
  ping(): Promise<boolean>;
491
495
  listModels(modality?: Modality): Promise<ModelInfo[]>;
package/dist/index.d.ts CHANGED
@@ -470,7 +470,10 @@ declare class OpenAIMediaProvider implements NoosphereProvider {
470
470
  readonly modalities: Modality[];
471
471
  readonly isLocal = false;
472
472
  private modelsCache;
473
+ private voicesCache;
473
474
  constructor(apiKey: string);
475
+ /** Auto-fetch available TTS voices by sending an invalid voice and parsing the error. */
476
+ private fetchVoices;
474
477
  ping(): Promise<boolean>;
475
478
  listModels(modality?: Modality): Promise<ModelInfo[]>;
476
479
  image(options: ImageOptions): Promise<NoosphereResult>;
@@ -486,6 +489,7 @@ declare class GoogleMediaProvider implements NoosphereProvider {
486
489
  readonly modalities: Modality[];
487
490
  readonly isLocal = false;
488
491
  private modelsCache;
492
+ private voicesCache;
489
493
  constructor(apiKey: string);
490
494
  ping(): Promise<boolean>;
491
495
  listModels(modality?: Modality): Promise<ModelInfo[]>;
package/dist/index.js CHANGED
@@ -2663,6 +2663,36 @@ var OpenAIMediaProvider = class {
2663
2663
  modalities = ["image", "video", "tts", "stt"];
2664
2664
  isLocal = false;
2665
2665
  modelsCache = null;
2666
+ voicesCache = null;
2667
+ /** Auto-fetch available TTS voices by sending an invalid voice and parsing the error. */
// Discovers the voice names accepted by the speech endpoint and caches them on
// this instance in voicesCache.
//
// How it works: POSTs to OPENAI_API_BASE + /audio/speech with model tts-1, a
// one-character input and the placeholder voice __discover_voices__. When the
// response is not ok, the JSON error message is searched for the phrase
// Input should be ... and every single-quoted run of lowercase a-z letters in
// that phrase is taken as a voice name.
//
// Returns: the cached array when voicesCache is already set; otherwise the
// parsed names, or an empty array when the response was ok, the message did
// not match, or anything threw (the catch below is empty, so errors are
// swallowed without logging).
//
// NOTE(review): an empty array is truthy, so once a discovery attempt fails
// and voicesCache becomes [], the guard on the first line keeps returning that
// empty array; the request is not retried for this instance.
// NOTE(review): the name pattern only accepts lowercase a-z, so a voice name
// containing digits, uppercase letters, hyphens or underscores would be
// dropped - confirm against the current API error format.
// NOTE(review): no signal is passed to this fetch, so no timeout or abort is
// applied by this method.
// NOTE(review): presumably relies on the exact wording of the provider error
// message; if that wording changes, this yields no voices without any error.
2668
+ async fetchVoices() {
// Cache guard: any truthy voicesCache (including an empty array) short-circuits.
2669
+ if (this.voicesCache) return this.voicesCache;
2670
+ try {
// Probe request: the placeholder voice is expected to be rejected - TODO confirm.
2671
+ const res = await fetch(`${OPENAI_API_BASE}/audio/speech`, {
2672
+ method: "POST",
2673
+ headers: {
2674
+ "Content-Type": "application/json",
2675
+ Authorization: `Bearer ${this.apiKey}`
2676
+ },
2677
+ body: JSON.stringify({ model: "tts-1", input: ".", voice: "__discover_voices__" })
2678
+ });
// Only a non-ok response is parsed; an ok response falls through to the empty result.
2679
+ if (!res.ok) {
2680
+ const data = await res.json();
2681
+ const msg = data?.error?.message ?? "";
2682
+ const shouldBe = msg.match(/Input should be ([^"]+)/);
2683
+ if (shouldBe) {
2684
+ const voiceList = shouldBe[1].match(/'([a-z]+)'/g);
2685
+ if (voiceList && voiceList.length > 0) {
2686
+ this.voicesCache = voiceList.map((v) => v.replace(/'/g, ""));
2687
+ return this.voicesCache;
2688
+ }
2689
+ }
2690
+ }
2691
+ } catch {
2692
+ }
2693
+ this.voicesCache = [];
2694
+ return this.voicesCache;
2695
+ }
2666
2696
  async ping() {
2667
2697
  try {
2668
2698
  const controller = new AbortController();
@@ -2698,6 +2728,7 @@ var OpenAIMediaProvider = class {
2698
2728
  } finally {
2699
2729
  clearTimeout(timer);
2700
2730
  }
2731
+ const voices = await this.fetchVoices();
2701
2732
  const entries = data?.data ?? [];
2702
2733
  const logo = getProviderLogo("openai");
2703
2734
  const models = [];
@@ -2713,7 +2744,7 @@ var OpenAIMediaProvider = class {
2713
2744
  cost: { price: 0, unit: "per_request" },
2714
2745
  logo,
2715
2746
  description: entry.description,
2716
- capabilities: this.getCapabilities(entry.id, mod)
2747
+ capabilities: this.getCapabilities(entry.id, mod, voices)
2717
2748
  };
2718
2749
  models.push(info);
2719
2750
  }
@@ -2858,7 +2889,7 @@ var OpenAIMediaProvider = class {
2858
2889
  }
2859
2890
  };
2860
2891
  }
2861
- getCapabilities(id, modality) {
2892
+ getCapabilities(id, modality, voices) {
2862
2893
  if (modality === "image") {
2863
2894
  return {
2864
2895
  maxWidth: id.startsWith("dall-e-3") ? 1792 : 1024,
@@ -2867,7 +2898,7 @@ var OpenAIMediaProvider = class {
2867
2898
  }
2868
2899
  if (modality === "tts") {
2869
2900
  return {
2870
- voices: ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
2901
+ voices: voices && voices.length > 0 ? voices : void 0
2871
2902
  };
2872
2903
  }
2873
2904
  if (modality === "video") {
@@ -2888,18 +2919,34 @@ var OpenAIMediaProvider = class {
2888
2919
  // src/providers/google-media.ts
2889
2920
  var GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
2890
2921
  var FETCH_TIMEOUT_MS6 = 8e3;
2891
- var GOOGLE_TTS_VOICES = [
2892
- "Aoede",
2893
- "Charon",
2894
- "Fenrir",
2895
- "Kore",
2896
- "Puck",
2897
- "Leda",
2898
- "Orus",
2899
- "Perseus",
2900
- "Zephyr",
2901
- "Callirrhoe"
2902
- ];
/**
 * Best-effort discovery of the TTS voice names accepted by the Google API.
 *
 * Sends a generateContent POST for the model gemini-2.5-flash-preview-tts with
 * the placeholder voice name __discover_voices__. When the response is not ok,
 * the JSON error message is searched (case-insensitively) for the phrase
 * Allowed voice names are: and the text after it is split on commas.
 *
 * @param apiKey API key; it is interpolated into the key query parameter of the request URL.
 * @returns The trimmed, non-empty names parsed from the error message, or an
 *   empty array when the response was ok, the phrase was not found, or
 *   anything threw.
 */
2922
+ async function fetchGoogleVoices(apiKey) {
2923
+ try {
// NOTE(review): no signal is passed to this fetch, so no timeout or abort is
// applied by this function.
2924
+ const res = await fetch(
// NOTE(review): apiKey goes into the URL as-is (no encodeURIComponent), so the
// key is part of the request URL - confirm this is acceptable for logs and proxies.
2925
+ `${GOOGLE_API_BASE}/models/gemini-2.5-flash-preview-tts:generateContent?key=${apiKey}`,
2926
+ {
2927
+ method: "POST",
2928
+ headers: { "Content-Type": "application/json" },
2929
+ body: JSON.stringify({
2930
+ contents: [{ parts: [{ text: "." }] }],
2931
+ generationConfig: {
2932
+ response_modalities: ["AUDIO"],
// Placeholder voice name; presumably the API rejects it and lists the valid names - TODO confirm.
2933
+ speech_config: { voiceConfig: { prebuiltVoiceConfig: { voiceName: "__discover_voices__" } } }
2934
+ }
2935
+ })
2936
+ }
2937
+ );
// Only a non-ok response is inspected; an ok response falls through to the empty result.
2938
+ if (!res.ok) {
2939
+ const data = await res.json();
2940
+ const msg = data?.error?.message ?? "";
// The capture (.+) runs to the end of the line, so any text that follows the
// list (for example a trailing period) stays attached to the last name.
2941
+ const match = msg.match(/Allowed voice names are:\s*(.+)/i);
2942
+ if (match) {
2943
+ return match[1].split(",").map((v) => v.trim()).filter(Boolean);
2944
+ }
2945
+ }
// Empty catch: anything thrown inside the try block falls through to the empty-array return.
2946
+ } catch {
2947
+ }
2948
+ return [];
2949
+ }
2903
2950
  function classifyGoogleModel(model) {
2904
2951
  const name = (model.name ?? "").replace("models/", "");
2905
2952
  const methods = model.supportedGenerationMethods ?? [];
@@ -2917,6 +2964,7 @@ var GoogleMediaProvider = class {
2917
2964
  modalities = ["image", "video", "tts"];
2918
2965
  isLocal = false;
2919
2966
  modelsCache = null;
2967
+ voicesCache = null;
2920
2968
  async ping() {
2921
2969
  try {
2922
2970
  const controller = new AbortController();
@@ -2951,6 +2999,9 @@ var GoogleMediaProvider = class {
2951
2999
  clearTimeout(timer);
2952
3000
  }
2953
3001
  const entries = data?.models ?? [];
3002
+ if (!this.voicesCache) {
3003
+ this.voicesCache = await fetchGoogleVoices(this.apiKey);
3004
+ }
2954
3005
  const logo = getProviderLogo("google");
2955
3006
  const models = [];
2956
3007
  for (const entry of entries) {
@@ -2967,7 +3018,7 @@ var GoogleMediaProvider = class {
2967
3018
  cost: { price: 0, unit: modality2 === "video" ? "per_video" : "per_image" },
2968
3019
  logo,
2969
3020
  description: entry.description,
2970
- capabilities: modality2 === "video" ? { maxDuration: 8, supportsStreaming: false } : modality2 === "tts" ? { voices: GOOGLE_TTS_VOICES } : void 0
3021
+ capabilities: modality2 === "video" ? { maxDuration: 8, supportsStreaming: false } : modality2 === "tts" ? { voices: this.voicesCache && this.voicesCache.length > 0 ? this.voicesCache : void 0 } : void 0
2971
3022
  };
2972
3023
  models.push(info);
2973
3024
  }