noosphere 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -21,11 +21,13 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
23
  AudioCraftProvider: () => AudioCraftProvider,
24
+ GoogleMediaProvider: () => GoogleMediaProvider,
24
25
  HfLocalProvider: () => HfLocalProvider,
25
26
  Noosphere: () => Noosphere,
26
27
  NoosphereError: () => NoosphereError,
27
28
  OllamaProvider: () => OllamaProvider,
28
29
  OpenAICompatProvider: () => OpenAICompatProvider,
30
+ OpenAIMediaProvider: () => OpenAIMediaProvider,
29
31
  PROVIDER_IDS: () => PROVIDER_IDS,
30
32
  PROVIDER_LOGOS: () => PROVIDER_LOGOS,
31
33
  WhisperLocalProvider: () => WhisperLocalProvider,
@@ -342,20 +344,22 @@ var Registry = class {
342
344
  const cached = this.modelCache.get(provider);
343
345
  return cached?.models.find((m) => m.id === modelId) ?? null;
344
346
  }
345
- async syncProvider(providerId) {
347
+ async syncProvider(providerId, modality) {
346
348
  const provider = this.providers.get(providerId);
347
349
  if (!provider) return 0;
348
- const models = await provider.listModels();
350
+ if (modality && !provider.modalities.includes(modality)) return 0;
351
+ const models = await provider.listModels(modality);
349
352
  this.modelCache.set(providerId, { models, syncedAt: Date.now() });
350
353
  return models.length;
351
354
  }
352
- async syncAll() {
355
+ async syncAll(modality) {
353
356
  const byProvider = {};
354
357
  const errors = [];
355
358
  let synced = 0;
356
359
  for (const provider of this.providers.values()) {
360
+ if (modality && !provider.modalities.includes(modality)) continue;
357
361
  try {
358
- const count = await this.syncProvider(provider.id);
362
+ const count = await this.syncProvider(provider.id, modality);
359
363
  byProvider[provider.id] = count;
360
364
  synced += count;
361
365
  } catch (err) {
@@ -1140,6 +1144,7 @@ var ComfyUIProvider = class {
1140
1144
  id: `civitai-${item.id}`,
1141
1145
  provider: "comfyui",
1142
1146
  name: item.name ?? `CivitAI Model ${item.id}`,
1147
+ description: item.description ? item.description.replace(/<[^>]+>/g, "").trim().slice(0, 300) || void 0 : void 0,
1143
1148
  modality: "image",
1144
1149
  local: true,
1145
1150
  cost: { price: 0, unit: "free" },
@@ -1578,6 +1583,28 @@ async function fetchJson(url, options) {
1578
1583
  clearTimeout(timer);
1579
1584
  }
1580
1585
  }
1586
/**
 * Scrape model descriptions from the public Ollama library page.
 *
 * Requests https://ollama.com/library (aborted after 5 seconds) and pairs each
 * `href="/library/<name>"` link with the text of the first `<p>` element that
 * follows it in the HTML.
 *
 * @returns {Promise<Map<string, string>>} Map from the link's model name to its
 *   plain-text description. Entries whose name or description is empty are omitted.
 * @throws {Error} If the request fails or is aborted, or the status is not OK
 *   (message `HTTP <status>`).
 */
async function fetchOllamaDescriptions() {
  // The five entities this scraper understands. They are decoded in ONE pass so
  // that already-escaped text such as "&amp;lt;" becomes "&lt;" and not "<".
  const entityText = { amp: "&", lt: "<", gt: ">", quot: '"', "#39": "'" };
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 5e3);
  try {
    const res = await fetch("https://ollama.com/library", { signal: controller.signal });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const html = await res.text();
    const descriptions = /* @__PURE__ */ new Map();
    const cardRegex = /href="\/library\/([^"]+)"[\s\S]*?<p[^>]*>([\s\S]*?)<\/p>/g;
    for (const match of html.matchAll(cardRegex)) {
      const modelName = match[1].trim();
      const desc = match[2]
        .replace(/<[^>]*>/g, "") // drop nested markup before decoding entities
        .replace(/&(amp|lt|gt|quot|#39);/g, (_whole, entity) => entityText[entity])
        .replace(/\s+/g, " ")
        .trim();
      if (modelName && desc) {
        descriptions.set(modelName, desc);
      }
    }
    return descriptions;
  } finally {
    // Always release the abort timer, on success and on failure.
    clearTimeout(timer);
  }
}
1581
1608
  var OllamaProvider = class {
1582
1609
  id = "ollama";
1583
1610
  name = "Ollama (Local)";
@@ -1601,10 +1628,11 @@ var OllamaProvider = class {
1601
1628
  }
1602
1629
  async listModels(_modality) {
1603
1630
  if (_modality && _modality !== "llm") return [];
1604
- const [localData, catalogData, runningData] = await Promise.all([
1631
+ const [localData, catalogData, runningData, descriptions] = await Promise.all([
1605
1632
  fetchJson(`${this.baseUrl}/api/tags`, { timeoutMs: 5e3 }).catch(() => null),
1606
1633
  fetchJson("https://ollama.com/api/tags", { timeoutMs: 5e3 }).catch(() => null),
1607
- fetchJson(`${this.baseUrl}/api/ps`, { timeoutMs: 5e3 }).catch(() => null)
1634
+ fetchJson(`${this.baseUrl}/api/ps`, { timeoutMs: 5e3 }).catch(() => null),
1635
+ fetchOllamaDescriptions().catch(() => /* @__PURE__ */ new Map())
1608
1636
  ]);
1609
1637
  const runningNames = /* @__PURE__ */ new Set();
1610
1638
  if (runningData?.models) {
@@ -1617,27 +1645,30 @@ var OllamaProvider = class {
1617
1645
  if (localData?.models) {
1618
1646
  for (const m of localData.models) {
1619
1647
  const isRunning = runningNames.has(m.name) || runningNames.has(m.model);
1620
- models.set(m.name, this.toModelInfo(m, isRunning ? "running" : "installed", true));
1648
+ models.set(m.name, this.toModelInfo(m, isRunning ? "running" : "installed", true, descriptions));
1621
1649
  }
1622
1650
  }
1623
1651
  if (catalogData?.models) {
1624
1652
  for (const m of catalogData.models) {
1625
1653
  const name = m.name;
1626
1654
  if (!models.has(name)) {
1627
- models.set(name, this.toModelInfo(m, "available", false));
1655
+ models.set(name, this.toModelInfo(m, "available", false, descriptions));
1628
1656
  }
1629
1657
  }
1630
1658
  }
1631
1659
  return Array.from(models.values());
1632
1660
  }
1633
- toModelInfo(m, status, isLocal) {
1661
+ toModelInfo(m, status, isLocal, descriptions) {
1634
1662
  const name = m.name ?? m.model ?? "unknown";
1635
1663
  const family = m.details?.family;
1636
1664
  const logoProvider = inferLogoProvider(name, family);
1665
+ const baseName = name.split(":")[0];
1666
+ const description = descriptions?.get(baseName);
1637
1667
  return {
1638
1668
  id: name,
1639
1669
  provider: "ollama",
1640
1670
  name,
1671
+ ...description ? { description } : {},
1641
1672
  modality: "llm",
1642
1673
  local: true,
1643
1674
  cost: { price: 0, unit: "free" },
@@ -1846,16 +1877,79 @@ var OllamaProvider = class {
1846
1877
  }
1847
1878
  };
1848
1879
 
1880
+ // src/utils/parse-readme.ts
1881
/**
 * Download a HuggingFace repository README and reduce it to a short description.
 *
 * Never rejects: a non-OK response, a network error, an abort after `timeoutMs`,
 * or a failure while parsing all resolve to `undefined`.
 *
 * @param {string} modelId - Repository id, interpolated into
 *   `https://huggingface.co/<modelId>/raw/main/README.md`.
 * @param {number} [timeoutMs=5000] - Milliseconds before the request is aborted.
 * @returns {Promise<string|undefined>} Whatever parseReadmeDescription() returns
 *   for the README text, or `undefined` on any failure.
 */
async function fetchReadmeDescription(modelId, timeoutMs = 5e3) {
  let abortTimer;
  try {
    const aborter = new AbortController();
    abortTimer = setTimeout(() => aborter.abort(), timeoutMs);
    const readmeUrl = `https://huggingface.co/${modelId}/raw/main/README.md`;
    const response = await fetch(readmeUrl, { signal: aborter.signal });
    if (!response.ok) {
      return void 0;
    }
    const markdown = await response.text();
    return parseReadmeDescription(markdown);
  } catch {
    // Descriptions are optional metadata, so every failure maps to "none".
    return void 0;
  } finally {
    // No-op when the timer was never created.
    clearTimeout(abortTimer);
  }
}
1900
/**
 * Extract the first prose paragraph of a Markdown README as a plain-text summary.
 *
 * Steps: strip a leading YAML front-matter block, skip headings, badge/image
 * lines, HTML lines and link-only lines, join the first run of remaining lines
 * into one paragraph, remove inline link/bold/code/HTML markup, and cap the
 * result at 300 characters (297 + "...").
 *
 * @param {string} readme - Raw README text.
 * @returns {string|undefined} The cleaned paragraph, or `undefined` when the
 *   input is not a string or no prose paragraph is found.
 */
function parseReadmeDescription(readme) {
  if (typeof readme !== "string") return void 0;
  // Front matter must open with a `---` line and close with a `---` line of its
  // own; a `---` that merely appears inside a value does not end the block.
  const withoutFrontmatter = readme.replace(
    /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/,
    ""
  );
  let paragraph = "";
  for (const line of withoutFrontmatter.split("\n")) {
    const trimmed = line.trim();
    // A blank line or a heading ends the paragraph once one has started;
    // before that they are simply skipped.
    if (!trimmed || trimmed.startsWith("#")) {
      if (paragraph) break;
      continue;
    }
    // Badges/images (`![..` or `[![..`) and raw HTML lines carry no prose.
    if (/^\[?!\[/.test(trimmed) || trimmed.startsWith("<")) continue;
    // A line that is nothing but a single Markdown link.
    if (/^\[.*\]\(.*\)$/.test(trimmed)) continue;
    paragraph += (paragraph ? " " : "") + trimmed;
  }
  if (!paragraph) return void 0;
  paragraph = paragraph
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // [text](url) -> text
    .replace(/\*\*([^*]+)\*\*/g, "$1") // **bold** -> bold
    .replace(/`([^`]+)`/g, "$1") // `code` -> code
    .replace(/<[^>]+>/g, "") // inline HTML tags
    .trim();
  if (!paragraph) return void 0;
  if (paragraph.length > 300) paragraph = paragraph.slice(0, 297) + "...";
  return paragraph;
}
1924
+
1849
1925
  // src/providers/hf-local.ts
1850
1926
  var import_promises = require("fs/promises");
1851
1927
  var import_node_path = require("path");
1852
1928
  var import_node_os = require("os");
1853
1929
  var FETCH_TIMEOUT_MS3 = 5e3;
1854
1930
  var HF_HUB_API2 = "https://huggingface.co/api/models";
1931
// Lookup table: HuggingFace organisation name -> logo-provider key.
// Keys are matched exactly as written (case-sensitive property lookup).
var HF_ORG_TO_LOGO_PROVIDER = {
  // Organisations that map to a dedicated vendor logo.
  "meta-llama": "meta",
  facebook: "meta",
  google: "google",
  microsoft: "microsoft",
  nvidia: "nvidia",
  mistralai: "mistral",
  Qwen: "qwen",
  "deepseek-ai": "deepseek",
  openai: "openai",
  CohereForAI: "cohere",
  rhasspy: "piper",
  // Organisations explicitly pinned to the generic HuggingFace logo.
  stabilityai: "huggingface",
  "black-forest-labs": "huggingface",
  tiiuae: "huggingface",
  allenai: "huggingface",
  Salesforce: "huggingface"
};
1855
1949
  var PIPELINE_TAG_TO_MODALITY = {
1856
1950
  "text-to-image": "image",
1857
1951
  "text-to-video": "video",
1858
- "text-to-audio": "tts",
1952
+ "text-to-audio": "music",
1859
1953
  "text-to-speech": "tts",
1860
1954
  "automatic-speech-recognition": "stt"
1861
1955
  };
@@ -1883,7 +1977,7 @@ async function fetchJsonTimeout(url, timeoutMs = FETCH_TIMEOUT_MS3) {
1883
1977
  var HfLocalProvider = class {
1884
1978
  id = "hf-local";
1885
1979
  name = "HuggingFace Local Models";
1886
- modalities = ["image", "video", "tts", "stt"];
1980
+ modalities = ["image", "video", "tts", "stt", "music"];
1887
1981
  isLocal = true;
1888
1982
  cachedModels = null;
1889
1983
  async ping() {
@@ -1905,8 +1999,7 @@ var HfLocalProvider = class {
1905
1999
  }
1906
2000
  async fetchCatalog() {
1907
2001
  const seen = /* @__PURE__ */ new Set();
1908
- const models = [];
1909
- const logo = getProviderLogo("huggingface");
2002
+ const entries = [];
1910
2003
  const results = await Promise.allSettled(
1911
2004
  CATALOG_QUERIES.map(async (q) => {
1912
2005
  const params = new URLSearchParams({
@@ -1924,32 +2017,56 @@ var HfLocalProvider = class {
1924
2017
  const id = entry.id ?? entry.modelId;
1925
2018
  if (!id || seen.has(id)) continue;
1926
2019
  seen.add(id);
1927
- const pipelineTag = entry.pipeline_tag ?? "";
1928
- const modality = PIPELINE_TAG_TO_MODALITY[pipelineTag] ?? "image";
1929
- models.push({
2020
+ entries.push({
1930
2021
  id,
1931
- provider: "hf-local",
1932
- name: id.split("/").pop() ?? id,
1933
- modality,
1934
- local: true,
1935
- cost: { price: 0, unit: "free" },
1936
- logo,
1937
- status: "available",
1938
- localInfo: {
1939
- sizeBytes: 0,
1940
- runtime: "huggingface",
1941
- family: entry.library_name
1942
- },
1943
- capabilities: {}
2022
+ pipelineTag: entry.pipeline_tag ?? "",
2023
+ libraryName: entry.library_name
1944
2024
  });
1945
2025
  }
1946
2026
  }
2027
+ const descriptionMap = /* @__PURE__ */ new Map();
2028
+ for (let i = 0; i < entries.length; i += 10) {
2029
+ const batch = entries.slice(i, i + 10);
2030
+ const descs = await Promise.allSettled(
2031
+ batch.map(async (e) => {
2032
+ const desc = await fetchReadmeDescription(e.id);
2033
+ return { id: e.id, desc };
2034
+ })
2035
+ );
2036
+ for (const d of descs) {
2037
+ if (d.status === "fulfilled" && d.value.desc) {
2038
+ descriptionMap.set(d.value.id, d.value.desc);
2039
+ }
2040
+ }
2041
+ }
2042
+ const models = [];
2043
+ for (const e of entries) {
2044
+ const modality = PIPELINE_TAG_TO_MODALITY[e.pipelineTag] ?? "image";
2045
+ const org = e.id.includes("/") ? e.id.split("/")[0] : void 0;
2046
+ const logoProvider = org ? HF_ORG_TO_LOGO_PROVIDER[org] ?? "huggingface" : "huggingface";
2047
+ models.push({
2048
+ id: e.id,
2049
+ provider: "hf-local",
2050
+ name: e.id.split("/").pop() ?? e.id,
2051
+ modality,
2052
+ local: true,
2053
+ cost: { price: 0, unit: "free" },
2054
+ logo: getProviderLogo(logoProvider),
2055
+ description: descriptionMap.get(e.id),
2056
+ status: "available",
2057
+ localInfo: {
2058
+ sizeBytes: 0,
2059
+ runtime: "huggingface",
2060
+ family: e.libraryName
2061
+ },
2062
+ capabilities: {}
2063
+ });
2064
+ }
1947
2065
  return models;
1948
2066
  }
1949
2067
  async scanLocalCache() {
1950
2068
  const models = [];
1951
2069
  const cacheDir = (0, import_node_path.join)((0, import_node_os.homedir)(), ".cache", "huggingface", "hub");
1952
- const logo = getProviderLogo("huggingface");
1953
2070
  try {
1954
2071
  const entries = await (0, import_promises.readdir)(cacheDir, { withFileTypes: true });
1955
2072
  for (const entry of entries) {
@@ -1981,6 +2098,8 @@ var HfLocalProvider = class {
1981
2098
  }
1982
2099
  }
1983
2100
  const modality = PIPELINE_TAG_TO_MODALITY[pipelineTag] ?? "image";
2101
+ const org = modelId.includes("/") ? modelId.split("/")[0] : void 0;
2102
+ const logoProvider = org ? HF_ORG_TO_LOGO_PROVIDER[org] ?? "huggingface" : "huggingface";
1984
2103
  models.push({
1985
2104
  id: modelId,
1986
2105
  provider: "hf-local",
@@ -1988,7 +2107,7 @@ var HfLocalProvider = class {
1988
2107
  modality,
1989
2108
  local: true,
1990
2109
  cost: { price: 0, unit: "free" },
1991
- logo,
2110
+ logo: getProviderLogo(logoProvider),
1992
2111
  status: "installed",
1993
2112
  localInfo: {
1994
2113
  sizeBytes: 0,
@@ -2009,6 +2128,25 @@ var import_promises2 = require("fs/promises");
2009
2128
  var import_node_path2 = require("path");
2010
2129
  var import_node_os2 = require("os");
2011
2130
  var WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v2", "large-v3", "turbo"];
2131
// Whisper model size name -> HuggingFace repository id.
// Note that "turbo" points at the large-v3-turbo repository.
var WHISPER_HF_REPOS = {
  tiny: "openai/whisper-tiny",
  base: "openai/whisper-base",
  small: "openai/whisper-small",
  medium: "openai/whisper-medium",
  large: "openai/whisper-large",
  "large-v2": "openai/whisper-large-v2",
  "large-v3": "openai/whisper-large-v3",
  turbo: "openai/whisper-large-v3-turbo"
};
2141
/**
 * Look up a README-derived description for every entry of WHISPER_HF_REPOS.
 *
 * All lookups start at once (8 second timeout each) and the function waits for
 * every one to settle; a lookup that yields nothing leaves its size out of the map.
 *
 * @returns {Promise<Map<string, string>>} Map from Whisper size name to description.
 */
async function fetchWhisperDescriptions() {
  const bySize = /* @__PURE__ */ new Map();
  await Promise.allSettled(
    Object.keys(WHISPER_HF_REPOS).map(async (size) => {
      const text = await fetchReadmeDescription(WHISPER_HF_REPOS[size], 8e3);
      if (text) {
        bySize.set(size, text);
      }
    })
  );
  return bySize;
}
2012
2150
  function runPython(code, timeoutMs = 5e3) {
2013
2151
  return new Promise((resolve, reject) => {
2014
2152
  const proc = (0, import_node_child_process.execFile)("python3", ["-c", code], { timeout: timeoutMs }, (err, stdout) => {
@@ -2054,7 +2192,8 @@ var WhisperLocalProvider = class {
2054
2192
  if (_modality && _modality !== "stt") return [];
2055
2193
  const runtime = await this.detectRuntime();
2056
2194
  if (!runtime) return [];
2057
- const logo = getProviderLogo("huggingface");
2195
+ const descMap = await fetchWhisperDescriptions().catch(() => /* @__PURE__ */ new Map());
2196
+ const logo = getProviderLogo("openai");
2058
2197
  const models = [];
2059
2198
  for (const name of WHISPER_MODELS) {
2060
2199
  const installed = await this.isModelCached(name, runtime);
@@ -2062,6 +2201,7 @@ var WhisperLocalProvider = class {
2062
2201
  id: `whisper-${name}`,
2063
2202
  provider: "whisper-local",
2064
2203
  name: `Whisper ${name}`,
2204
+ description: descMap.get(name),
2065
2205
  modality: "stt",
2066
2206
  local: true,
2067
2207
  cost: { price: 0, unit: "free" },
@@ -2125,6 +2265,22 @@ var AUDIOCRAFT_MODELS = [
2125
2265
  { id: "musicgen-melody", name: "MusicGen Melody" },
2126
2266
  { id: "audiogen-medium", name: "AudioGen Medium" }
2127
2267
  ];
2268
// AudioCraft model id -> HuggingFace repository id (all under the "facebook" org).
var AUDIOCRAFT_HF_REPOS = {
  // MusicGen variants
  "musicgen-small": "facebook/musicgen-small",
  "musicgen-medium": "facebook/musicgen-medium",
  "musicgen-large": "facebook/musicgen-large",
  "musicgen-melody": "facebook/musicgen-melody",
  // AudioGen
  "audiogen-medium": "facebook/audiogen-medium"
};
2275
/**
 * Look up a README-derived description for every entry of AUDIOCRAFT_HF_REPOS.
 *
 * All lookups start at once (8 second timeout each) and the function waits for
 * every one to settle; a lookup that yields nothing leaves its id out of the map.
 *
 * @returns {Promise<Map<string, string>>} Map from AudioCraft model id to description.
 */
async function fetchAudioCraftDescriptions() {
  const byModelId = /* @__PURE__ */ new Map();
  const lookups = [];
  for (const modelId of Object.keys(AUDIOCRAFT_HF_REPOS)) {
    const repo = AUDIOCRAFT_HF_REPOS[modelId];
    lookups.push(
      (async () => {
        const text = await fetchReadmeDescription(repo, 8e3);
        if (text) {
          byModelId.set(modelId, text);
        }
      })()
    );
  }
  await Promise.allSettled(lookups);
  return byModelId;
}
2128
2284
  function runPython2(code, timeoutMs = 5e3) {
2129
2285
  return new Promise((resolve, reject) => {
2130
2286
  (0, import_node_child_process2.execFile)("python3", ["-c", code], { timeout: timeoutMs }, (err, stdout) => {
@@ -2160,6 +2316,7 @@ var AudioCraftProvider = class {
2160
2316
  async listModels(_modality) {
2161
2317
  if (_modality && _modality !== "music") return [];
2162
2318
  if (!await this.ping()) return [];
2319
+ const descMap = await fetchAudioCraftDescriptions().catch(() => /* @__PURE__ */ new Map());
2163
2320
  const logo = getProviderLogo("meta");
2164
2321
  const models = [];
2165
2322
  for (const m of AUDIOCRAFT_MODELS) {
@@ -2169,6 +2326,7 @@ var AudioCraftProvider = class {
2169
2326
  id: m.id,
2170
2327
  provider: "audiocraft",
2171
2328
  name: m.name,
2329
+ description: descMap.get(m.id),
2172
2330
  modality: "music",
2173
2331
  local: true,
2174
2332
  cost: { price: 0, unit: "free" },
@@ -2382,6 +2540,501 @@ async function detectOpenAICompatServers() {
2382
2540
  return providers;
2383
2541
  }
2384
2542
 
2543
+ // src/providers/openai-media.ts
2544
// Root of the OpenAI REST API; endpoint paths are appended to this string.
var OPENAI_API_BASE = "https://api.openai.com/v1";
// Abort timeout, in milliseconds.
var FETCH_TIMEOUT_MS5 = 8000;
// Ordered (model-id prefix -> modality) rules, expanded into { prefix, modality } records.
var MODEL_PREFIX_MAP = [
  ["dall-e-", "image"],
  ["gpt-image-", "image"],
  ["sora-", "video"],
  ["tts-", "tts"],
  ["whisper-", "stt"]
].map(([prefix, modality]) => ({ prefix, modality }));
2553
/**
 * Determine the media modality of an OpenAI model id from its prefix.
 *
 * @param {string} id - Model id.
 * @returns {string|null} The modality of the first MODEL_PREFIX_MAP entry whose
 *   prefix the id starts with, or `null` when no entry matches.
 */
function classifyModel(id) {
  const rule = MODEL_PREFIX_MAP.find((candidate) => id.startsWith(candidate.prefix));
  return rule ? rule.modality : null;
}
2559
/**
 * Provider for OpenAI-hosted media models.
 *
 * Lists image, video, TTS and STT models and implements `image()`, `speak()`
 * and `video()`; this class defines no speech-to-text method. Every request is
 * sent to OPENAI_API_BASE with the API key as a Bearer token. Usage cost is
 * always reported as 0.
 */
var OpenAIMediaProvider = class {
  /**
   * @param {string} apiKey - OpenAI API key used as the Bearer token.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
  }
  id = "openai-media";
  name = "OpenAI (Image, Video, TTS, STT)";
  modalities = ["image", "video", "tts", "stt"];
  isLocal = false;
  // Filled by the first successful listModels() call and reused afterwards.
  modelsCache = null;
  /**
   * Check that the models endpoint answers with an OK status.
   *
   * @returns {Promise<boolean>} `true` on an OK response; `false` on a non-OK
   *   response, a network error, or after FETCH_TIMEOUT_MS5 milliseconds.
   */
  async ping() {
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS5);
      try {
        const res = await fetch(`${OPENAI_API_BASE}/models`, {
          headers: { Authorization: `Bearer ${this.apiKey}` },
          signal: controller.signal
        });
        return res.ok;
      } finally {
        clearTimeout(timer);
      }
    } catch {
      return false;
    }
  }
  /**
   * List the media models visible to this API key.
   *
   * Only ids that classifyModel() recognises are kept. A successful fetch is
   * cached on the instance; failures are not cached and yield an empty list.
   *
   * @param {string} [modality] - When given, only models of this modality are returned.
   * @returns {Promise<object[]>} Model descriptors; `[]` on any failure.
   */
  async listModels(modality) {
    if (this.modelsCache) {
      return modality ? this.modelsCache.filter((m) => m.modality === modality) : this.modelsCache;
    }
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS5);
      let data;
      try {
        const res = await fetch(`${OPENAI_API_BASE}/models`, {
          headers: { Authorization: `Bearer ${this.apiKey}` },
          signal: controller.signal
        });
        if (!res.ok) return [];
        data = await res.json();
      } finally {
        clearTimeout(timer);
      }
      const entries = data?.data ?? [];
      const logo = getProviderLogo("openai");
      const models = [];
      for (const entry of entries) {
        const mod = classifyModel(entry.id);
        if (!mod) continue; // not a media model
        const info = {
          id: entry.id,
          provider: "openai-media",
          name: entry.id,
          modality: mod,
          local: false,
          cost: { price: 0, unit: "per_request" },
          logo,
          description: entry.description,
          capabilities: this.getCapabilities(entry.id, mod)
        };
        models.push(info);
      }
      this.modelsCache = models;
      return modality ? models.filter((m) => m.modality === modality) : models;
    } catch {
      // Discovery is best-effort: network/parse errors mean "no models".
      return [];
    }
  }
  /**
   * Generate one image.
   *
   * @param {object} options
   * @param {string} options.prompt - Text prompt.
   * @param {string} [options.model="gpt-image-1"] - Model id.
   * @param {number} [options.width=1024] - Requested width in pixels.
   * @param {number} [options.height=1024] - Requested height in pixels.
   * @returns {Promise<object>} Result carrying `buffer` (when the API returned
   *   base64 data) or `url`.
   * @throws {Error} On a non-OK response, or when the response contains neither
   *   base64 data nor a URL.
   */
  async image(options) {
    const model = options.model ?? "gpt-image-1";
    const width = options.width ?? 1024;
    const height = options.height ?? 1024;
    const start = Date.now();
    const isGptImage = model.startsWith("gpt-image-");
    const body = {
      model,
      prompt: options.prompt,
      n: 1,
      size: `${width}x${height}`
    };
    // `response_format` is only sent for non gpt-image models.
    if (!isGptImage) {
      body.response_format = "url";
    }
    const res = await fetch(`${OPENAI_API_BASE}/images/generations`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`OpenAI image generation failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    const item = data?.data?.[0];
    // Fail loudly instead of handing back a result with no media attached.
    if (!item?.b64_json && !item?.url) {
      throw new Error("OpenAI image generation returned no image data");
    }
    const result = {
      provider: "openai-media",
      model,
      modality: "image",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        unit: "per_image"
      },
      media: {
        width,
        height,
        format: "png"
      }
    };
    if (item.b64_json) {
      result.buffer = Buffer.from(item.b64_json, "base64");
    } else {
      result.url = item.url;
    }
    return result;
  }
  /**
   * Synthesize speech from text.
   *
   * @param {object} options
   * @param {string} options.text - Text to speak.
   * @param {string} [options.model="tts-1"] - Model id.
   * @param {string} [options.voice="alloy"] - Voice name.
   * @param {string} [options.format="mp3"] - Sent as `response_format`.
   * @param {number} [options.speed=1] - Playback speed.
   * @returns {Promise<object>} Result whose `buffer` holds the response body bytes.
   * @throws {Error} On a non-OK response.
   */
  async speak(options) {
    const model = options.model ?? "tts-1";
    const voice = options.voice ?? "alloy";
    const format = options.format ?? "mp3";
    const speed = options.speed ?? 1;
    const start = Date.now();
    const body = {
      model,
      input: options.text,
      voice,
      response_format: format,
      speed
    };
    const res = await fetch(`${OPENAI_API_BASE}/audio/speech`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`OpenAI TTS failed (${res.status}): ${errorBody}`);
    }
    const arrayBuffer = await res.arrayBuffer();
    const buffer = Buffer.from(arrayBuffer);
    return {
      buffer,
      provider: "openai-media",
      model,
      modality: "tts",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        input: options.text.length,
        unit: "per_1k_chars"
      },
      media: {
        format
      }
    };
  }
  /**
   * Generate one video.
   *
   * NOTE(review): posts to `/videos/generations` and expects `data[0].url` in a
   * synchronous response — confirm against the current OpenAI video API.
   *
   * @param {object} options
   * @param {string} options.prompt - Text prompt.
   * @param {string} [options.model="sora-2"] - Model id.
   * @param {number} [options.duration] - Sent as `duration` when truthy.
   * @param {number} [options.width] - With `height`, sent as `size`.
   * @param {number} [options.height] - With `width`, sent as `size`.
   * @returns {Promise<object>} Result carrying the video `url`.
   * @throws {Error} On a non-OK response, or when the response has no video URL.
   */
  async video(options) {
    const model = options.model ?? "sora-2";
    const start = Date.now();
    const body = {
      model,
      prompt: options.prompt,
      n: 1
    };
    if (options.duration) body.duration = options.duration;
    if (options.width && options.height) body.size = `${options.width}x${options.height}`;
    const res = await fetch(`${OPENAI_API_BASE}/videos/generations`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`OpenAI video generation failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    const videoUrl = data?.data?.[0]?.url;
    // Fail loudly instead of returning a result whose `url` is undefined.
    if (!videoUrl) {
      throw new Error("OpenAI video generation returned no video URL");
    }
    return {
      url: videoUrl,
      provider: "openai-media",
      model,
      modality: "video",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        unit: "per_video"
      },
      media: {
        duration: options.duration,
        width: options.width,
        height: options.height
      }
    };
  }
  /**
   * Build the static capability record advertised for a model.
   *
   * @param {string} id - Model id.
   * @param {string} modality - "image", "tts", "video" or "stt".
   * @returns {object|undefined} Capability record, or `undefined` for any other modality.
   */
  getCapabilities(id, modality) {
    if (modality === "image") {
      return {
        maxWidth: id.startsWith("dall-e-3") ? 1792 : 1024,
        maxHeight: id.startsWith("dall-e-3") ? 1792 : 1024
      };
    }
    if (modality === "tts") {
      return {
        voices: ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
      };
    }
    if (modality === "video") {
      return {
        maxDuration: id.includes("pro") ? 20 : 10,
        supportsStreaming: false
      };
    }
    if (modality === "stt") {
      return {
        languages: ["en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su"]
      };
    }
    return void 0;
  }
};
2789
+
2790
+ // src/providers/google-media.ts
2791
// Root of the Google Generative Language REST API (v1beta).
var GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
// Abort timeout, in milliseconds.
var FETCH_TIMEOUT_MS6 = 8000;
// Voice names advertised for Google TTS models.
var GOOGLE_TTS_VOICES = "Aoede Charon Fenrir Kore Puck Leda Orus Perseus Zephyr Callirrhoe".split(" ");
2805
/**
 * Decide which media modality a Google model-list entry belongs to.
 *
 * A modality is assigned only when BOTH the model name pattern and the
 * required generation method match; rules are tried in image, video, tts order.
 *
 * @param {{name?: string, supportedGenerationMethods?: string[]}} model - Model entry.
 * @returns {"image"|"video"|"tts"|null} The modality, or `null` when no rule matches.
 */
function classifyGoogleModel(model) {
  const shortName = (model.name ?? "").replace("models/", "");
  const supported = model.supportedGenerationMethods ?? [];
  // [modality, name matches?, generation method that must be supported]
  const rules = [
    ["image", shortName.startsWith("imagen"), "predict"],
    ["video", shortName.startsWith("veo"), "predictLongRunning"],
    ["tts", shortName.includes("-tts"), "generateContent"]
  ];
  for (const [modality, nameMatches, requiredMethod] of rules) {
    if (nameMatches && supported.includes(requiredMethod)) {
      return modality;
    }
  }
  return null;
}
2813
/**
 * Prefix headerless 16-bit little-endian PCM samples with a 44-byte RIFF/WAVE
 * header so the bytes form a playable WAV file.
 *
 * @param {Buffer} pcm - Raw PCM sample bytes.
 * @param {number} sampleRate - Samples per second.
 * @param {number} [channels=1] - Channel count.
 * @returns {Buffer} Header followed by the unchanged PCM bytes.
 */
function wrapPcm16AsWav(pcm, sampleRate, channels = 1) {
  const bitsPerSample = 16;
  const blockAlign = (channels * bitsPerSample) / 8;
  const header = Buffer.alloc(44);
  header.write("RIFF", 0, "ascii");
  header.writeUInt32LE(36 + pcm.length, 4); // size of everything after this field
  header.write("WAVE", 8, "ascii");
  header.write("fmt ", 12, "ascii");
  header.writeUInt32LE(16, 16); // fmt chunk size
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(sampleRate * blockAlign, 28); // byte rate
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write("data", 36, "ascii");
  header.writeUInt32LE(pcm.length, 40);
  return Buffer.concat([header, pcm]);
}
/**
 * Provider for Google-hosted media models (image, video, text-to-speech).
 *
 * Every request is sent to GOOGLE_API_BASE with the API key as the `key` query
 * parameter. Usage cost is always reported as 0.
 */
var GoogleMediaProvider = class {
  /**
   * @param {string} apiKey - Google API key appended to every request URL.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
  }
  id = "google-media";
  name = "Google (Image, Video, TTS)";
  modalities = ["image", "video", "tts"];
  isLocal = false;
  // Filled by the first successful listModels() call and reused afterwards.
  modelsCache = null;
  /**
   * Check that the models endpoint answers with an OK status.
   *
   * @returns {Promise<boolean>} `true` on an OK response; `false` on a non-OK
   *   response, a network error, or after FETCH_TIMEOUT_MS6 milliseconds.
   */
  async ping() {
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS6);
      try {
        const res = await fetch(`${GOOGLE_API_BASE}/models?key=${this.apiKey}`, {
          signal: controller.signal
        });
        return res.ok;
      } finally {
        clearTimeout(timer);
      }
    } catch {
      return false;
    }
  }
  /**
   * List the media models visible to this API key.
   *
   * Only entries that classifyGoogleModel() recognises are kept. A successful
   * fetch is cached on the instance; failures are not cached and yield `[]`.
   *
   * @param {string} [modality] - When given, only models of this modality are returned.
   * @returns {Promise<object[]>} Model descriptors; `[]` on any failure.
   */
  async listModels(modality) {
    if (this.modelsCache) {
      return modality ? this.modelsCache.filter((m) => m.modality === modality) : this.modelsCache;
    }
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS6);
      let data;
      try {
        const res = await fetch(`${GOOGLE_API_BASE}/models?key=${this.apiKey}`, {
          signal: controller.signal
        });
        if (!res.ok) return [];
        data = await res.json();
      } finally {
        clearTimeout(timer);
      }
      const entries = data?.models ?? [];
      const logo = getProviderLogo("google");
      const models = [];
      for (const entry of entries) {
        const modality2 = classifyGoogleModel(entry);
        if (!modality2) continue; // not a media model
        const fullName = entry.name ?? "";
        const modelId = fullName.startsWith("models/") ? fullName.slice("models/".length) : fullName;
        // Use the same unit each generation method reports in its `usage`.
        const costUnit = modality2 === "video" ? "per_video" : modality2 === "tts" ? "per_1k_chars" : "per_image";
        const info = {
          id: modelId,
          provider: "google-media",
          name: entry.displayName ?? modelId,
          modality: modality2,
          local: false,
          cost: { price: 0, unit: costUnit },
          logo,
          description: entry.description,
          capabilities: modality2 === "video" ? { maxDuration: 8, supportsStreaming: false } : modality2 === "tts" ? { voices: GOOGLE_TTS_VOICES } : void 0
        };
        models.push(info);
      }
      this.modelsCache = models;
      return modality ? models.filter((m) => m.modality === modality) : models;
    } catch {
      // Discovery is best-effort: network/parse errors mean "no models".
      return [];
    }
  }
  /**
   * Generate one image through the model's `:predict` endpoint.
   *
   * @param {object} options
   * @param {string} options.prompt - Text prompt.
   * @param {string} [options.model="imagen-4.0-generate-001"] - Model id.
   * @returns {Promise<object>} Result whose `buffer` holds the decoded image bytes.
   * @throws {Error} On a non-OK response, or when the response has no image data.
   */
  async image(options) {
    const model = options.model ?? "imagen-4.0-generate-001";
    const start = Date.now();
    const body = {
      instances: [{ prompt: options.prompt }],
      parameters: {
        sampleCount: 1
      }
    };
    const res = await fetch(
      `${GOOGLE_API_BASE}/models/${model}:predict?key=${this.apiKey}`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(body)
      }
    );
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`Google image generation failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    // Two response layouts are accepted.
    const base64 = data?.predictions?.[0]?.bytesBase64Encoded ?? data?.generatedImages?.[0]?.image?.imageBytes;
    if (!base64) {
      throw new Error("Google image generation returned no image data");
    }
    const buffer = Buffer.from(base64, "base64");
    return {
      buffer,
      provider: "google-media",
      model,
      modality: "image",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        unit: "per_image"
      },
      media: {
        format: "png"
      }
    };
  }
  /**
   * Synthesize speech through the model's `:generateContent` endpoint.
   *
   * When the response labels its audio as raw PCM (for example
   * `audio/L16;codec=pcm;rate=24000`) the samples are wrapped in a WAV header so
   * the returned buffer matches the reported `wav` format.
   *
   * @param {object} options
   * @param {string} options.text - Text to speak.
   * @param {string} [options.model="gemini-2.5-flash-preview-tts"] - Model id.
   * @param {string} [options.voice="Kore"] - Prebuilt voice name.
   * @returns {Promise<object>} Result whose `buffer` holds the audio.
   * @throws {Error} On a non-OK response, or when the response has no audio data.
   */
  async speak(options) {
    const model = options.model ?? "gemini-2.5-flash-preview-tts";
    const voice = options.voice ?? "Kore";
    const start = Date.now();
    const body = {
      contents: [{ parts: [{ text: options.text }] }],
      generationConfig: {
        response_modalities: ["AUDIO"],
        speech_config: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice }
          }
        }
      }
    };
    const res = await fetch(
      `${GOOGLE_API_BASE}/models/${model}:generateContent?key=${this.apiKey}`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(body)
      }
    );
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`Google TTS failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    const inlineData = data?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
    if (!inlineData?.data) {
      throw new Error("Google TTS returned no audio data");
    }
    const rawAudio = Buffer.from(inlineData.data, "base64");
    const mimeType = inlineData.mimeType ?? "";
    // Only wrap audio that is declared as raw PCM and is not already a RIFF file.
    const isHeaderlessPcm = /^audio\/(l16|pcm)/i.test(mimeType) && rawAudio.toString("ascii", 0, 4) !== "RIFF";
    // Sample rate comes from the mime type; 24 kHz mono is assumed otherwise —
    // TODO confirm channel count against the API reference.
    const sampleRate = Number(/rate=(\d+)/i.exec(mimeType)?.[1] ?? 24e3);
    const buffer = isHeaderlessPcm ? wrapPcm16AsWav(rawAudio, sampleRate) : rawAudio;
    return {
      buffer,
      provider: "google-media",
      model,
      modality: "tts",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        input: options.text.length,
        unit: "per_1k_chars"
      },
      media: {
        format: "wav"
      }
    };
  }
  /**
   * Generate one video through the model's `:predictLongRunning` endpoint and
   * poll the returned operation every 5 seconds for up to 5 minutes.
   *
   * @param {object} options
   * @param {string} options.prompt - Text prompt.
   * @param {string} [options.model="veo-3.0-generate-001"] - Model id.
   * @param {number} [options.duration] - Sent as `durationSeconds` when truthy.
   * @returns {Promise<object>} Result carrying `buffer` (inline bytes) or `url`.
   * @throws {Error} On a non-OK start response, a missing operation name, an
   *   operation that finishes with an error or without video data, or a timeout.
   */
  async video(options) {
    const model = options.model ?? "veo-3.0-generate-001";
    const start = Date.now();
    const body = {
      instances: [{ prompt: options.prompt }],
      parameters: {
        sampleCount: 1
      }
    };
    if (options.duration) body.parameters.durationSeconds = options.duration;
    const res = await fetch(
      `${GOOGLE_API_BASE}/models/${model}:predictLongRunning?key=${this.apiKey}`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(body)
      }
    );
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`Google video generation failed (${res.status}): ${errorBody}`);
    }
    const operation = await res.json();
    const operationName = operation?.name;
    if (!operationName) {
      throw new Error("Google video generation returned no operation name");
    }
    const deadline = Date.now() + 3e5;
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, 5e3));
      const pollRes = await fetch(
        `${GOOGLE_API_BASE}/${operationName}?key=${this.apiKey}`
      );
      // A failed poll is retried on the next tick until the deadline.
      if (!pollRes.ok) continue;
      const status = await pollRes.json();
      if (!status.done) continue;
      // A finished operation may carry an error instead of a response.
      if (status.error) {
        const reason = status.error.message ?? JSON.stringify(status.error);
        throw new Error(`Google video generation failed: ${reason}`);
      }
      // Samples are looked up under `generateVideoResponse` first, then directly
      // under `response` (the only location the previous revision checked).
      const samples = status.response?.generateVideoResponse?.generatedSamples ?? status.response?.generatedSamples;
      const clip = samples?.[0]?.video;
      const shared = {
        provider: "google-media",
        model,
        modality: "video",
        latencyMs: Date.now() - start,
        usage: { cost: 0, unit: "per_video" },
        media: { format: "mp4", duration: options.duration }
      };
      if (clip?.bytesBase64Encoded) {
        return { buffer: Buffer.from(clip.bytesBase64Encoded, "base64"), ...shared };
      }
      if (clip?.uri) {
        return { url: clip.uri, ...shared };
      }
      throw new Error("Google video generation returned no video data");
    }
    throw new Error(`Google video generation timed out after 5 minutes`);
  }
};
3037
+
2385
3038
  // src/noosphere.ts
2386
3039
  var Noosphere = class {
2387
3040
  config;
@@ -2564,9 +3217,9 @@ var Noosphere = class {
2564
3217
  if (!this.initialized) await this.init();
2565
3218
  return this.registry.getModel(provider, modelId);
2566
3219
  }
2567
- async syncModels() {
3220
+ async syncModels(modality) {
2568
3221
  if (!this.initialized) await this.init();
2569
- return this.registry.syncAll();
3222
+ return this.registry.syncAll(modality);
2570
3223
  }
2571
3224
  // --- Tracking Methods ---
2572
3225
  getUsage(options) {
@@ -2624,6 +3277,12 @@ var Noosphere = class {
2624
3277
  if (hasAnyLLMKey) {
2625
3278
  this.registry.addProvider(new PiAiProvider(llmKeys));
2626
3279
  }
3280
+ if (keys.openai) {
3281
+ this.registry.addProvider(new OpenAIMediaProvider(keys.openai));
3282
+ }
3283
+ if (keys.google) {
3284
+ this.registry.addProvider(new GoogleMediaProvider(keys.google));
3285
+ }
2627
3286
  if (keys.fal) {
2628
3287
  this.registry.addProvider(new FalProvider(keys.fal));
2629
3288
  }
@@ -2803,11 +3462,13 @@ var Noosphere = class {
2803
3462
  // Annotate the CommonJS export names for ESM import in node:
2804
3463
  0 && (module.exports = {
2805
3464
  AudioCraftProvider,
3465
+ GoogleMediaProvider,
2806
3466
  HfLocalProvider,
2807
3467
  Noosphere,
2808
3468
  NoosphereError,
2809
3469
  OllamaProvider,
2810
3470
  OpenAICompatProvider,
3471
+ OpenAIMediaProvider,
2811
3472
  PROVIDER_IDS,
2812
3473
  PROVIDER_LOGOS,
2813
3474
  WhisperLocalProvider,