noosphere 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -8
- package/dist/index.cjs +505 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +32 -1
- package/dist/index.d.ts +32 -1
- package/dist/index.js +503 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -407,6 +407,37 @@ declare class OpenAICompatProvider implements NoosphereProvider {
|
|
|
407
407
|
/** Auto-detect running OpenAI-compatible servers on common ports */
|
|
408
408
|
declare function detectOpenAICompatServers(): Promise<OpenAICompatProvider[]>;
|
|
409
409
|
|
|
410
|
+
/**
 * Provider for OpenAI-hosted media models, registered under the id "openai-media".
 *
 * Declares image generation, text-to-speech and video generation on top of the
 * common `NoosphereProvider` contract (`ping`, `listModels`).
 * NOTE(review): the display name mentions STT, but no transcription method is
 * declared on this class — confirm whether speech-to-text is meant to be exposed here.
 */
declare class OpenAIMediaProvider implements NoosphereProvider {
|
|
411
|
+
/** API key supplied to the constructor; used to authenticate every request. */
private apiKey;
|
|
412
|
+
/** Stable provider identifier stamped on every model and result this provider returns. */
readonly id = "openai-media";
|
|
413
|
+
/** Human-readable provider name. */
readonly name = "OpenAI (Image, Video, TTS, STT)";
|
|
414
|
+
/** Modalities this provider advertises. */
readonly modalities: Modality[];
|
|
415
|
+
/** Always `false`: requests go to a remote API, not a local service. */
readonly isLocal = false;
|
|
416
|
+
/** Model list remembered after the first successful `listModels()` call. */
private modelsCache;
|
|
417
|
+
/** @param apiKey OpenAI API key used to authenticate requests. */
constructor(apiKey: string);
|
|
418
|
+
/** Resolves to `true` when the models endpoint answers successfully, `false` on any failure. */
ping(): Promise<boolean>;
|
|
419
|
+
/**
 * Lists the media models available to the key, optionally filtered by modality.
 * Resolves to an empty array when the request fails.
 */
listModels(modality?: Modality): Promise<ModelInfo[]>;
|
|
420
|
+
/** Generates one image for `options.prompt`; rejects when the API responds with an error status. */
image(options: ImageOptions): Promise<NoosphereResult>;
|
|
421
|
+
/** Synthesizes speech for `options.text`; rejects when the API responds with an error status. */
speak(options: SpeakOptions): Promise<NoosphereResult>;
|
|
422
|
+
/** Generates a video for `options.prompt`; rejects when the API responds with an error status. */
video(options: VideoOptions): Promise<NoosphereResult>;
|
|
423
|
+
/** Builds the per-model capability metadata attached to `listModels()` entries. */
private getCapabilities;
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
/**
 * Provider for Google-hosted media models, registered under the id "google-media".
 *
 * Declares image generation, text-to-speech and video generation on top of the
 * common `NoosphereProvider` contract (`ping`, `listModels`).
 */
declare class GoogleMediaProvider implements NoosphereProvider {
|
|
427
|
+
/** API key supplied to the constructor; used to authenticate every request. */
private apiKey;
|
|
428
|
+
/** Stable provider identifier stamped on every model and result this provider returns. */
readonly id = "google-media";
|
|
429
|
+
/** Human-readable provider name. */
readonly name = "Google (Image, Video, TTS)";
|
|
430
|
+
/** Modalities this provider advertises. */
readonly modalities: Modality[];
|
|
431
|
+
/** Always `false`: requests go to a remote API, not a local service. */
readonly isLocal = false;
|
|
432
|
+
/** Model list remembered after the first successful `listModels()` call. */
private modelsCache;
|
|
433
|
+
/** @param apiKey Google API key used to authenticate requests. */
constructor(apiKey: string);
|
|
434
|
+
/** Resolves to `true` when the models endpoint answers successfully, `false` on any failure. */
ping(): Promise<boolean>;
|
|
435
|
+
/**
 * Lists the media models available to the key, optionally filtered by modality.
 * Resolves to an empty array when the request fails.
 */
listModels(modality?: Modality): Promise<ModelInfo[]>;
|
|
436
|
+
/** Generates one image for `options.prompt`; rejects on an error status or when no image data is returned. */
image(options: ImageOptions): Promise<NoosphereResult>;
|
|
437
|
+
/** Synthesizes speech for `options.text`; rejects on an error status or when no audio data is returned. */
speak(options: SpeakOptions): Promise<NoosphereResult>;
|
|
438
|
+
/**
 * Starts a long-running video generation and polls it until it completes;
 * rejects when the request fails or the operation does not finish within five minutes.
 */
video(options: VideoOptions): Promise<NoosphereResult>;
|
|
439
|
+
}
|
|
440
|
+
|
|
410
441
|
interface ProviderLogo {
|
|
411
442
|
svg?: string;
|
|
412
443
|
png?: string;
|
|
@@ -425,4 +456,4 @@ declare function getProviderLogo(providerId: string | undefined | null): Provide
|
|
|
425
456
|
declare function getAllProviderLogos(): Record<string, ProviderLogo>;
|
|
426
457
|
declare const PROVIDER_LOGOS: Record<string, ProviderLogo>;
|
|
427
458
|
|
|
428
|
-
export { AudioCraftProvider, type BaseOptions, type ChatOptions, HfLocalProvider, type ImageOptions, type LocalModelInfo, type LocalServiceConfig, type Modality, type ModelInfo, type ModelStatus, type MusicOptions, Noosphere, type NoosphereConfig, NoosphereError, type NoosphereErrorCode, type NoosphereProvider, type NoosphereResult, type NoosphereStream, type OllamaModelDetail, OllamaProvider, type OllamaPullProgress, type OllamaRunningModel, type OpenAICompatConfig, OpenAICompatProvider, PROVIDER_IDS, PROVIDER_LOGOS, type ProviderInfo, type ProviderLogo$1 as ProviderLogo, type SpeakOptions, type StreamEvent, type SyncResult, type TranscriptionOptions, type TranscriptionResult, type UsageEvent, type UsageQueryOptions, type UsageSummary, type VideoOptions, WhisperLocalProvider, detectOpenAICompatServers, getAllProviderLogos, getProviderLogo };
|
|
459
|
+
export { AudioCraftProvider, type BaseOptions, type ChatOptions, GoogleMediaProvider, HfLocalProvider, type ImageOptions, type LocalModelInfo, type LocalServiceConfig, type Modality, type ModelInfo, type ModelStatus, type MusicOptions, Noosphere, type NoosphereConfig, NoosphereError, type NoosphereErrorCode, type NoosphereProvider, type NoosphereResult, type NoosphereStream, type OllamaModelDetail, OllamaProvider, type OllamaPullProgress, type OllamaRunningModel, type OpenAICompatConfig, OpenAICompatProvider, OpenAIMediaProvider, PROVIDER_IDS, PROVIDER_LOGOS, type ProviderInfo, type ProviderLogo$1 as ProviderLogo, type SpeakOptions, type StreamEvent, type SyncResult, type TranscriptionOptions, type TranscriptionResult, type UsageEvent, type UsageQueryOptions, type UsageSummary, type VideoOptions, WhisperLocalProvider, detectOpenAICompatServers, getAllProviderLogos, getProviderLogo };
|
package/dist/index.d.ts
CHANGED
|
@@ -407,6 +407,37 @@ declare class OpenAICompatProvider implements NoosphereProvider {
|
|
|
407
407
|
/** Auto-detect running OpenAI-compatible servers on common ports */
|
|
408
408
|
declare function detectOpenAICompatServers(): Promise<OpenAICompatProvider[]>;
|
|
409
409
|
|
|
410
|
+
/**
 * Provider for OpenAI-hosted media models, registered under the id "openai-media".
 *
 * Declares image generation, text-to-speech and video generation on top of the
 * common `NoosphereProvider` contract (`ping`, `listModels`).
 * NOTE(review): the display name mentions STT, but no transcription method is
 * declared on this class — confirm whether speech-to-text is meant to be exposed here.
 */
declare class OpenAIMediaProvider implements NoosphereProvider {
|
|
411
|
+
/** API key supplied to the constructor; used to authenticate every request. */
private apiKey;
|
|
412
|
+
/** Stable provider identifier stamped on every model and result this provider returns. */
readonly id = "openai-media";
|
|
413
|
+
/** Human-readable provider name. */
readonly name = "OpenAI (Image, Video, TTS, STT)";
|
|
414
|
+
/** Modalities this provider advertises. */
readonly modalities: Modality[];
|
|
415
|
+
/** Always `false`: requests go to a remote API, not a local service. */
readonly isLocal = false;
|
|
416
|
+
/** Model list remembered after the first successful `listModels()` call. */
private modelsCache;
|
|
417
|
+
/** @param apiKey OpenAI API key used to authenticate requests. */
constructor(apiKey: string);
|
|
418
|
+
/** Resolves to `true` when the models endpoint answers successfully, `false` on any failure. */
ping(): Promise<boolean>;
|
|
419
|
+
/**
 * Lists the media models available to the key, optionally filtered by modality.
 * Resolves to an empty array when the request fails.
 */
listModels(modality?: Modality): Promise<ModelInfo[]>;
|
|
420
|
+
/** Generates one image for `options.prompt`; rejects when the API responds with an error status. */
image(options: ImageOptions): Promise<NoosphereResult>;
|
|
421
|
+
/** Synthesizes speech for `options.text`; rejects when the API responds with an error status. */
speak(options: SpeakOptions): Promise<NoosphereResult>;
|
|
422
|
+
/** Generates a video for `options.prompt`; rejects when the API responds with an error status. */
video(options: VideoOptions): Promise<NoosphereResult>;
|
|
423
|
+
/** Builds the per-model capability metadata attached to `listModels()` entries. */
private getCapabilities;
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
/**
 * Provider for Google-hosted media models, registered under the id "google-media".
 *
 * Declares image generation, text-to-speech and video generation on top of the
 * common `NoosphereProvider` contract (`ping`, `listModels`).
 */
declare class GoogleMediaProvider implements NoosphereProvider {
|
|
427
|
+
/** API key supplied to the constructor; used to authenticate every request. */
private apiKey;
|
|
428
|
+
/** Stable provider identifier stamped on every model and result this provider returns. */
readonly id = "google-media";
|
|
429
|
+
/** Human-readable provider name. */
readonly name = "Google (Image, Video, TTS)";
|
|
430
|
+
/** Modalities this provider advertises. */
readonly modalities: Modality[];
|
|
431
|
+
/** Always `false`: requests go to a remote API, not a local service. */
readonly isLocal = false;
|
|
432
|
+
/** Model list remembered after the first successful `listModels()` call. */
private modelsCache;
|
|
433
|
+
/** @param apiKey Google API key used to authenticate requests. */
constructor(apiKey: string);
|
|
434
|
+
/** Resolves to `true` when the models endpoint answers successfully, `false` on any failure. */
ping(): Promise<boolean>;
|
|
435
|
+
/**
 * Lists the media models available to the key, optionally filtered by modality.
 * Resolves to an empty array when the request fails.
 */
listModels(modality?: Modality): Promise<ModelInfo[]>;
|
|
436
|
+
/** Generates one image for `options.prompt`; rejects on an error status or when no image data is returned. */
image(options: ImageOptions): Promise<NoosphereResult>;
|
|
437
|
+
/** Synthesizes speech for `options.text`; rejects on an error status or when no audio data is returned. */
speak(options: SpeakOptions): Promise<NoosphereResult>;
|
|
438
|
+
/**
 * Starts a long-running video generation and polls it until it completes;
 * rejects when the request fails or the operation does not finish within five minutes.
 */
video(options: VideoOptions): Promise<NoosphereResult>;
|
|
439
|
+
}
|
|
440
|
+
|
|
410
441
|
interface ProviderLogo {
|
|
411
442
|
svg?: string;
|
|
412
443
|
png?: string;
|
|
@@ -425,4 +456,4 @@ declare function getProviderLogo(providerId: string | undefined | null): Provide
|
|
|
425
456
|
declare function getAllProviderLogos(): Record<string, ProviderLogo>;
|
|
426
457
|
declare const PROVIDER_LOGOS: Record<string, ProviderLogo>;
|
|
427
458
|
|
|
428
|
-
export { AudioCraftProvider, type BaseOptions, type ChatOptions, HfLocalProvider, type ImageOptions, type LocalModelInfo, type LocalServiceConfig, type Modality, type ModelInfo, type ModelStatus, type MusicOptions, Noosphere, type NoosphereConfig, NoosphereError, type NoosphereErrorCode, type NoosphereProvider, type NoosphereResult, type NoosphereStream, type OllamaModelDetail, OllamaProvider, type OllamaPullProgress, type OllamaRunningModel, type OpenAICompatConfig, OpenAICompatProvider, PROVIDER_IDS, PROVIDER_LOGOS, type ProviderInfo, type ProviderLogo$1 as ProviderLogo, type SpeakOptions, type StreamEvent, type SyncResult, type TranscriptionOptions, type TranscriptionResult, type UsageEvent, type UsageQueryOptions, type UsageSummary, type VideoOptions, WhisperLocalProvider, detectOpenAICompatServers, getAllProviderLogos, getProviderLogo };
|
|
459
|
+
export { AudioCraftProvider, type BaseOptions, type ChatOptions, GoogleMediaProvider, HfLocalProvider, type ImageOptions, type LocalModelInfo, type LocalServiceConfig, type Modality, type ModelInfo, type ModelStatus, type MusicOptions, Noosphere, type NoosphereConfig, NoosphereError, type NoosphereErrorCode, type NoosphereProvider, type NoosphereResult, type NoosphereStream, type OllamaModelDetail, OllamaProvider, type OllamaPullProgress, type OllamaRunningModel, type OpenAICompatConfig, OpenAICompatProvider, OpenAIMediaProvider, PROVIDER_IDS, PROVIDER_LOGOS, type ProviderInfo, type ProviderLogo$1 as ProviderLogo, type SpeakOptions, type StreamEvent, type SyncResult, type TranscriptionOptions, type TranscriptionResult, type UsageEvent, type UsageQueryOptions, type UsageSummary, type VideoOptions, WhisperLocalProvider, detectOpenAICompatServers, getAllProviderLogos, getProviderLogo };
|
package/dist/index.js
CHANGED
|
@@ -2508,6 +2508,501 @@ async function detectOpenAICompatServers() {
|
|
|
2508
2508
|
return providers;
|
|
2509
2509
|
}
|
|
2510
2510
|
|
|
2511
|
+
// src/providers/openai-media.ts
|
|
2512
|
+
/** Root URL of the OpenAI REST API; every request path below is appended to it. */
var OPENAI_API_BASE = "https://api.openai.com/v1";
/** Abort timeout (milliseconds) applied to the lightweight `/models` requests. */
var FETCH_TIMEOUT_MS5 = 8e3;
/**
 * Model-id prefixes that identify a media model, checked in order.
 * The first matching prefix decides the modality.
 */
var MODEL_PREFIX_MAP = [
  { prefix: "dall-e-", modality: "image" },
  { prefix: "gpt-image-", modality: "image" },
  { prefix: "sora-", modality: "video" },
  { prefix: "tts-", modality: "tts" },
  { prefix: "whisper-", modality: "stt" }
];
/**
 * Name patterns for media models whose ids do not start with a dedicated prefix,
 * e.g. "gpt-4o-mini-tts" or "gpt-4o-transcribe". Consulted only after the prefix
 * table found nothing, so existing classifications are unchanged.
 */
var OPENAI_MODEL_PATTERN_MAP = [
  { pattern: /-tts(?:-|$)/, modality: "tts" },
  { pattern: /-transcribe(?:-|$)/, modality: "stt" }
];
/**
 * Maps an OpenAI model id to the media modality it serves.
 *
 * @param id Model id as reported by the `/models` endpoint.
 * @returns "image", "video", "tts" or "stt"; `null` when the id is not a string
 *          or does not name a media model.
 */
function classifyModel(id) {
  // A malformed entry must not throw: the caller would otherwise drop the whole model list.
  if (typeof id !== "string") return null;
  for (const { prefix, modality } of MODEL_PREFIX_MAP) {
    if (id.startsWith(prefix)) return modality;
  }
  for (const { pattern, modality } of OPENAI_MODEL_PATTERN_MAP) {
    if (pattern.test(id)) return modality;
  }
  return null;
}
|
|
2527
|
+
/**
 * Provider for OpenAI-hosted media models (image, video, text-to-speech).
 *
 * Every request is authenticated with the API key as a Bearer token. The
 * generation methods throw an `Error` when the API answers with a non-2xx
 * status or with a success response that carries no media, so callers never
 * receive a result without a `buffer` or `url`.
 */
var OpenAIMediaProvider = class {
  /**
   * @param apiKey OpenAI API key; sent as `Authorization: Bearer <apiKey>`.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
  }
  id = "openai-media";
  name = "OpenAI (Image, Video, TTS, STT)";
  // NOTE(review): "stt" is advertised here, but this class implements no
  // transcription method — confirm how stt requests are expected to be served.
  modalities = ["image", "video", "tts", "stt"];
  isLocal = false;
  /** Classified model list from the first successful listModels() call; null until then. */
  modelsCache = null;
  /** API key; declared so the property exists on the class shape, assigned by the constructor. */
  apiKey;
  /**
   * Checks that the API is reachable and accepts the key.
   *
   * @returns `true` when `GET /models` answers with a 2xx status within the
   *          timeout; `false` on any error, timeout or non-2xx status.
   */
  async ping() {
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS5);
      try {
        const res = await fetch(`${OPENAI_API_BASE}/models`, {
          headers: { Authorization: `Bearer ${this.apiKey}` },
          signal: controller.signal
        });
        return res.ok;
      } finally {
        // Always release the timer so it cannot keep the process alive.
        clearTimeout(timer);
      }
    } catch {
      return false;
    }
  }
  /**
   * Lists the media models visible to the key.
   *
   * The first successful response is cached for the lifetime of the instance.
   * Failures are not cached, so a later call retries.
   *
   * @param modality Optional modality filter.
   * @returns The matching models, or an empty array when the request fails.
   */
  async listModels(modality) {
    if (this.modelsCache) {
      return modality ? this.modelsCache.filter((m) => m.modality === modality) : this.modelsCache;
    }
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS5);
      let data;
      try {
        const res = await fetch(`${OPENAI_API_BASE}/models`, {
          headers: { Authorization: `Bearer ${this.apiKey}` },
          signal: controller.signal
        });
        if (!res.ok) return [];
        data = await res.json();
      } finally {
        clearTimeout(timer);
      }
      const entries = Array.isArray(data?.data) ? data.data : [];
      const logo = getProviderLogo("openai");
      const models = [];
      for (const entry of entries) {
        // Skip malformed entries instead of letting one bad record empty the whole list.
        const modelId = entry?.id;
        if (typeof modelId !== "string") continue;
        const mod = classifyModel(modelId);
        if (!mod) continue;
        models.push({
          id: modelId,
          provider: "openai-media",
          name: modelId,
          modality: mod,
          local: false,
          cost: { price: 0, unit: "per_request" },
          logo,
          description: entry.description,
          capabilities: this.getCapabilities(modelId, mod)
        });
      }
      this.modelsCache = models;
      return modality ? models.filter((m) => m.modality === modality) : models;
    } catch {
      // Best effort: discovery problems must not break the caller.
      return [];
    }
  }
  /**
   * Generates a single image.
   *
   * @param options Image options; `model` defaults to "gpt-image-1", `width`
   *                and `height` default to 1024.
   * @returns A result carrying either `buffer` (base64 payload) or `url`.
   * @throws Error when the API answers with a non-2xx status or returns no image.
   */
  async image(options) {
    const model = options.model ?? "gpt-image-1";
    const width = options.width ?? 1024;
    const height = options.height ?? 1024;
    const start = Date.now();
    const isGptImage = model.startsWith("gpt-image-");
    const body = {
      model,
      prompt: options.prompt,
      n: 1,
      size: `${width}x${height}`,
      // Only non gpt-image models get an explicit response_format.
      ...(isGptImage ? {} : { response_format: "url" })
    };
    const res = await fetch(`${OPENAI_API_BASE}/images/generations`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`OpenAI image generation failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    const item = data?.data?.[0];
    if (!item?.b64_json && !item?.url) {
      // A success status without media is still a failure for the caller.
      throw new Error("OpenAI image generation returned no image data");
    }
    const payload = item.b64_json ? { buffer: Buffer.from(item.b64_json, "base64") } : { url: item.url };
    return {
      provider: "openai-media",
      model,
      modality: "image",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        unit: "per_image"
      },
      media: {
        width,
        height,
        format: "png"
      },
      ...payload
    };
  }
  /**
   * Synthesizes speech for `options.text`.
   *
   * @param options Speech options; defaults: model "tts-1", voice "alloy",
   *                format "mp3", speed 1.
   * @returns A result whose `buffer` holds the encoded audio.
   * @throws Error when the API answers with a non-2xx status.
   */
  async speak(options) {
    const model = options.model ?? "tts-1";
    const voice = options.voice ?? "alloy";
    const format = options.format ?? "mp3";
    const speed = options.speed ?? 1;
    const start = Date.now();
    const body = {
      model,
      input: options.text,
      voice,
      response_format: format,
      speed
    };
    const res = await fetch(`${OPENAI_API_BASE}/audio/speech`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`OpenAI TTS failed (${res.status}): ${errorBody}`);
    }
    const buffer = Buffer.from(await res.arrayBuffer());
    return {
      buffer,
      provider: "openai-media",
      model,
      modality: "tts",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        input: options.text.length,
        unit: "per_1k_chars"
      },
      media: {
        format
      }
    };
  }
  /**
   * Generates a video for `options.prompt`.
   *
   * NOTE(review): this posts to `/videos/generations` and expects a synchronous
   * `data[0].url` response. Confirm the path and response shape against the
   * current OpenAI video API reference before relying on it.
   *
   * @param options Video options; `model` defaults to "sora-2". `duration` is
   *                sent when truthy, `size` when both `width` and `height` are.
   * @returns A result whose `url` points at the generated video.
   * @throws Error when the API answers with a non-2xx status or returns no URL.
   */
  async video(options) {
    const model = options.model ?? "sora-2";
    const start = Date.now();
    const body = {
      model,
      prompt: options.prompt,
      n: 1,
      ...(options.duration ? { duration: options.duration } : {}),
      ...(options.width && options.height ? { size: `${options.width}x${options.height}` } : {})
    };
    const res = await fetch(`${OPENAI_API_BASE}/videos/generations`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`OpenAI video generation failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    const videoUrl = data?.data?.[0]?.url;
    if (!videoUrl) {
      // Previously this produced a result with `url: undefined`.
      throw new Error("OpenAI video generation returned no video URL");
    }
    return {
      url: videoUrl,
      provider: "openai-media",
      model,
      modality: "video",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        unit: "per_video"
      },
      media: {
        duration: options.duration,
        width: options.width,
        height: options.height
      }
    };
  }
  /**
   * Builds the static capability metadata attached to a listed model.
   *
   * @param id Model id.
   * @param modality Modality the id was classified as.
   * @returns Capability object for the modality, or `undefined` for an unknown modality.
   */
  getCapabilities(id, modality) {
    if (modality === "image") {
      const maxSide = id.startsWith("dall-e-3") ? 1792 : 1024;
      return {
        maxWidth: maxSide,
        maxHeight: maxSide
      };
    }
    if (modality === "tts") {
      return {
        voices: ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
      };
    }
    if (modality === "video") {
      return {
        maxDuration: id.includes("pro") ? 20 : 10,
        supportsStreaming: false
      };
    }
    if (modality === "stt") {
      return {
        languages: ["en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su"]
      };
    }
    return void 0;
  }
};
|
|
2757
|
+
|
|
2758
|
+
// src/providers/google-media.ts
|
|
2759
|
+
/** Root URL of the Google Generative Language REST API (v1beta). */
var GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
/** Abort timeout (milliseconds) applied to the lightweight `/models` requests. */
var FETCH_TIMEOUT_MS6 = 8e3;
/**
 * Prebuilt voice names advertised for Google TTS models.
 * NOTE(review): verify this list against the current Gemini speech-generation
 * documentation — "Perseus" in particular should be confirmed as a valid voice.
 */
var GOOGLE_TTS_VOICES = [
  "Aoede",
  "Charon",
  "Fenrir",
  "Kore",
  "Puck",
  "Leda",
  "Orus",
  "Perseus",
  "Zephyr",
  "Callirrhoe"
];
/**
 * Maps an entry of the Google model list to the media modality it serves.
 *
 * The decision combines the model name (with a leading "models/" removed) and
 * the generation methods the entry declares.
 *
 * @param model Model-list entry; expected to carry `name` and `supportedGenerationMethods`.
 * @returns "image", "video" or "tts"; `null` when the entry is missing, malformed
 *          or not a media model.
 */
function classifyGoogleModel(model) {
  // Tolerate null/malformed entries: the caller would otherwise drop the whole list.
  const fullName = typeof model?.name === "string" ? model.name : "";
  // Strip only a leading "models/" so the id matches the one listModels() reports.
  const name = fullName.startsWith("models/") ? fullName.slice("models/".length) : fullName;
  const methods = Array.isArray(model?.supportedGenerationMethods) ? model.supportedGenerationMethods : [];
  if (name.startsWith("imagen") && methods.includes("predict")) return "image";
  if (name.startsWith("veo") && methods.includes("predictLongRunning")) return "video";
  if (name.includes("-tts") && methods.includes("generateContent")) return "tts";
  return null;
}
|
|
2781
|
+
/** Billing unit reported by listModels() for each modality. */
var GOOGLE_MEDIA_COST_UNITS = { image: "per_image", video: "per_video", tts: "per_1k_chars" };
/** Sample rate assumed for TTS audio when the response MIME type carries no `rate=` parameter. */
var GOOGLE_TTS_DEFAULT_SAMPLE_RATE = 24e3;
/**
 * Builds request headers that carry the API key.
 * The key travels in the `x-goog-api-key` header rather than in the URL so it
 * does not end up in request logs or error messages that include the URL.
 */
function googleMediaHeaders(apiKey, hasJsonBody = false) {
  return hasJsonBody ? { "Content-Type": "application/json", "x-goog-api-key": apiKey } : { "x-goog-api-key": apiKey };
}
/** Prepends a 44-byte RIFF/WAVE header to 16-bit little-endian PCM samples. */
function wrapPcm16AsWav(pcm, sampleRate, channels = 1) {
  const bitsPerSample = 16;
  const blockAlign = channels * bitsPerSample / 8;
  const header = Buffer.alloc(44);
  header.write("RIFF", 0, "ascii");
  header.writeUInt32LE(36 + pcm.length, 4);
  header.write("WAVE", 8, "ascii");
  header.write("fmt ", 12, "ascii");
  header.writeUInt32LE(16, 16); // size of the fmt chunk
  header.writeUInt16LE(1, 20); // audio format 1 = linear PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(sampleRate * blockAlign, 28); // byte rate
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write("data", 36, "ascii");
  header.writeUInt32LE(pcm.length, 40);
  return Buffer.concat([header, pcm]);
}
/**
 * Provider for Google-hosted media models (image, video, text-to-speech).
 *
 * The generation methods throw an `Error` when the API answers with a non-2xx
 * status or with a success response that carries no media.
 */
var GoogleMediaProvider = class {
  /**
   * @param apiKey Google API key; sent in the `x-goog-api-key` request header.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
  }
  id = "google-media";
  name = "Google (Image, Video, TTS)";
  modalities = ["image", "video", "tts"];
  isLocal = false;
  /** Classified model list from the first successful listModels() call; null until then. */
  modelsCache = null;
  /** API key; declared so the property exists on the class shape, assigned by the constructor. */
  apiKey;
  /**
   * Checks that the API is reachable and accepts the key.
   *
   * @returns `true` when the models endpoint answers with a 2xx status within
   *          the timeout; `false` on any error, timeout or non-2xx status.
   */
  async ping() {
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS6);
      try {
        const res = await fetch(`${GOOGLE_API_BASE}/models`, {
          headers: googleMediaHeaders(this.apiKey),
          signal: controller.signal
        });
        return res.ok;
      } finally {
        clearTimeout(timer);
      }
    } catch {
      return false;
    }
  }
  /**
   * Lists the media models visible to the key.
   *
   * The first successful response is cached for the lifetime of the instance.
   * Failures are not cached, so a later call retries.
   *
   * @param modality Optional modality filter.
   * @returns The matching models, or an empty array when the request fails.
   */
  async listModels(modality) {
    if (this.modelsCache) {
      return modality ? this.modelsCache.filter((m) => m.modality === modality) : this.modelsCache;
    }
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS6);
      let data;
      try {
        // Ask for the largest page so media models are not cut off by the default page size.
        const res = await fetch(`${GOOGLE_API_BASE}/models?pageSize=1000`, {
          headers: googleMediaHeaders(this.apiKey),
          signal: controller.signal
        });
        if (!res.ok) return [];
        data = await res.json();
      } finally {
        clearTimeout(timer);
      }
      const entries = Array.isArray(data?.models) ? data.models : [];
      const logo = getProviderLogo("google");
      const models = [];
      for (const entry of entries) {
        const kind = classifyGoogleModel(entry);
        if (!kind) continue;
        const fullName = entry.name ?? "";
        const modelId = fullName.startsWith("models/") ? fullName.slice("models/".length) : fullName;
        let capabilities = void 0;
        if (kind === "video") capabilities = { maxDuration: 8, supportsStreaming: false };
        else if (kind === "tts") capabilities = { voices: GOOGLE_TTS_VOICES };
        models.push({
          id: modelId,
          provider: "google-media",
          name: entry.displayName ?? modelId,
          modality: kind,
          local: false,
          // TTS is billed per character, matching the unit speak() reports.
          cost: { price: 0, unit: GOOGLE_MEDIA_COST_UNITS[kind] ?? "per_image" },
          logo,
          description: entry.description,
          capabilities
        });
      }
      this.modelsCache = models;
      return modality ? models.filter((m) => m.modality === modality) : models;
    } catch {
      // Best effort: discovery problems must not break the caller.
      return [];
    }
  }
  /**
   * Generates a single image.
   *
   * @param options Image options; `model` defaults to "imagen-4.0-generate-001".
   * @returns A result whose `buffer` holds the decoded image bytes.
   * @throws Error when the API answers with a non-2xx status or returns no image.
   */
  async image(options) {
    const model = options.model ?? "imagen-4.0-generate-001";
    const start = Date.now();
    const body = {
      instances: [{ prompt: options.prompt }],
      parameters: {
        sampleCount: 1
      }
    };
    const res = await fetch(`${GOOGLE_API_BASE}/models/${model}:predict`, {
      method: "POST",
      headers: googleMediaHeaders(this.apiKey, true),
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`Google image generation failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    const base64 = data?.predictions?.[0]?.bytesBase64Encoded ?? data?.generatedImages?.[0]?.image?.imageBytes;
    if (!base64) {
      throw new Error("Google image generation returned no image data");
    }
    return {
      buffer: Buffer.from(base64, "base64"),
      provider: "google-media",
      model,
      modality: "image",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        unit: "per_image"
      },
      media: {
        format: "png"
      }
    };
  }
  /**
   * Synthesizes speech for `options.text`.
   *
   * The API delivers headerless 16-bit PCM. It is wrapped in a WAV container
   * here so the returned buffer really is the "wav" the result announces.
   *
   * @param options Speech options; defaults: model "gemini-2.5-flash-preview-tts", voice "Kore".
   * @returns A result whose `buffer` holds a playable WAV file.
   * @throws Error when the API answers with a non-2xx status or returns no audio.
   */
  async speak(options) {
    const model = options.model ?? "gemini-2.5-flash-preview-tts";
    const voice = options.voice ?? "Kore";
    const start = Date.now();
    const body = {
      contents: [{ parts: [{ text: options.text }] }],
      // camelCase throughout, as in the documented REST request.
      generationConfig: {
        responseModalities: ["AUDIO"],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice }
          }
        }
      }
    };
    const res = await fetch(`${GOOGLE_API_BASE}/models/${model}:generateContent`, {
      method: "POST",
      headers: googleMediaHeaders(this.apiKey, true),
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`Google TTS failed (${res.status}): ${errorBody}`);
    }
    const data = await res.json();
    const parts = data?.candidates?.[0]?.content?.parts;
    // Take the first part that actually carries audio, not blindly part 0.
    const inlineData = (Array.isArray(parts) ? parts : []).find((part) => part?.inlineData?.data)?.inlineData;
    if (!inlineData?.data) {
      throw new Error("Google TTS returned no audio data");
    }
    const raw = Buffer.from(inlineData.data, "base64");
    const mimeType = String(inlineData.mimeType ?? "").toLowerCase();
    const alreadyWav = raw.length >= 12 && raw.toString("ascii", 0, 4) === "RIFF";
    const isRawPcm = !alreadyWav && (mimeType === "" || /l16|pcm/.test(mimeType));
    const rateMatch = /rate=(\d+)/.exec(mimeType);
    const sampleRate = rateMatch ? Number(rateMatch[1]) : GOOGLE_TTS_DEFAULT_SAMPLE_RATE;
    const buffer = isRawPcm ? wrapPcm16AsWav(raw, sampleRate) : raw;
    return {
      buffer,
      provider: "google-media",
      model,
      modality: "tts",
      latencyMs: Date.now() - start,
      usage: {
        cost: 0,
        input: options.text.length,
        unit: "per_1k_chars"
      },
      media: {
        format: "wav"
      }
    };
  }
  /**
   * Generates a video through a long-running operation.
   *
   * Starts the operation, then polls it every 5 seconds for up to 5 minutes.
   * A poll that answers with a non-2xx status is retried on the next tick.
   *
   * @param options Video options; `model` defaults to "veo-3.0-generate-001";
   *                `duration` is sent as `durationSeconds` when truthy.
   * @returns A result carrying `buffer` (inline bytes) or `url` (download URI).
   *          NOTE(review): the download URI presumably requires the API key to
   *          fetch — confirm before handing it to unauthenticated clients.
   * @throws Error when the start request fails, the operation reports an error,
   *         it completes without a video, or it does not finish in time.
   */
  async video(options) {
    const model = options.model ?? "veo-3.0-generate-001";
    const start = Date.now();
    const body = {
      instances: [{ prompt: options.prompt }],
      parameters: {
        sampleCount: 1,
        ...(options.duration ? { durationSeconds: options.duration } : {})
      }
    };
    const res = await fetch(`${GOOGLE_API_BASE}/models/${model}:predictLongRunning`, {
      method: "POST",
      headers: googleMediaHeaders(this.apiKey, true),
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const errorBody = await res.text();
      throw new Error(`Google video generation failed (${res.status}): ${errorBody}`);
    }
    const operation = await res.json();
    const operationName = operation?.name;
    if (!operationName) {
      throw new Error("Google video generation returned no operation name");
    }
    const deadline = Date.now() + 3e5;
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, 5e3));
      const pollRes = await fetch(`${GOOGLE_API_BASE}/${operationName}`, {
        headers: googleMediaHeaders(this.apiKey)
      });
      if (!pollRes.ok) continue;
      const status = await pollRes.json();
      if (!status.done) continue;
      if (status.error) {
        // A finished operation can carry an error instead of a response.
        const reason = status.error.message ?? JSON.stringify(status.error);
        throw new Error(`Google video generation failed: ${reason}`);
      }
      // The REST API nests samples under generateVideoResponse; keep the flat shape as a fallback.
      const sample = status.response?.generateVideoResponse?.generatedSamples?.[0] ?? status.response?.generatedSamples?.[0];
      const video = sample?.video;
      const common = {
        provider: "google-media",
        model,
        modality: "video",
        latencyMs: Date.now() - start,
        usage: { cost: 0, unit: "per_video" },
        media: { format: "mp4", duration: options.duration }
      };
      if (video?.bytesBase64Encoded) {
        return { buffer: Buffer.from(video.bytesBase64Encoded, "base64"), ...common };
      }
      if (video?.uri) {
        return { url: video.uri, ...common };
      }
      throw new Error("Google video generation returned no video data");
    }
    throw new Error(`Google video generation timed out after 5 minutes`);
  }
};
|
|
3005
|
+
|
|
2511
3006
|
// src/noosphere.ts
|
|
2512
3007
|
var Noosphere = class {
|
|
2513
3008
|
config;
|
|
@@ -2750,6 +3245,12 @@ var Noosphere = class {
|
|
|
2750
3245
|
if (hasAnyLLMKey) {
|
|
2751
3246
|
this.registry.addProvider(new PiAiProvider(llmKeys));
|
|
2752
3247
|
}
|
|
3248
|
+
if (keys.openai) {
|
|
3249
|
+
this.registry.addProvider(new OpenAIMediaProvider(keys.openai));
|
|
3250
|
+
}
|
|
3251
|
+
if (keys.google) {
|
|
3252
|
+
this.registry.addProvider(new GoogleMediaProvider(keys.google));
|
|
3253
|
+
}
|
|
2753
3254
|
if (keys.fal) {
|
|
2754
3255
|
this.registry.addProvider(new FalProvider(keys.fal));
|
|
2755
3256
|
}
|
|
@@ -2928,11 +3429,13 @@ var Noosphere = class {
|
|
|
2928
3429
|
};
|
|
2929
3430
|
export {
|
|
2930
3431
|
AudioCraftProvider,
|
|
3432
|
+
GoogleMediaProvider,
|
|
2931
3433
|
HfLocalProvider,
|
|
2932
3434
|
Noosphere,
|
|
2933
3435
|
NoosphereError,
|
|
2934
3436
|
OllamaProvider,
|
|
2935
3437
|
OpenAICompatProvider,
|
|
3438
|
+
OpenAIMediaProvider,
|
|
2936
3439
|
PROVIDER_IDS,
|
|
2937
3440
|
PROVIDER_LOGOS,
|
|
2938
3441
|
WhisperLocalProvider,
|