vargai 0.3.0
- package/.claude/settings.local.json +7 -0
- package/.env.example +27 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +125 -0
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +78 -0
- package/SKILLS.md +173 -0
- package/STRUCTURE.md +92 -0
- package/biome.json +34 -0
- package/bun.lock +1254 -0
- package/commitlint.config.js +22 -0
- package/docs/plan.md +66 -0
- package/docs/todo.md +14 -0
- package/docs/varg-sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +69 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +772 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +9 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +112 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/src/definitions/actions/transcribe.ts +200 -0
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.json +45 -0
package/src/providers/fireworks.ts
@@ -0,0 +1,193 @@
/**
 * Fireworks.ai provider for audio transcription with word-level timestamps
 * Supports Whisper models with advanced features like diarization and VAD
 */

import { readFileSync, writeFileSync } from "node:fs";
import type { JobStatusUpdate, ProviderConfig } from "../core/schema/types";
import { BaseProvider } from "./base";

// Types
export interface FireworksWord {
  word: string;
  language: string;
  probability: number;
  hallucination_score: number;
  start: number;
  end: number;
  retry_count: number;
}

export interface FireworksResponse {
  task: string;
  language: string;
  text: string;
  request_id: string;
  words: FireworksWord[];
  duration: number;
}

export class FireworksProvider extends BaseProvider {
  readonly name = "fireworks";

  async submit(
    _model: string,
    _inputs: Record<string, unknown>,
    _config?: ProviderConfig,
  ): Promise<string> {
    const jobId = `fw_${Date.now()}_${Math.random().toString(36).slice(2)}`;
    console.log(`[fireworks] starting transcription: ${jobId}`);
    return jobId;
  }

  async getStatus(_jobId: string): Promise<JobStatusUpdate> {
    return { status: "completed" };
  }

  async getResult(_jobId: string): Promise<unknown> {
    return null;
  }

  // ============================================================================
  // High-level convenience methods
  // ============================================================================

  async transcribe(options: {
    audioPath: string;
    vadModel?: "whisperx-pyannet" | "silero";
    alignmentModel?: "tdnn_ffn" | "wav2vec2";
    responseFormat?: "json" | "verbose_json" | "text" | "srt" | "vtt";
    preprocessing?: "none" | "denoise";
    temperature?: string;
    timestampGranularities?: "word" | "segment";
    diarize?: boolean;
    language?: string;
    outputPath?: string;
  }): Promise<FireworksResponse> {
    const {
      audioPath,
      vadModel = "whisperx-pyannet",
      alignmentModel = "tdnn_ffn",
      responseFormat = "verbose_json",
      preprocessing = "none",
      temperature = "0,0.2,0.4,0.6,0.8,1",
      timestampGranularities = "word",
      diarize = false,
      language,
      outputPath,
    } = options;

    if (!process.env.FIREWORKS_API_KEY) {
      throw new Error("FIREWORKS_API_KEY environment variable is required");
    }

    console.log("[fireworks] transcribing audio...");

    // Load audio file
    let audioBlob: Blob;
    let fileName = "audio.mp3";

    if (audioPath.startsWith("http://") || audioPath.startsWith("https://")) {
      const audioResponse = await fetch(audioPath);
      audioBlob = await audioResponse.blob();
      fileName = audioPath.split("/").pop()?.split("?")[0] || "audio.mp3";
    } else {
      const buffer = readFileSync(audioPath);
      audioBlob = new Blob([buffer]);
      fileName = audioPath.split("/").pop() || "audio.mp3";
    }

    // Prepare form data
    const formData = new FormData();
    formData.append("file", audioBlob, fileName);
    formData.append("vad_model", vadModel);
    formData.append("alignment_model", alignmentModel);
    formData.append("response_format", responseFormat);
    formData.append("preprocessing", preprocessing);
    formData.append("temperature", temperature);
    formData.append("timestamp_granularities", timestampGranularities);
    formData.append("diarize", diarize.toString());

    if (language) {
      formData.append("language", language);
    }

    // Call Fireworks API
    const response = await fetch(
      "https://audio-prod.us-virginia-1.direct.fireworks.ai/v1/audio/transcriptions",
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${process.env.FIREWORKS_API_KEY}`,
        },
        body: formData,
      },
    );

    if (!response.ok) {
      const errorText = await response.text();
      console.error("[fireworks] api error:", errorText);
      throw new Error(`fireworks api error: ${response.statusText}`);
    }

    const data = (await response.json()) as FireworksResponse;

    console.log(
      `[fireworks] transcription complete (${data.words?.length || 0} words)`,
    );

    // Save to file if requested
    if (outputPath) {
      let content: string;

      if (outputPath.endsWith(".srt")) {
        content = convertToSRT(data.words || []);
      } else if (outputPath.endsWith(".json")) {
        content = JSON.stringify(data, null, 2);
      } else {
        content = data.text;
      }

      writeFileSync(outputPath, content);
      console.log(`[fireworks] saved to ${outputPath}`);
    }

    return data;
  }
}

// Helper function to convert words to SRT format
function formatTime(seconds: number): string {
  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const secs = Math.floor(seconds % 60);
  const millis = Math.floor((seconds % 1) * 1000);

  return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(2, "0")}:${String(secs).padStart(2, "0")},${String(millis).padStart(3, "0")}`;
}

export function convertToSRT(words: FireworksWord[]): string {
  let srt = "";
  let index = 1;

  for (const word of words) {
    const startTime = formatTime(word.start);
    const endTime = formatTime(word.end);

    srt += `${index}\n`;
    srt += `${startTime} --> ${endTime}\n`;
    srt += `${word.word.trim()}\n\n`;
    index++;
  }

  return srt;
}

// Export singleton instance
export const fireworksProvider = new FireworksProvider();

// Re-export convenience functions for backward compatibility
export const transcribeWithFireworks = (
  options: Parameters<FireworksProvider["transcribe"]>[0],
) => fireworksProvider.transcribe(options);
export const convertFireworksToSRT = convertToSRT;

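For orientation, a minimal usage sketch of the transcribe path added above (hypothetical script, not part of the package): it calls the re-exported transcribeWithFireworks helper on a hypothetical local file and writes SRT captions. It assumes FIREWORKS_API_KEY is set and that the script sits next to the package's src/ directory.

    // Hypothetical example, not included in the diff.
    import { transcribeWithFireworks } from "./src/providers/fireworks";

    const result = await transcribeWithFireworks({
      audioPath: "./narration.mp3",   // local path or http(s) URL (hypothetical file)
      timestampGranularities: "word",
      outputPath: "./narration.srt",  // an .srt extension routes through convertToSRT
    });

    console.log(`${result.words.length} words over ${result.duration}s`);
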
package/src/providers/groq.ts
@@ -0,0 +1,149 @@
/**
 * Groq provider for ultra-fast LLM inference
 * Supports Llama, Mixtral, Gemma models and Whisper transcription
 */

import Groq from "groq-sdk";
import type { Uploadable } from "groq-sdk/uploads";
import type { JobStatusUpdate, ProviderConfig } from "../core/schema/types";
import { BaseProvider } from "./base";

export class GroqProvider extends BaseProvider {
  readonly name = "groq";
  private client: Groq;

  constructor(config?: ProviderConfig) {
    super(config);
    this.client = new Groq({
      apiKey: config?.apiKey || process.env.GROQ_API_KEY || "",
    });
  }

  async submit(
    _model: string,
    _inputs: Record<string, unknown>,
    _config?: ProviderConfig,
  ): Promise<string> {
    // Groq is synchronous for chat, so we generate a fake job ID
    const jobId = `groq_${Date.now()}_${Math.random().toString(36).slice(2)}`;
    console.log(`[groq] starting inference: ${jobId}`);
    return jobId;
  }

  async getStatus(_jobId: string): Promise<JobStatusUpdate> {
    return { status: "completed" };
  }

  async getResult(_jobId: string): Promise<unknown> {
    return null;
  }

  // ============================================================================
  // High-level convenience methods
  // ============================================================================

  async chatCompletion(options: {
    model?: string;
    messages: Array<{
      role: "system" | "user" | "assistant";
      content: string;
    }>;
    temperature?: number;
    maxTokens?: number;
    stream?: boolean;
  }) {
    const {
      model = GROQ_MODELS.LLAMA_90B,
      messages,
      temperature = 1,
      maxTokens = 1024,
      stream = false,
    } = options;

    console.log(`[groq] chat completion with ${model}...`);

    if (stream) {
      const streamResponse = await this.client.chat.completions.create({
        model,
        messages,
        temperature,
        max_tokens: maxTokens,
        stream: true,
      });
      console.log(`[groq] streaming response...`);
      return streamResponse;
    }

    const response = await this.client.chat.completions.create({
      model,
      messages,
      temperature,
      max_tokens: maxTokens,
      stream: false,
    });

    const content = response.choices[0]?.message?.content || "";
    console.log(`[groq] completed (${response.usage?.total_tokens} tokens)`);
    return content;
  }

  async transcribeAudio(options: {
    file: Uploadable;
    model?: string;
    language?: string;
    prompt?: string;
    temperature?: number;
  }) {
    const {
      file,
      model = GROQ_MODELS.WHISPER_LARGE,
      language,
      prompt,
      temperature,
    } = options;

    console.log(`[groq] transcribing audio with ${model}...`);

    const response = await this.client.audio.transcriptions.create({
      file,
      model,
      language,
      prompt,
      temperature,
    });

    console.log(`[groq] transcription completed`);
    return response.text;
  }

  async listModels() {
    console.log(`[groq] fetching available models...`);
    const response = await this.client.models.list();
    const models = Array.from(response.data);
    console.log(`[groq] found ${models.length} models`);
    return models;
  }
}

// Popular models
export const GROQ_MODELS = {
  LLAMA_90B: "llama-3.3-70b-versatile",
  LLAMA_8B: "llama-3.1-8b-instant",
  LLAMA_70B: "llama-3.1-70b-versatile",
  MIXTRAL_8X7B: "mixtral-8x7b-32768",
  GEMMA_7B: "gemma-7b-it",
  GEMMA_2_9B: "gemma2-9b-it",
  WHISPER_LARGE: "whisper-large-v3",
};

// Export singleton instance
export const groqProvider = new GroqProvider();

// Re-export convenience functions for backward compatibility
export const chatCompletion = (
  options: Parameters<GroqProvider["chatCompletion"]>[0],
) => groqProvider.chatCompletion(options);
export const transcribeAudio = (
  options: Parameters<GroqProvider["transcribeAudio"]>[0],
) => groqProvider.transcribeAudio(options);
export const listModels = () => groqProvider.listModels();

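For context, a minimal sketch of the non-streaming path of chatCompletion above (hypothetical script, not part of the package); it assumes GROQ_API_KEY is exported in the environment and picks an explicit model instead of the LLAMA_90B default.

    // Hypothetical example, not included in the diff.
    import { chatCompletion, GROQ_MODELS } from "./src/providers/groq";

    const hook = await chatCompletion({
      model: GROQ_MODELS.LLAMA_8B,
      messages: [
        { role: "system", content: "You write one-line video hooks." },
        { role: "user", content: "Hook for a 30-second espresso explainer." },
      ],
      temperature: 0.7,
      maxTokens: 64,
    });

    console.log(hook); // plain string because stream defaults to false
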
package/src/providers/higgsfield.ts
@@ -0,0 +1,145 @@
/**
 * Higgsfield provider for Soul image generation and character creation
 */

import {
  BatchSize,
  HiggsfieldClient,
  InputImageType,
  SoulQuality,
  SoulSize,
} from "@higgsfield/client";
import type { JobStatusUpdate, ProviderConfig } from "../core/schema/types";
import { BaseProvider } from "./base";

export class HiggsfieldProvider extends BaseProvider {
  readonly name = "higgsfield";
  private _client: HiggsfieldClient | null = null;

  /**
   * Lazy initialization of the client to avoid errors when API keys aren't set
   */
  private get client(): HiggsfieldClient {
    if (!this._client) {
      const apiKey =
        this.config.apiKey ||
        process.env.HIGGSFIELD_API_KEY ||
        process.env.HF_API_KEY;
      const apiSecret =
        process.env.HIGGSFIELD_SECRET || process.env.HF_API_SECRET;

      if (!apiKey || !apiSecret) {
        throw new Error(
          "Higgsfield API credentials not found. Set HIGGSFIELD_API_KEY/HF_API_KEY and HIGGSFIELD_SECRET/HF_API_SECRET environment variables.",
        );
      }

      this._client = new HiggsfieldClient({ apiKey, apiSecret });
    }
    return this._client;
  }

  async submit(
    model: string,
    inputs: Record<string, unknown>,
    _config?: ProviderConfig,
  ): Promise<string> {
    const jobSet = await this.client.generate(model as "/v1/text2image/soul", {
      prompt: inputs.prompt as string,
      width_and_height:
        (inputs.widthAndHeight as (typeof SoulSize)[keyof typeof SoulSize]) ||
        SoulSize.PORTRAIT_1152x2048,
      quality:
        (inputs.quality as (typeof SoulQuality)[keyof typeof SoulQuality]) ||
        SoulQuality.HD,
      style_id: inputs.styleId as string | undefined,
      batch_size:
        (inputs.batchSize as (typeof BatchSize)[keyof typeof BatchSize]) ||
        BatchSize.SINGLE,
      enhance_prompt: (inputs.enhancePrompt as boolean) ?? false,
    });

    console.log(`[higgsfield] job submitted: ${jobSet.id}`);
    return jobSet.id;
  }

  async getStatus(_jobId: string): Promise<JobStatusUpdate> {
    // Higgsfield jobs complete synchronously via submit
    return { status: "completed" };
  }

  async getResult(_jobId: string): Promise<unknown> {
    return null;
  }

  // ============================================================================
  // High-level convenience methods
  // ============================================================================

  async generateSoul(args: {
    prompt: string;
    widthAndHeight?: (typeof SoulSize)[keyof typeof SoulSize];
    quality?: (typeof SoulQuality)[keyof typeof SoulQuality];
    styleId?: string;
    batchSize?: (typeof BatchSize)[keyof typeof BatchSize];
    enhancePrompt?: boolean;
  }) {
    console.log("[higgsfield] generating soul image");
    console.log(`[higgsfield] prompt: ${args.prompt}`);

    const jobSet = await this.client.generate("/v1/text2image/soul", {
      prompt: args.prompt,
      width_and_height: args.widthAndHeight || SoulSize.PORTRAIT_1152x2048,
      quality: args.quality || SoulQuality.HD,
      style_id: args.styleId,
      batch_size: args.batchSize || BatchSize.SINGLE,
      enhance_prompt: args.enhancePrompt ?? false,
    });

    console.log(`[higgsfield] job created: ${jobSet.id}`);
    return jobSet;
  }

  async listSoulStyles() {
    console.log("[higgsfield] fetching soul styles");
    return this.client.getSoulStyles();
  }

  async createSoulId(args: { name: string; imageUrls: string[] }) {
    console.log(`[higgsfield] creating soul id: ${args.name}`);
    console.log(`[higgsfield] images: ${args.imageUrls.length}`);

    const soulId = await this.client.createSoulId({
      name: args.name,
      input_images: args.imageUrls.map((url) => ({
        type: InputImageType.IMAGE_URL,
        image_url: url,
      })),
    });

    console.log(`[higgsfield] soul id created: ${soulId.id}`);
    return soulId;
  }

  async listSoulIds(page = 1, pageSize = 20) {
    console.log("[higgsfield] listing soul ids");
    return this.client.listSoulIds(page, pageSize);
  }
}

// Re-export useful enums
export { BatchSize, SoulQuality, SoulSize };

// Export singleton instance (lazy initialization means no error on import)
export const higgsfieldProvider = new HiggsfieldProvider();

// Re-export convenience functions for backward compatibility
export const generateSoul = (
  args: Parameters<HiggsfieldProvider["generateSoul"]>[0],
) => higgsfieldProvider.generateSoul(args);
export const listSoulStyles = () => higgsfieldProvider.listSoulStyles();
export const createSoulId = (
  args: Parameters<HiggsfieldProvider["createSoulId"]>[0],
) => higgsfieldProvider.createSoulId(args);
export const listSoulIds = (page?: number, pageSize?: number) =>
  higgsfieldProvider.listSoulIds(page, pageSize);

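A minimal sketch of the generateSoul convenience method above (hypothetical, not part of the package): credentials are only read when the lazy client getter runs, so the import itself is safe, but the call assumes HIGGSFIELD_API_KEY (or HF_API_KEY) and HIGGSFIELD_SECRET (or HF_API_SECRET) are set.

    // Hypothetical example, not included in the diff.
    import { generateSoul, SoulQuality, SoulSize } from "./src/providers/higgsfield";

    const jobSet = await generateSoul({
      prompt: "studio portrait of a jazz trumpeter, warm rim light", // hypothetical prompt
      widthAndHeight: SoulSize.PORTRAIT_1152x2048,
      quality: SoulQuality.HD,
      enhancePrompt: true,
    });

    console.log(`[example] higgsfield job set: ${jobSet.id}`);
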
package/src/providers/index.ts
@@ -0,0 +1,143 @@
/**
 * Provider exports
 * Central registry of all available providers
 */

export type {
  ApifyProviderConfig,
  ApifyRunResult,
  RunActorOptions,
} from "./apify";
// Apify provider (web scraping / actors)
export {
  ACTORS,
  ApifyProvider,
  apifyProvider,
  downloadVideos,
  getDataset,
  getKeyValueStoreValue,
  getRunInfo,
  runActor,
  waitForRun,
} from "./apify";
export type { ProviderResult } from "./base";
// Base provider infrastructure
export {
  BaseProvider,
  downloadToFile,
  ensureUrl,
  getExtension,
  ProviderRegistry,
  providers,
} from "./base";
// ElevenLabs provider (voice/audio)
export {
  ElevenLabsProvider,
  elevenlabsProvider,
  generateMusic as generateMusicElevenlabs,
  generateSoundEffect,
  getVoice,
  listVoices,
  textToSpeech,
  VOICES,
} from "./elevenlabs";
// Fal.ai provider (video/image generation)
export {
  FalProvider,
  falProvider,
  generateImage,
  imageToImage,
  imageToVideo,
  textToMusic,
  textToVideo,
  wan25,
} from "./fal";
export type { ProbeResult } from "./ffmpeg";
// FFmpeg provider (local video editing)
export {
  addAudio,
  concatVideos,
  convertFormat,
  extractAudio,
  FFmpegProvider,
  fadeVideo,
  ffmpegProvider,
  getVideoDuration,
  probe,
  resizeVideo,
  splitAtTimestamps,
  trimVideo,
  xfadeVideos,
} from "./ffmpeg";
export type { FireworksResponse, FireworksWord } from "./fireworks";
// Fireworks provider (transcription)
export {
  convertFireworksToSRT,
  FireworksProvider,
  fireworksProvider,
  transcribeWithFireworks,
} from "./fireworks";
// Groq provider (LLM inference)
export {
  chatCompletion,
  GROQ_MODELS,
  GroqProvider,
  groqProvider,
  listModels,
  transcribeAudio,
} from "./groq";
// Higgsfield provider (Soul image generation)
export {
  BatchSize,
  createSoulId,
  generateSoul,
  HiggsfieldProvider,
  higgsfieldProvider,
  listSoulIds,
  listSoulStyles,
  SoulQuality,
  SoulSize,
} from "./higgsfield";
// Replicate provider (video/image generation)
export {
  MODELS,
  ReplicateProvider,
  replicateProvider,
  runImage,
  runModel,
  runVideo,
} from "./replicate";
export type { StorageConfig } from "./storage";
// Storage provider (Cloudflare R2 / S3)
export {
  generatePresignedUrl,
  getPublicUrl,
  StorageProvider,
  storageProvider,
  uploadBuffer,
  uploadFile,
  uploadFromUrl,
} from "./storage";

// Register all providers
import { apifyProvider } from "./apify";
import { providers } from "./base";
import { elevenlabsProvider } from "./elevenlabs";
import { falProvider } from "./fal";
import { ffmpegProvider } from "./ffmpeg";
import { fireworksProvider } from "./fireworks";
import { groqProvider } from "./groq";
import { higgsfieldProvider } from "./higgsfield";
import { replicateProvider } from "./replicate";
import { storageProvider } from "./storage";

// Auto-register all providers
providers.register(apifyProvider);
providers.register(falProvider);
providers.register(replicateProvider);
providers.register(elevenlabsProvider);
providers.register(groqProvider);
providers.register(fireworksProvider);
providers.register(higgsfieldProvider);
providers.register(ffmpegProvider);
providers.register(storageProvider);

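Because this barrel file registers every provider as an import side effect, downstream code needs only one import to get both the populated registry and the individual singletons. A minimal sketch (hypothetical file location, not part of the package):

    // Hypothetical example, not included in the diff.
    import { fireworksProvider, groqProvider, providers } from "./src/providers";

    // `providers` is the shared registry (exported from ./base alongside
    // ProviderRegistry) that the providers.register(...) calls above populate.
    console.log(groqProvider.name);      // "groq"
    console.log(fireworksProvider.name); // "fireworks"
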