myaidev-method 0.2.19 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +123 -5
  2. package/README.md +205 -13
  3. package/TECHNICAL_ARCHITECTURE.md +64 -2
  4. package/USER_GUIDE.md +453 -48
  5. package/bin/cli.js +187 -2
  6. package/content-rules.example.md +80 -0
  7. package/dist/mcp/mcp-config.json +138 -1
  8. package/dist/mcp/mcp-launcher.js +237 -0
  9. package/dist/mcp/openstack-server.js +1607 -0
  10. package/dist/server/.tsbuildinfo +1 -1
  11. package/dist/server/auth/layers.d.ts +1 -1
  12. package/dist/server/auth/services/AuthService.d.ts +1 -1
  13. package/dist/server/auth/services/TokenService.js.map +1 -1
  14. package/dist/server/auth/services/example.d.ts +5 -5
  15. package/package.json +17 -17
  16. package/src/config/workflows.js +532 -0
  17. package/src/index.js +21 -8
  18. package/src/lib/payloadcms-utils.js +206 -0
  19. package/src/lib/update-manager.js +2 -1
  20. package/src/lib/visual-config-utils.js +321 -295
  21. package/src/lib/visual-generation-utils.js +1080 -740
  22. package/src/lib/workflow-installer.js +512 -0
  23. package/src/libs/security/authorization-checker.js +606 -0
  24. package/src/mcp/openstack-server.js +1607 -0
  25. package/src/scripts/configure-wordpress-mcp.js +8 -3
  26. package/src/scripts/generate-visual-cli.js +365 -235
  27. package/src/scripts/openstack-setup.sh +110 -0
  28. package/src/scripts/ping.js +250 -0
  29. package/src/scripts/security/environment-detect.js +425 -0
  30. package/src/scripts/wordpress/publish-to-wordpress.js +165 -0
  31. package/src/server/auth/services/TokenService.ts +1 -1
  32. package/src/templates/claude/agents/content-rules-setup.md +657 -0
  33. package/src/templates/claude/agents/content-writer.md +328 -1
  34. package/src/templates/claude/agents/openstack-vm-manager.md +281 -0
  35. package/src/templates/claude/agents/osint-researcher.md +1075 -0
  36. package/src/templates/claude/agents/penetration-tester.md +908 -0
  37. package/src/templates/claude/agents/security-auditor.md +244 -0
  38. package/src/templates/claude/agents/security-setup.md +1094 -0
  39. package/src/templates/claude/agents/visual-content-generator.md +182 -4
  40. package/src/templates/claude/agents/webapp-security-tester.md +581 -0
  41. package/src/templates/claude/commands/myai-configure.md +85 -1
  42. package/src/templates/claude/commands/myai-content-rules-setup.md +204 -0
  43. package/src/templates/claude/commands/myai-openstack.md +229 -0
  44. package/src/templates/claude/commands/sc:security-exploit.md +464 -0
  45. package/src/templates/claude/commands/sc:security-recon.md +281 -0
  46. package/src/templates/claude/commands/sc:security-report.md +756 -0
  47. package/src/templates/claude/commands/sc:security-scan.md +441 -0
  48. package/src/templates/claude/commands/sc:security-setup.md +501 -0
  49. package/src/templates/codex/commands/myai-content-rules-setup.md +85 -0
  50. package/src/templates/gemini/commands/myai-content-rules-setup.toml +57 -0
  51. package/.claude/mcp/sparc-orchestrator-server.js +0 -607
  52. package/.claude/mcp/wordpress-server.js +0 -1277
  53. package/src/agents/content-writer-prompt.md +0 -164
  54. package/src/agents/content-writer.json +0 -70
  55. package/src/templates/claude/mcp_config.json +0 -30
  56. package/src/templates/claude/slash_commands.json +0 -166
  57. package/src/templates/scripts/configure-wordpress-mcp.js +0 -181
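The largest change in this release is the rewrite of package/src/lib/visual-generation-utils.js shown below, which drops the Vertex AI / OAuth path in favour of plain API keys and adds GPT Image, FLUX 2, and Veo 3 support. As an orientation aid, here is a minimal usage sketch based on the exports and option names that appear in the diff; the import path and the concrete values are illustrative assumptions, not taken from the package documentation.

// Illustrative sketch only - the relative import path is an assumption.
import fs from "fs-extra";
import {
  validateAPIKeys,
  estimateCost,
  generateImage,
} from "./src/lib/visual-generation-utils.js";

// Requires at least one of GEMINI_API_KEY, OPENAI_API_KEY, FAL_KEY (or BFL_API_KEY).
const { hasAny, availableServices } = validateAPIKeys();
if (!hasAny) throw new Error("No image generation API keys configured");

console.log(`~$${estimateCost("gpt-image-1.5", { quality: "medium" })} per image`);

// generateImage() picks a service (VISUAL_DEFAULT_SERVICE or the preferred one),
// enhances the prompt by type, and returns a Buffer alongside the raw result.
const hero = await generateImage("A lighthouse at dawn, dramatic sky", {
  preferredService: availableServices[0],
  type: "hero",
});
await fs.writeFile("hero.png", hero.buffer);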
@@ -2,157 +2,176 @@
  * Visual Content Generation Utilities
  *
  * Provides image and video generation capabilities using:
- * - Google Gemini 2.5 Flash Image ("Nano Banana")
- * - Google Imagen 3
- * - OpenAI GPT-Image-1 (GPT-4o Image Generation)
- * - Google Veo 2 (video)
+ *
+ * RECOMMENDED SOTA MODELS:
+ * - Google Gemini 3.0 Pro Image ("Nano Banana") - fast, cost-effective
+ * - OpenAI GPT Image 1.5 - state-of-the-art quality, best text rendering
+ *
+ * ADDITIONAL MODELS:
+ * - Google Imagen 3 - premium quality via Gemini API
+ * - Black Forest Labs FLUX 2 (pro, flex, dev) - excellent quality
+ * - Google Veo 3 - latest video generation
+ *
+ * Authentication: Uses simple API keys (GEMINI_API_KEY, OPENAI_API_KEY, FAL_KEY)
  *
  * Platform support: Claude Code, Gemini CLI, Codex CLI
  *
  * @module visual-generation-utils
  */

- import fetch from 'node-fetch';
- import fs from 'fs-extra';
- import path from 'path';
- import dotenv from 'dotenv';
- import { GoogleAuth } from 'google-auth-library';
+ import fetch from "node-fetch";
+ import fs from "fs-extra";
+ import path from "path";
+ import dotenv from "dotenv";

  dotenv.config();

  // API Configuration
- const GOOGLE_API_BASE = 'https://generativelanguage.googleapis.com/v1beta';
- const OPENAI_API_BASE = 'https://api.openai.com/v1';
+ const GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
+ const OPENAI_API_BASE = "https://api.openai.com/v1";
+
+ // Gemini Models for image generation
+ const GEMINI_IMAGE_MODEL = "gemini-3-pro-image-preview"; // Gemini 3.0 "Nano Banana" preview
+ const GEMINI_IMAGEN_MODEL = "imagen-3.0-generate-002"; // Imagen via Gemini API
+
+ // OpenAI GPT Image Models (SOTA)
+ const OPENAI_IMAGE_MODELS = {
+ "gpt-image-1.5": "gpt-image-1.5", // State-of-the-art (recommended)
+ "gpt-image-1": "gpt-image-1", // Main model
+ "gpt-image-1-mini": "gpt-image-1-mini", // Cost-effective option
+ };

- /**
- * Get OAuth2 access token for Vertex AI
- * Uses Google Application Default Credentials (ADC)
- *
- * @returns {Promise<string>} OAuth2 access token
- * @throws {Error} If authentication fails
- */
- async function getVertexAIToken() {
- try {
- const auth = new GoogleAuth({
- scopes: ['https://www.googleapis.com/auth/cloud-platform']
- });
-
- const client = await auth.getClient();
- const tokenResponse = await client.getAccessToken();
-
- if (!tokenResponse.token) {
- throw new Error('Failed to obtain access token');
- }
-
- return tokenResponse.token;
- } catch (error) {
- throw new Error(`Vertex AI authentication failed: ${error.message}`);
- }
- }
+ // FLUX 2 Models (via Fal.ai or BFL API)
+ const FLUX2_MODELS = {
+ "flux2-pro": "fal-ai/flux-2/pro", // State-of-the-art quality, fastest, lowest cost
+ "flux2-flex": "fal-ai/flux-2/flex", // Developer-controlled parameters
+ "flux2-dev": "fal-ai/flux-2/dev", // 32B open-weight model
+ // Legacy FLUX 1.x models (still available)
+ "flux-pro": "fal-ai/flux-pro/v1.1-ultra",
+ "flux-dev": "fal-ai/flux/dev",
+ };

- // Pricing (USD per image/video) - GPT-Image-1 pricing
+ // Pricing (USD per image/video)
  const PRICING = {
- gemini: 0.02, // Gemini 2.5 Flash Image (direct Google API)
- imagen: 0.03, // Imagen 3 (direct Google API)
- dalle_low: 0.02, // GPT-Image-1 low quality
- dalle_medium: 0.07, // GPT-Image-1 medium quality
- dalle_standard: 0.07, // GPT-Image-1 medium quality (alias for standard)
- dalle_high: 0.19, // GPT-Image-1 high quality
- dalle_hd: 0.19, // GPT-Image-1 high quality (alias for hd)
- veo: 0.10, // Veo 2 (estimated per video)
- // Fal.ai pricing
- flux_pro: 0.06, // FLUX Pro v1.1 Ultra
- flux_dev: 0.025, // FLUX Dev (per megapixel)
- veo3: 0.40, // Veo 3 (per second)
- gemini_fal: 0.0398, // Gemini via fal.ai (fallback)
- imagen_fal: 0.05 // Imagen via fal.ai (fallback)
+ // SOTA Models (Recommended)
+ gemini: 0.02, // Gemini 3.0 Pro Image "Nano Banana" - fast, cheap
+ "gpt-image-1.5": 0.19, // OpenAI GPT Image 1.5 - SOTA quality (high quality)
+ "gpt-image-1.5-medium": 0.07, // GPT Image 1.5 medium quality
+ "gpt-image-1.5-low": 0.02, // GPT Image 1.5 low quality
+ "gpt-image-1": 0.19, // OpenAI GPT Image 1 (high quality)
+ "gpt-image-1-mini": 0.02, // OpenAI GPT Image 1 Mini - budget option
+ // Additional Models
+ imagen: 0.03, // Imagen 3 (Gemini API)
+ nano_banana_pro: 0.15, // Nano Banana Pro (Fal API)
+ // FLUX 2 pricing
+ flux2_pro: 0.05, // FLUX 2 Pro
+ flux2_flex: 0.04, // FLUX 2 Flex
+ flux2_dev: 0.025, // FLUX 2 Dev
+ // Legacy FLUX 1.x
+ flux_pro: 0.06, // FLUX Pro v1.1 Ultra
+ flux_dev: 0.025, // FLUX Dev (per megapixel)
+ // Video
+ veo3: 0.4, // Veo 3 (per second)
  };

  /**
  * Validate that required API keys are configured
  *
  * @returns {Object} Validation results
- * @returns {boolean} hasGoogle - Google API key is configured
- * @returns {boolean} hasOpenAI - OpenAI API key is configured
- * @returns {boolean} hasAny - At least one API key is configured
- * @returns {Array<string>} availableServices - List of available services
  */
  export function validateAPIKeys() {
- const googleKey = process.env.GOOGLE_API_KEY;
- const openaiKey = process.env.OPENAI_API_KEY;
- const falKey = process.env.FAL_KEY;
-
- const hasGoogle = !!(googleKey && googleKey.length > 20);
- const hasOpenAI = !!(openaiKey && openaiKey.length > 20);
- const hasFal = !!(falKey && falKey.length > 20);
-
- const availableServices = [];
- if (hasGoogle) {
- availableServices.push('gemini', 'imagen', 'veo');
- }
- if (hasOpenAI) {
- availableServices.push('dalle');
- }
- if (hasFal) {
- availableServices.push('flux', 'flux-pro', 'flux-dev', 'veo3');
- }
-
- return {
- hasGoogle,
- hasOpenAI,
- hasFal,
- hasAny: hasGoogle || hasOpenAI || hasFal,
- availableServices
- };
+ // Support both GEMINI_API_KEY (preferred) and GOOGLE_API_KEY (legacy)
+ const geminiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY;
+ const openaiKey = process.env.OPENAI_API_KEY;
+ const falKey = process.env.FAL_KEY;
+ const bflKey = process.env.BFL_API_KEY; // Black Forest Labs direct API
+
+ const hasGemini = !!(geminiKey && geminiKey.length > 20);
+ const hasOpenAI = !!(openaiKey && openaiKey.length > 20);
+ const hasFal = !!(falKey && falKey.length > 20);
+ const hasBFL = !!(bflKey && bflKey.length > 20);
+
+ const availableServices = [];
+ if (hasGemini) {
+ availableServices.push("gemini", "imagen");
+ }
+ if (hasOpenAI) {
+ availableServices.push("gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini");
+ }
+ if (hasFal || hasBFL) {
+ availableServices.push(
+ "flux2-pro",
+ "flux2-flex",
+ "flux2-dev",
+ "flux-pro",
+ "flux-dev",
+ "veo3",
+ );
+ }
+
+ return {
+ hasGemini,
+ hasGoogle: hasGemini, // Legacy compatibility
+ hasOpenAI,
+ hasFal,
+ hasBFL,
+ hasAny: hasGemini || hasOpenAI || hasFal || hasBFL,
+ availableServices,
+ };
  }

  /**
  * Estimate cost for image/video generation
  *
- * @param {string} service - Service name (gemini, imagen, dalle, veo)
+ * @param {string} service - Service name
  * @param {Object} options - Generation options
- * @param {string} options.quality - Quality level (standard, hd)
- * @param {string} options.size - Image size
  * @returns {number} Estimated cost in USD
  */
  export function estimateCost(service, options = {}) {
- const { quality = 'high', size = '1024x1024' } = options;
+ const { quality = "high" } = options;

- switch (service) {
- case 'gemini':
- return PRICING.gemini;
+ switch (service) {
+ case "gemini":
+ return PRICING.gemini;

- case 'imagen':
- return PRICING.imagen;
+ case "imagen":
+ return PRICING.imagen;

- case 'dalle':
- // GPT-Image-1 quality-based pricing
- if (quality === 'low') {
- return PRICING.dalle_low;
- } else if (quality === 'medium' || quality === 'standard') {
- return PRICING.dalle_medium;
- } else if (quality === 'high' || quality === 'hd') {
- return PRICING.dalle_high;
- }
- return PRICING.dalle_high; // default to high quality
+ // OpenAI GPT Image models - cost varies by quality
+ case "gpt-image-1.5":
+ if (quality === "low") return PRICING["gpt-image-1.5-low"];
+ if (quality === "medium") return PRICING["gpt-image-1.5-medium"];
+ return PRICING["gpt-image-1.5"]; // high quality default

- case 'veo':
- return PRICING.veo;
+ case "gpt-image-1":
+ return PRICING["gpt-image-1"];

- case 'flux':
- case 'flux-pro':
- return PRICING.flux_pro;
+ case "gpt-image-1-mini":
+ return PRICING["gpt-image-1-mini"];

- case 'flux-dev':
- return PRICING.flux_dev;
+ case "flux2-pro":
+ return PRICING.flux2_pro;

- case 'veo3':
- case 'veo3-fast':
- return PRICING.veo3; // per second, will multiply by duration
+ case "flux2-flex":
+ return PRICING.flux2_flex;

- default:
- return 0;
- }
+ case "flux2-dev":
+ return PRICING.flux2_dev;
+
+ case "flux":
+ case "flux-pro":
+ return PRICING.flux_pro;
+
+ case "flux-dev":
+ return PRICING.flux_dev;
+
+ case "veo3":
+ case "veo3-fast":
+ return PRICING.veo3; // per second, will multiply by duration
+
+ default:
+ return 0;
+ }
  }

  /**
@@ -162,484 +181,573 @@ export function estimateCost(service, options = {}) {
  * @returns {string} Selected service name
  * @throws {Error} If no API keys are configured
  */
- export function selectBestService(preferred = 'gemini') {
- const { availableServices, hasAny } = validateAPIKeys();
-
- if (!hasAny) {
- throw new Error('No API keys configured. Run /myai-configure visual to set up image generation.');
- }
-
- // Return preferred service if available
- if (availableServices.includes(preferred)) {
- return preferred;
- }
-
- // Fallback to first available service
- return availableServices[0];
+ export function selectBestService(preferred = "gemini") {
+ const { availableServices, hasAny } = validateAPIKeys();
+
+ if (!hasAny) {
+ throw new Error(
+ "No API keys configured. Set GEMINI_API_KEY, OPENAI_API_KEY, or FAL_KEY in your environment.",
+ );
+ }
+
+ // Return preferred service if available
+ if (availableServices.includes(preferred)) {
+ return preferred;
+ }
+
+ // Fallback to first available service
+ return availableServices[0];
  }

  /**
- * Generate image using Google Gemini 2.5 Flash Image ("Nano Banana")
- * Fast and cost-effective image generation
+ * Generate image using Google Gemini API
+ * Uses gemini-3-pro-image-preview model ("Nano Banana") with simple API key auth
  *
  * @param {string} prompt - Image description
  * @param {Object} options - Generation options
- * @param {number} options.aspectRatio - Aspect ratio (1 for square, 16/9 for wide)
+ * @param {string} options.imageSize - Image size (1K, 2K)
  * @param {number} options.maxRetries - Maximum retry attempts
  * @returns {Promise<Object>} Generated image data
  */
  export async function generateImageGemini(prompt, options = {}) {
- const {
- aspectRatio = 1,
- maxRetries = 3
- } = options;
-
- const apiKey = process.env.GOOGLE_API_KEY;
- if (!apiKey) {
- throw new Error('GOOGLE_API_KEY not configured');
- }
-
- const endpoint = `${GOOGLE_API_BASE}/models/gemini-2.0-flash-exp:generateContent`;
-
- const requestBody = {
- contents: [{
- parts: [{
- text: `Generate an image: ${prompt}\n\nAspect ratio: ${aspectRatio === 16/9 ? '16:9' : '1:1'}\nStyle: Professional, high quality, suitable for article content`
- }]
- }],
- generationConfig: {
- temperature: 0.4,
- topP: 0.95,
- topK: 40
- }
- };
-
- let lastError;
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
- try {
- const response = await fetch(`${endpoint}?key=${apiKey}`, {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json'
- },
- body: JSON.stringify(requestBody)
- });
-
- if (!response.ok) {
- const errorText = await response.text();
- throw new Error(`Gemini API error: ${response.status} - ${errorText}`);
- }
-
- const data = await response.json();
-
- // Extract image data from response
- if (data.candidates && data.candidates[0]) {
- const candidate = data.candidates[0];
-
- // Gemini returns inline data or content references
- if (candidate.content && candidate.content.parts) {
- for (const part of candidate.content.parts) {
- if (part.inlineData && part.inlineData.data) {
- return {
- data: part.inlineData.data,
- mimeType: part.inlineData.mimeType || 'image/png',
- service: 'gemini',
- cost: PRICING.gemini
- };
- }
- }
- }
- }
-
- throw new Error('No image data in Gemini response');
-
- } catch (error) {
- lastError = error;
-
- if (attempt < maxRetries) {
- const backoff = Math.pow(2, attempt) * 1000;
- console.log(`⚠️ Gemini attempt ${attempt} failed. Retrying in ${backoff/1000}s...`);
- await sleep(backoff);
- }
- }
- }
-
- throw lastError;
+ const { imageSize = "1K", maxRetries = 3 } = options;
+
+ const apiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY;
+ if (!apiKey) {
+ throw new Error(
+ "GEMINI_API_KEY not configured. Set GEMINI_API_KEY in your environment.",
+ );
+ }
+
+ const endpoint = `${GEMINI_API_BASE}/models/${GEMINI_IMAGE_MODEL}:generateContent`;
+
+ const requestBody = {
+ contents: [
+ {
+ role: "user",
+ parts: [
+ {
+ text: prompt,
+ },
+ ],
+ },
+ ],
+ generationConfig: {
+ responseModalities: ["IMAGE", "TEXT"],
+ imageConfig: {
+ image_size: imageSize,
+ },
+ },
+ };
+
+ let lastError;
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
+ try {
+ const response = await fetch(`${endpoint}?key=${apiKey}`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(requestBody),
+ });
+
+ if (!response.ok) {
+ const errorText = await response.text();
+ throw new Error(`Gemini API error: ${response.status} - ${errorText}`);
+ }
+
+ const data = await response.json();
+
+ // Handle streaming response format (array of candidates)
+ const candidates = Array.isArray(data) ? data : data.candidates || [data];
+
+ for (const candidate of candidates) {
+ const content = candidate.content || candidate;
+ const parts = content.parts || [];
+
+ for (const part of parts) {
+ // Check for inline image data
+ if (part.inlineData && part.inlineData.data) {
+ return {
+ data: part.inlineData.data,
+ mimeType: part.inlineData.mimeType || "image/png",
+ service: "gemini",
+ model: GEMINI_IMAGE_MODEL,
+ cost: PRICING.gemini,
+ };
+ }
+
+ // Check for file data reference
+ if (part.fileData && part.fileData.fileUri) {
+ const imageResponse = await fetch(part.fileData.fileUri);
+ const imageBuffer = await imageResponse.arrayBuffer();
+ const base64Data = Buffer.from(imageBuffer).toString("base64");
+
+ return {
+ data: base64Data,
+ mimeType: part.fileData.mimeType || "image/png",
+ service: "gemini",
+ model: GEMINI_IMAGE_MODEL,
+ cost: PRICING.gemini,
+ };
+ }
+ }
+ }
+
+ throw new Error("No image data in Gemini response");
+ } catch (error) {
+ lastError = error;
+
+ if (attempt < maxRetries) {
+ const backoff = Math.pow(2, attempt) * 1000;
+ console.log(
+ `⚠️ Gemini attempt ${attempt} failed: ${error.message}. Retrying in ${backoff / 1000}s...`,
+ );
+ await sleep(backoff);
+ }
+ }
+ }
+
+ throw lastError;
  }

  /**
- * Generate image using Google Imagen 4 (via Vertex AI)
- * Premium quality image generation
- *
- * Requires Vertex AI setup:
- * - GOOGLE_CLOUD_PROJECT_ID environment variable
- * - GOOGLE_CLOUD_LOCATION environment variable (default: us-central1)
- * - GOOGLE_APPLICATION_CREDENTIALS pointing to service account key JSON
+ * Generate image using Google Imagen 3 (via Gemini API)
+ * Premium quality image generation with simple API key authentication
  *
  * @param {string} prompt - Image description
  * @param {Object} options - Generation options
- * @param {string} options.size - Image size (256x256, 1024x1024)
+ * @param {string} options.aspectRatio - Aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4)
+ * @param {number} options.numberOfImages - Number of images to generate (1-4)
  * @param {number} options.maxRetries - Maximum retry attempts
  * @returns {Promise<Object>} Generated image data
  */
  export async function generateImageImagen(prompt, options = {}) {
- const {
- size = '1024x1024',
- maxRetries = 3
- } = options;
-
- // Vertex AI configuration
- const projectId = process.env.GOOGLE_CLOUD_PROJECT_ID;
- const location = process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';
-
- if (!projectId) {
- throw new Error('GOOGLE_CLOUD_PROJECT_ID not configured. Set up Vertex AI credentials.');
- }
-
- // Build Vertex AI endpoint
- const endpoint = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/imagen-4.0-generate-001:predict`;
-
- const requestBody = {
- instances: [{
- prompt: prompt
- }],
- parameters: {
- sampleCount: 1,
- aspectRatio: size === '1024x1024' ? '1:1' : (size.includes('1792') ? '16:9' : '1:1'),
- safetyFilterLevel: 'block_some',
- personGeneration: 'allow_adult'
- }
- };
-
- let lastError;
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
- try {
- // Get OAuth2 access token
- const token = await getVertexAIToken();
-
- const response = await fetch(endpoint, {
- method: 'POST',
- headers: {
- 'Authorization': `Bearer ${token}`,
- 'Content-Type': 'application/json'
- },
- body: JSON.stringify(requestBody)
- });
-
- if (!response.ok) {
- const errorText = await response.text();
- throw new Error(`Imagen API error: ${response.status} - ${errorText}`);
- }
-
- const data = await response.json();
-
- if (data.predictions && data.predictions[0]) {
- const prediction = data.predictions[0];
-
- // Imagen 4 returns base64-encoded image in bytesBase64Encoded
- if (prediction.bytesBase64Encoded) {
- return {
- data: prediction.bytesBase64Encoded,
- mimeType: prediction.mimeType || 'image/png',
- service: 'imagen',
- cost: PRICING.imagen
- };
- }
- }
-
- throw new Error('No image data in Imagen response');
-
- } catch (error) {
- lastError = error;
-
- if (attempt < maxRetries) {
- const backoff = Math.pow(2, attempt) * 1000;
- console.log(`⚠️ Imagen attempt ${attempt} failed. Retrying in ${backoff/1000}s...`);
- await sleep(backoff);
- }
- }
- }
-
- throw lastError;
+ const { aspectRatio = "1:1", numberOfImages = 1, maxRetries = 3 } = options;
+
+ const apiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY;
+ if (!apiKey) {
+ throw new Error(
+ "GEMINI_API_KEY not configured. Set GEMINI_API_KEY in your environment.",
+ );
+ }
+
+ const endpoint = `${GEMINI_API_BASE}/models/${GEMINI_IMAGEN_MODEL}:generateImages`;
+
+ const requestBody = {
+ prompt: prompt,
+ config: {
+ numberOfImages: Math.min(numberOfImages, 4),
+ aspectRatio: aspectRatio,
+ safetyFilterLevel: "BLOCK_MEDIUM_AND_ABOVE",
+ },
+ };
+
+ let lastError;
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
+ try {
+ const response = await fetch(`${endpoint}?key=${apiKey}`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(requestBody),
+ });
+
+ if (!response.ok) {
+ const errorText = await response.text();
+ throw new Error(`Imagen API error: ${response.status} - ${errorText}`);
+ }
+
+ const data = await response.json();
+
+ // Imagen returns images array with base64 encoded data
+ if (data.generatedImages && data.generatedImages[0]) {
+ const image = data.generatedImages[0];
+
+ if (image.image && image.image.imageBytes) {
+ return {
+ data: image.image.imageBytes,
+ mimeType: "image/png",
+ service: "imagen",
+ model: GEMINI_IMAGEN_MODEL,
+ cost: PRICING.imagen,
+ };
+ }
+ }
+
+ // Alternative response format
+ if (data.images && data.images[0]) {
+ const image = data.images[0];
+
+ if (image.bytesBase64Encoded || image.imageBytes) {
+ return {
+ data: image.bytesBase64Encoded || image.imageBytes,
+ mimeType: "image/png",
+ service: "imagen",
+ model: GEMINI_IMAGEN_MODEL,
+ cost: PRICING.imagen,
+ };
+ }
+ }
+
+ throw new Error("No image data in Imagen response");
+ } catch (error) {
+ lastError = error;
+
+ if (attempt < maxRetries) {
+ const backoff = Math.pow(2, attempt) * 1000;
+ console.log(
+ `⚠️ Imagen attempt ${attempt} failed: ${error.message}. Retrying in ${backoff / 1000}s...`,
+ );
+ await sleep(backoff);
+ }
+ }
+ }
+
+ throw lastError;
  }

  /**
- * Generate image using OpenAI DALL-E 3
- * Creative, high-quality image generation
+ * Generate image using OpenAI GPT Image API
+ * State-of-the-art image generation with best text rendering
+ *
+ * Features:
+ * - Best-in-class text rendering in images
+ * - Multiple quality tiers (low, medium, high)
+ * - Transparency support (PNG with transparent background)
+ * - Multiple output formats (PNG, JPEG, WebP)
  *
  * @param {string} prompt - Image description
  * @param {Object} options - Generation options
- * @param {string} options.size - Image size (1024x1024, 1024x1792, 1792x1024)
- * @param {string} options.quality - Quality level (standard, hd)
- * @param {string} options.style - Style (vivid, natural)
+ * @param {string} options.model - Model (gpt-image-1.5, gpt-image-1, gpt-image-1-mini)
+ * @param {string} options.size - Image size (1024x1024, 1536x1024, 1024x1536, auto)
+ * @param {string} options.quality - Quality level (low, medium, high, auto)
+ * @param {string} options.outputFormat - Output format (png, jpeg, webp)
+ * @param {string} options.background - Background type (transparent, opaque, auto)
  * @param {number} options.maxRetries - Maximum retry attempts
- * @returns {Promise<Object>} Generated image data with URL
+ * @returns {Promise<Object>} Generated image data
  */
- export async function generateImageDALLE(prompt, options = {}) {
- const {
- size = '1024x1024',
- quality = 'high', // low, medium, or high
- maxRetries = 3
- } = options;
-
- const apiKey = process.env.OPENAI_API_KEY;
- if (!apiKey) {
- throw new Error('OPENAI_API_KEY not configured');
- }
-
- const endpoint = `${OPENAI_API_BASE}/images/generations`;
-
- // Try GPT-Image-1 first, fall back to DALL-E 3 if not available
- const model = 'dall-e-3'; // Will use gpt-image-1 once org is verified
-
- const requestBody = {
- model: model,
- prompt: prompt,
- n: 1,
- size: size,
- ...(model === 'gpt-image-1' ? { quality } : { quality: quality === 'low' ? 'standard' : 'hd' })
- };
-
- let lastError;
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
- try {
- const response = await fetch(endpoint, {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- 'Authorization': `Bearer ${apiKey}`
- },
- body: JSON.stringify(requestBody)
- });
-
- if (!response.ok) {
- const errorData = await response.json();
- throw new Error(`DALL-E API error: ${response.status} - ${errorData.error?.message || 'Unknown error'}`);
- }
-
- const data = await response.json();
-
- if (data.data && data.data[0]) {
- const image = data.data[0];
-
- return {
- url: image.url,
- revisedPrompt: image.revised_prompt, // DALL-E often revises prompts
- mimeType: 'image/png',
- service: 'dalle',
- cost: estimateCost('dalle', { quality, size })
- };
- }
-
- throw new Error('No image data in DALL-E response');
-
- } catch (error) {
- lastError = error;
-
- if (error.message.includes('rate_limit')) {
- if (attempt < maxRetries) {
- const backoff = Math.pow(2, attempt) * 1000;
- console.log(`⚠️ DALL-E rate limited. Retrying in ${backoff/1000}s...`);
- await sleep(backoff);
- }
- } else {
- throw error; // Don't retry on other errors
- }
- }
- }
-
- throw lastError;
+ export async function generateImageOpenAI(prompt, options = {}) {
+ const {
+ model = "gpt-image-1.5",
+ size = "1024x1024",
+ quality = "high",
+ outputFormat = "png",
+ background = "auto",
+ maxRetries = 3,
+ } = options;
+
+ const apiKey = process.env.OPENAI_API_KEY;
+ if (!apiKey) {
+ throw new Error(
+ "OPENAI_API_KEY not configured. Get your key from https://platform.openai.com/api-keys",
+ );
+ }
+
+ const endpoint = `${OPENAI_API_BASE}/images/generations`;
+
+ const requestBody = {
+ model: OPENAI_IMAGE_MODELS[model] || model,
+ prompt: prompt,
+ n: 1,
+ size: size,
+ quality: quality,
+ output_format: outputFormat,
+ };
+
+ // Add background for PNG format (transparency support)
+ if (outputFormat === "png" && background !== "auto") {
+ requestBody.background = background;
+ }
+
+ let lastError;
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
+ try {
+ const response = await fetch(endpoint, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ Authorization: `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify(requestBody),
+ });
+
+ if (!response.ok) {
+ const errorText = await response.text();
+ let errorMessage = `OpenAI API error: ${response.status}`;
+ try {
+ const errorData = JSON.parse(errorText);
+ errorMessage = errorData.error?.message || errorMessage;
+ } catch {
+ errorMessage = `${errorMessage} - ${errorText}`;
+ }
+ throw new Error(errorMessage);
+ }
+
+ const data = await response.json();
+
+ // OpenAI returns base64-encoded image data
+ if (data.data && data.data[0]) {
+ const imageData = data.data[0];
+
+ // Handle base64 response (primary)
+ if (imageData.b64_json) {
+ return {
+ data: imageData.b64_json,
+ mimeType:
+ outputFormat === "jpeg"
+ ? "image/jpeg"
+ : outputFormat === "webp"
+ ? "image/webp"
+ : "image/png",
+ service: "openai",
+ model: model,
+ cost: estimateCost(model, { quality }),
+ revisedPrompt: imageData.revised_prompt,
+ };
+ }
+
+ // Handle URL response (fallback)
+ if (imageData.url) {
+ const imageResponse = await fetch(imageData.url);
+ const imageBuffer = await imageResponse.arrayBuffer();
+ const base64Data = Buffer.from(imageBuffer).toString("base64");
+
+ return {
+ data: base64Data,
+ mimeType:
+ outputFormat === "jpeg"
+ ? "image/jpeg"
+ : outputFormat === "webp"
+ ? "image/webp"
+ : "image/png",
+ service: "openai",
+ model: model,
+ cost: estimateCost(model, { quality }),
+ revisedPrompt: imageData.revised_prompt,
+ };
+ }
+ }
+
+ throw new Error("No image data in OpenAI response");
+ } catch (error) {
+ lastError = error;
+
+ if (attempt < maxRetries) {
+ const backoff = Math.pow(2, attempt) * 1000;
+ console.log(
+ `⚠️ OpenAI attempt ${attempt} failed: ${error.message}. Retrying in ${backoff / 1000}s...`,
+ );
+ await sleep(backoff);
+ }
+ }
+ }
+
+ throw lastError;
  }

  /**
- * Generate video using Google Veo 2
- * AI video generation from text prompts
+ * Generate image using FLUX 2 (via Fal.ai)
+ * State-of-the-art image generation from Black Forest Labs
  *
- * @param {string} prompt - Video description
+ * Features:
+ * - Multi-reference support (up to 10 images)
+ * - Enhanced photorealism
+ * - Complex typography and UI mockups
+ * - Image editing up to 4 megapixels
+ *
+ * @param {string} prompt - Image description
  * @param {Object} options - Generation options
- * @param {number} options.duration - Video duration in seconds (max 8)
- * @param {string} options.aspectRatio - Aspect ratio (16:9, 9:16, 1:1)
+ * @param {string} options.model - FLUX 2 model (flux2-pro, flux2-flex, flux2-dev)
+ * @param {string} options.size - Image size (square, landscape, portrait)
+ * @param {number} options.steps - Number of inference steps (flux2-flex only)
+ * @param {number} options.guidance - Guidance scale (flux2-flex only)
+ * @param {Array<string>} options.referenceImages - Reference image URLs (up to 10)
  * @param {number} options.maxRetries - Maximum retry attempts
- * @returns {Promise<Object>} Generated video data
+ * @returns {Promise<Object>} Generated image data
  */
- export async function generateVideoVeo(prompt, options = {}) {
- const {
- duration = 5,
- aspectRatio = '16:9',
- maxRetries = 3
- } = options;
-
- const apiKey = process.env.GOOGLE_API_KEY;
- if (!apiKey) {
- throw new Error('GOOGLE_API_KEY not configured');
- }
-
- const endpoint = `${GOOGLE_API_BASE}/models/veo-2.0-generate-001:predict`;
-
- const requestBody = {
- instances: [{
- prompt: prompt
- }],
- parameters: {
- duration: Math.min(duration, 8), // Max 8 seconds
- aspectRatio: aspectRatio,
- quality: '720p'
- }
- };
-
- let lastError;
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
- try {
- const response = await fetch(`${endpoint}?key=${apiKey}`, {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json'
- },
- body: JSON.stringify(requestBody)
- });
-
- if (!response.ok) {
- const errorText = await response.text();
- throw new Error(`Veo API error: ${response.status} - ${errorText}`);
- }
-
- const data = await response.json();
-
- if (data.predictions && data.predictions[0]) {
- const prediction = data.predictions[0];
-
- // Veo returns video data or URL
- if (prediction.videoData || prediction.url) {
- return {
- data: prediction.videoData,
- url: prediction.url,
- mimeType: 'video/mp4',
- service: 'veo',
- cost: PRICING.veo,
- duration: duration
- };
- }
- }
-
- throw new Error('No video data in Veo response');
-
- } catch (error) {
- lastError = error;
-
- if (attempt < maxRetries) {
- const backoff = Math.pow(2, attempt) * 2000; // Longer backoff for video
- console.log(`⚠️ Veo attempt ${attempt} failed. Retrying in ${backoff/1000}s...`);
- await sleep(backoff);
- }
- }
- }
-
- throw lastError;
+ export async function generateImageFlux2(prompt, options = {}) {
+ const {
+ model = "flux2-pro",
+ size = "square",
+ steps = 28,
+ guidance = 3.5,
+ referenceImages = [],
+ maxRetries = 3,
+ } = options;
+
+ const apiKey = process.env.FAL_KEY || process.env.BFL_API_KEY;
+ if (!apiKey) {
+ throw new Error(
+ "FAL_KEY not configured. Get your key from https://fal.ai/dashboard/keys",
+ );
+ }
+
+ // Import fal.ai client
+ const { fal } = await import("@fal-ai/client");
+ fal.config({ credentials: apiKey });
+
+ // Get endpoint for model
+ const endpoint = FLUX2_MODELS[model] || FLUX2_MODELS["flux2-pro"];
+
+ // Build input based on model capabilities
+ const input = {
+ prompt: prompt,
+ image_size: size === "1024x1024" ? "square" : size,
+ num_images: 1,
+ };
+
+ // FLUX 2 Flex supports custom parameters
+ if (model === "flux2-flex") {
+ input.num_inference_steps = steps;
+ input.guidance_scale = guidance;
+ }
+
+ // Add reference images if provided (FLUX 2 multi-reference feature)
+ if (referenceImages.length > 0) {
+ input.reference_images = referenceImages.slice(0, 10); // Max 10
+ }
+
+ let lastError;
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
+ try {
+ const result = await fal.subscribe(endpoint, {
+ input,
+ logs: false,
+ });
+
+ // Extract image from result
+ let imageUrl;
+ let contentType = "image/png";
+
+ if (result.data?.images?.[0]) {
+ imageUrl = result.data.images[0].url;
+ contentType = result.data.images[0].content_type || "image/png";
+ } else if (result.images?.[0]) {
+ imageUrl = result.images[0].url;
+ contentType = result.images[0].content_type || "image/png";
+ } else if (result.image?.url) {
+ imageUrl = result.image.url;
+ }
+
+ if (imageUrl) {
+ // Fetch and convert to base64
+ const imageResponse = await fetch(imageUrl);
+ const imageBuffer = await imageResponse.arrayBuffer();
+ const base64Data = Buffer.from(imageBuffer).toString("base64");
+
+ return {
+ data: base64Data,
+ mimeType: contentType,
+ service: "flux2",
+ model: model,
+ cost: PRICING[model.replace("-", "_")] || PRICING.flux2_pro,
+ };
+ }
+
+ throw new Error("No image data in FLUX 2 response");
+ } catch (error) {
+ lastError = error;
+
+ if (attempt < maxRetries) {
+ const backoff = Math.pow(2, attempt) * 1000;
+ console.log(
+ `⚠️ FLUX 2 attempt ${attempt} failed: ${error.message}. Retrying in ${backoff / 1000}s...`,
+ );
+ await sleep(backoff);
+ }
+ }
+ }
+
+ throw lastError;
  }

  /**
- * Generate image using Fal.ai
- * Access to FLUX and other premium models
+ * Generate image using legacy FLUX 1.x (via Fal.ai)
+ * Still available for backwards compatibility
  *
  * @param {string} prompt - Image description
  * @param {Object} options - Generation options
- * @param {string} options.model - Fal.ai model (flux-pro, flux-dev, nano-banana, imagen-3-fast)
+ * @param {string} options.model - FLUX model (flux-pro, flux-dev)
  * @param {string} options.size - Image size
  * @param {number} options.maxRetries - Maximum retry attempts
  * @returns {Promise<Object>} Generated image data
  */
  export async function generateImageFal(prompt, options = {}) {
- const {
- model = 'flux-pro',
- size = '1024x1024',
- maxRetries = 3
- } = options;
-
- const apiKey = process.env.FAL_KEY;
- if (!apiKey) {
- throw new Error('FAL_KEY not configured. Get your key from https://fal.ai/dashboard/keys');
- }
-
- // Import fal.ai client
- const { fal } = await import('@fal-ai/client');
-
- // Configure credentials
- fal.config({ credentials: apiKey });
-
- // Map model names to fal.ai endpoints
- const modelMap = {
- 'flux-pro': 'fal-ai/flux-pro/v1.1-ultra',
- 'flux-dev': 'fal-ai/flux/dev',
- 'nano-banana': 'fal-ai/nano-banana',
- 'imagen-3-fast': 'fal-ai/fast-imagen'
- };
-
- const endpoint = modelMap[model] || modelMap['flux-pro'];
-
- let lastError;
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
- try {
- const result = await fal.subscribe(endpoint, {
- input: {
- prompt: prompt,
- image_size: size === '1024x1024' ? 'square' : 'landscape',
- num_images: 1
- },
- logs: false
- });
-
- // Fal.ai can return images in different formats
- let imageUrl;
- let contentType = 'image/png';
-
- if (result.data && result.data.images && result.data.images[0]) {
- imageUrl = result.data.images[0].url;
- contentType = result.data.images[0].content_type || 'image/png';
- } else if (result.images && result.images[0]) {
- imageUrl = result.images[0].url;
- contentType = result.images[0].content_type || 'image/png';
- } else if (result.image && result.image.url) {
- imageUrl = result.image.url;
- } else if (result.data && result.data[0] && result.data[0].url) {
- imageUrl = result.data[0].url;
- } else if (typeof result === 'string') {
- imageUrl = result;
- }
-
- if (imageUrl) {
- // Fal.ai returns URL, need to fetch and convert to base64
- const imageResponse = await fetch(imageUrl);
- const imageBuffer = await imageResponse.arrayBuffer();
- const base64Data = Buffer.from(imageBuffer).toString('base64');
-
- return {
- data: base64Data,
- mimeType: contentType,
- service: 'fal',
- model: model,
- cost: PRICING[`${model.replace('-', '_')}`] || PRICING.flux_pro
- };
- }
-
- throw new Error('No image data in Fal.ai response');
-
- } catch (error) {
- lastError = error;
-
- if (attempt < maxRetries) {
- const backoff = Math.pow(2, attempt) * 1000;
- console.log(`⚠️ Fal.ai attempt ${attempt} failed. Retrying in ${backoff/1000}s...`);
- await sleep(backoff);
- }
- }
- }
-
- throw lastError;
+ const { model = "flux-pro", size = "1024x1024", maxRetries = 3 } = options;
+
+ const apiKey = process.env.FAL_KEY;
+ if (!apiKey) {
+ throw new Error(
+ "FAL_KEY not configured. Get your key from https://fal.ai/dashboard/keys",
+ );
+ }
+
+ const { fal } = await import("@fal-ai/client");
+ fal.config({ credentials: apiKey });
+
+ const endpoint = FLUX2_MODELS[model] || FLUX2_MODELS["flux-pro"];
+
+ let lastError;
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
+ try {
+ const result = await fal.subscribe(endpoint, {
+ input: {
+ prompt: prompt,
+ image_size: size === "1024x1024" ? "square" : "landscape",
+ num_images: 1,
+ },
+ logs: false,
+ });
+
+ let imageUrl;
+ let contentType = "image/png";
+
+ if (result.data?.images?.[0]) {
+ imageUrl = result.data.images[0].url;
+ contentType = result.data.images[0].content_type || "image/png";
+ } else if (result.images?.[0]) {
+ imageUrl = result.images[0].url;
+ contentType = result.images[0].content_type || "image/png";
+ } else if (result.image?.url) {
+ imageUrl = result.image.url;
+ }
+
+ if (imageUrl) {
+ const imageResponse = await fetch(imageUrl);
+ const imageBuffer = await imageResponse.arrayBuffer();
+ const base64Data = Buffer.from(imageBuffer).toString("base64");
+
+ return {
+ data: base64Data,
+ mimeType: contentType,
+ service: "fal",
+ model: model,
+ cost: PRICING[model.replace("-", "_")] || PRICING.flux_pro,
+ };
+ }
+
+ throw new Error("No image data in Fal.ai response");
+ } catch (error) {
+ lastError = error;
+
+ if (attempt < maxRetries) {
+ const backoff = Math.pow(2, attempt) * 1000;
+ console.log(
+ `⚠️ Fal.ai attempt ${attempt} failed: ${error.message}. Retrying in ${backoff / 1000}s...`,
+ );
+ await sleep(backoff);
+ }
+ }
+ }
+
+ throw lastError;
  }

  /**
- * Generate video using Fal.ai (Veo 3)
- * Latest video generation models
+ * Generate video using Veo 3 (via Fal.ai)
+ * Latest video generation with outstanding quality
  *
  * @param {string} prompt - Video description
  * @param {Object} options - Generation options
@@ -649,68 +757,69 @@ export async function generateImageFal(prompt, options = {}) {
  * @param {number} options.maxRetries - Maximum retry attempts
  * @returns {Promise<Object>} Generated video data
  */
- export async function generateVideoFal(prompt, options = {}) {
- const {
- model = 'veo3',
- duration = 5,
- aspectRatio = '16:9',
- maxRetries = 3
- } = options;
-
- const apiKey = process.env.FAL_KEY;
- if (!apiKey) {
- throw new Error('FAL_KEY not configured. Get your key from https://fal.ai/dashboard/keys');
- }
-
- // Import fal.ai client
- const { fal } = await import('@fal-ai/client');
-
- // Configure credentials
- fal.config({ credentials: apiKey });
-
- const endpoint = model === 'veo3-fast' ? 'fal-ai/veo3-fast' : 'fal-ai/veo3';
-
- let lastError;
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
- try {
- const result = await fal.subscribe(endpoint, {
- input: {
- prompt: prompt,
- duration: Math.min(duration, 10),
- aspect_ratio: aspectRatio
- },
- logs: false
- });
-
- if (result.video && result.video.url) {
- // Fal.ai returns video URL
- const videoResponse = await fetch(result.video.url);
- const videoBuffer = await videoResponse.arrayBuffer();
- const base64Data = Buffer.from(videoBuffer).toString('base64');
-
- return {
- data: base64Data,
- mimeType: 'video/mp4',
- service: 'fal',
- model: model,
- cost: PRICING.veo3 * duration
- };
- }
-
- throw new Error('No video data in Fal.ai response');
-
- } catch (error) {
- lastError = error;
-
- if (attempt < maxRetries) {
- const backoff = Math.pow(2, attempt) * 1000;
- console.log(`⚠️ Fal.ai video attempt ${attempt} failed. Retrying in ${backoff/1000}s...`);
- await sleep(backoff);
- }
- }
- }
-
- throw lastError;
+ export async function generateVideoVeo3(prompt, options = {}) {
+ const {
+ model = "veo3",
+ duration = 5,
+ aspectRatio = "16:9",
+ maxRetries = 3,
+ } = options;
+
+ const apiKey = process.env.FAL_KEY;
+ if (!apiKey) {
+ throw new Error(
+ "FAL_KEY not configured. Get your key from https://fal.ai/dashboard/keys",
+ );
+ }
+
+ const { fal } = await import("@fal-ai/client");
+ fal.config({ credentials: apiKey });
+
+ const endpoint = model === "veo3-fast" ? "fal-ai/veo3-fast" : "fal-ai/veo3";
+
+ let lastError;
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
+ try {
+ const result = await fal.subscribe(endpoint, {
+ input: {
+ prompt: prompt,
+ duration: Math.min(duration, 10),
+ aspect_ratio: aspectRatio,
+ },
+ logs: false,
+ });
+
+ if (result.video?.url) {
+ const videoResponse = await fetch(result.video.url);
+ const videoBuffer = await videoResponse.arrayBuffer();
+ const base64Data = Buffer.from(videoBuffer).toString("base64");
+
+ return {
+ data: base64Data,
+ url: result.video.url,
+ mimeType: "video/mp4",
+ service: "veo3",
+ model: model,
+ cost: PRICING.veo3 * duration,
+ duration: duration,
+ };
+ }
+
+ throw new Error("No video data in Veo 3 response");
+ } catch (error) {
+ lastError = error;
+
+ if (attempt < maxRetries) {
+ const backoff = Math.pow(2, attempt) * 1000;
+ console.log(
+ `⚠️ Veo 3 attempt ${attempt} failed: ${error.message}. Retrying in ${backoff / 1000}s...`,
+ );
+ await sleep(backoff);
+ }
+ }
+ }
+
+ throw lastError;
  }

  /**
@@ -720,14 +829,14 @@ export async function generateVideoFal(prompt, options = {}) {
  * @returns {Promise<Buffer>} Image buffer
  */
  export async function downloadImage(url) {
- const response = await fetch(url);
+ const response = await fetch(url);

- if (!response.ok) {
- throw new Error(`Failed to download image: ${response.status}`);
- }
+ if (!response.ok) {
+ throw new Error(`Failed to download image: ${response.status}`);
+ }

- const arrayBuffer = await response.arrayBuffer();
- return Buffer.from(arrayBuffer);
+ const arrayBuffer = await response.arrayBuffer();
+ return Buffer.from(arrayBuffer);
  }

  /**
@@ -736,80 +845,127 @@ export async function downloadImage(url) {
  *
  * @param {string} prompt - Image description
  * @param {Object} options - Generation options
- * @param {string} options.preferredService - Preferred service (gemini, imagen, dalle)
+ * @param {string} options.preferredService - Preferred service
  * @param {string} options.type - Image type for optimization (hero, illustration, diagram)
  * @returns {Promise<Object>} Generated image data with buffer
  */
  export async function generateImage(prompt, options = {}) {
- const { preferredService, type = 'general', ...serviceOptions } = options;
-
- // Select service
- const defaultService = process.env.VISUAL_DEFAULT_SERVICE || 'gemini';
- const service = selectBestService(preferredService || defaultService);
-
- console.log(`🎨 Generating ${type} image using ${service}...`);
-
- // Enhance prompt based on image type
- const enhancedPrompt = enhancePrompt(prompt, type);
-
- // Generate based on service
- let result;
- switch (service) {
- case 'gemini':
- result = await generateImageGemini(enhancedPrompt, serviceOptions);
- break;
-
- case 'imagen':
- result = await generateImageImagen(enhancedPrompt, serviceOptions);
- break;
-
- case 'dalle':
- result = await generateImageDALLE(enhancedPrompt, serviceOptions);
- break;
-
- case 'flux':
- case 'flux-pro':
- case 'flux-dev':
- result = await generateImageFal(enhancedPrompt, { ...serviceOptions, model: service });
- break;
+ const { preferredService, type = "general", ...serviceOptions } = options;
+
+ // Select service
+ const defaultService = process.env.VISUAL_DEFAULT_SERVICE || "gemini";
+ const service = selectBestService(preferredService || defaultService);
+
+ const modelInfo = serviceOptions.model ? ` (${serviceOptions.model})` : "";
+ console.log(`🎨 Generating ${type} image using ${modelInfo}...`);
+
+ // Enhance prompt based on image type
+ const enhancedPrompt = enhancePrompt(prompt, type);
+
+ // Generate based on service
+ let result;
+ switch (service) {
+ case "gemini":
+ result = await generateImageGemini(enhancedPrompt, serviceOptions);
+ break;
+
+ case "imagen":
+ result = await generateImageImagen(enhancedPrompt, serviceOptions);
+ break;
+
+ // OpenAI GPT Image models (SOTA)
+ case "gpt-image-1.5":
+ case "gpt-image-1":
+ case "gpt-image-1-mini":
+ result = await generateImageOpenAI(enhancedPrompt, {
+ ...serviceOptions,
+ model: service,
+ });
+ break;
+
+ case "flux2-pro":
+ case "flux2-flex":
+ case "flux2-dev":
+ result = await generateImageFlux2(enhancedPrompt, {
+ ...serviceOptions,
+ model: service,
+ });
+ break;
+
+ case "flux":
+ case "flux-pro":
+ case "flux-dev":
+ case "nano-banana-pro":
+ result = await generateImageFal(enhancedPrompt, {
+ ...serviceOptions,
+ model: serviceOptions.model || service,
+ });
+ break;
+
+ default:
+ throw new Error(`Unknown service: ${service}`);
+ }
+
+ // Convert to buffer
+ let buffer;
+ if (result.data) {
+ buffer = Buffer.from(result.data, "base64");
+ } else if (result.url) {
+ buffer = await downloadImage(result.url);
+ } else {
+ throw new Error("No image data or URL in response");
+ }
+
+ return {
+ ...result,
+ buffer,
+ prompt: enhancedPrompt,
+ originalPrompt: prompt,
+ };
+ }

- default:
- throw new Error(`Unknown service: ${service}`);
- }
+ /**
+ * Generate video using auto-selected service
+ *
+ * @param {string} prompt - Video description
+ * @param {Object} options - Generation options
+ * @returns {Promise<Object>} Generated video data
+ */
+ export async function generateVideo(prompt, options = {}) {
+ const { preferredService = "veo3", ...serviceOptions } = options;

- // Convert to buffer
- let buffer;
- if (result.data) {
- // Base64 encoded data
- buffer = Buffer.from(result.data, 'base64');
- } else if (result.url) {
- // Download from URL
- buffer = await downloadImage(result.url);
- } else {
- throw new Error('No image data or URL in response');
- }
+ console.log(`🎬 Generating video using ${preferredService}...`);

- return {
- ...result,
- buffer,
- prompt: enhancedPrompt,
- originalPrompt: prompt
- };
+ return await generateVideoVeo3(prompt, serviceOptions);
  }

  /**
  * Enhance prompt based on image type
  *
  * @param {string} prompt - Original prompt
- * @param {string} type - Image type (hero, illustration, diagram, screenshot)
+ * @param {string} type - Image type (hero, illustration, diagram, infographic-*, etc.)
  * @returns {string} Enhanced prompt
  */
  function enhancePrompt(prompt, type) {
  const enhancements = {
+ // Standard types
  hero: 'Professional hero image, high quality, visually striking, suitable for article header:',
  illustration: 'Clean illustration, professional style, clear and informative:',
  diagram: 'Technical diagram, clear labels, professional design, easy to understand:',
  screenshot: 'Professional screenshot, clean interface, high resolution:',
+
+ // Infographic types (optimized for GPT Image 1.5 text rendering)
+ 'infographic-data': 'Clean data visualization infographic with clear large typography, color-coded sections, modern flat design. Include prominent title area, 3-5 data callouts with large numbers, clean minimal layout, professional business style:',
+ 'infographic-process': 'Step-by-step process infographic with clearly numbered steps, simple icons for each step, connecting arrows between steps, clean modern design, horizontal or vertical flow layout, each step clearly labeled with action text:',
+ 'infographic-comparison': 'Side-by-side comparison infographic with two distinct columns, clear header labels, aligned comparison points, checkmarks for advantages, X marks for disadvantages, professional business style, easy to scan layout:',
+ 'infographic-timeline': 'Horizontal timeline infographic with dated milestone markers, small icons at each event point, connecting timeline line, clean modern design, clear date labels, brief event descriptions:',
+
+ // Technical diagram types
+ 'architecture-diagram': 'Technical system architecture diagram with labeled component boxes, directional connection arrows, cloud/server/database icons where appropriate, clear legend area, isometric or clean flat technical illustration style:',
+ 'flowchart': 'Professional flowchart with standard shapes - diamonds for decisions, rectangles for processes, ovals for start/end points, clear yes/no branching paths, labeled arrows, clean professional style:',
+ 'sequence-diagram': 'Technical sequence diagram showing component interactions with participant boxes at top, vertical lifelines, horizontal arrows with action labels, activation boxes, clean UML-style presentation:',
+
+ // Default
  general: 'High quality image, professional style:'
  };

@@ -817,6 +973,112 @@ function enhancePrompt(prompt, type) {
817
973
  return `${prefix} ${prompt}`;
818
974
  }
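enhancePrompt stays module-internal, but the lookup it performs is a simple keyed prefix join. A sketch of the same pattern with one of the new type keys; the fallback to the general entry is an assumption based on the Default comment above, since the line defining prefix is outside this hunk:

// Standalone sketch of the prefix lookup (not the module's actual export).
const enhancements = {
  'infographic-process': 'Step-by-step process infographic with clearly numbered steps:',
  general: 'High quality image, professional style:',
};

function enhance(prompt, type) {
  const prefix = enhancements[type] || enhancements.general; // assumed fallback behaviour
  return `${prefix} ${prompt}`;
}

console.log(enhance('How our release pipeline works', 'infographic-process'));
console.log(enhance('A quiet mountain lake', 'watercolor')); // unknown type, falls back to general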

+ /**
+ * Get recommended service for image type
+ * Uses user's configured default service if available, otherwise falls back
+ * to first available service. Both Gemini and GPT Image 1.5 are excellent
+ * for text rendering - let the user choose based on their preference.
+ *
+ * @param {string} type - Image type (used for logging/future enhancements)
+ * @param {Array} availableServices - Optional pre-computed available services
+ * @returns {string} Recommended service name
+ */
+ export function getRecommendedServiceForType(type, availableServices = null) {
+ // Get available services if not provided
+ if (!availableServices) {
+ const validation = validateAPIKeys();
+ availableServices = validation.availableServices;
+ }
+
+ // Get user's configured default service
+ const userDefault = process.env.VISUAL_DEFAULT_SERVICE;
+
+ // If user has a configured default and it's available, use it
+ if (userDefault && availableServices.includes(userDefault)) {
+ return userDefault;
+ }
+
+ // Otherwise return first available service
+ if (availableServices.length > 0) {
+ return availableServices[0];
+ }
+
+ // Fallback (will likely fail without API keys, but maintains API compatibility)
+ return 'gemini';
+ }
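Usage sketch for getRecommendedServiceForType; the environment value and image type are illustrative, and the import path is assumed:

import { getRecommendedServiceForType } from './visual-generation-utils.js';

// If VISUAL_DEFAULT_SERVICE names a service that validateAPIKeys() reports as available,
// it wins; otherwise the first available service (or 'gemini' as a last resort) is used.
process.env.VISUAL_DEFAULT_SERVICE = 'gpt-image-1.5';
const service = getRecommendedServiceForType('infographic-data');
console.log(`Routing infographics to ${service}`);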
+
+ /**
+ * Build structured infographic prompt from data
+ *
+ * @param {Object} config - Infographic configuration
+ * @param {string} config.type - Infographic type
+ * @param {string} config.title - Infographic title
+ * @param {Array} config.data - Data points or steps
+ * @param {string} config.style - Style preference
+ * @returns {string} Structured prompt
+ */
+ export function buildInfographicPrompt(config) {
+ const { type, title, data = [], style = 'modern flat design' } = config;
+
+ let prompt = '';
+
+ switch (type) {
+ case 'infographic-data':
+ prompt = `Data visualization infographic titled "${title}". `;
+ prompt += `Display these metrics prominently: `;
+ prompt += data.map(d => `${d.label}: ${d.value}`).join(', ') + '. ';
+ prompt += `Style: ${style}, clear typography, color-coded sections.`;
+ break;
+
+ case 'infographic-process':
+ prompt = `Process flow infographic titled "${title}". `;
+ prompt += `Show these steps in sequence: `;
+ prompt += data.map((step, i) => `Step ${i + 1}: ${step}`).join('; ') + '. ';
+ prompt += `Style: numbered steps with icons, connecting arrows, ${style}.`;
+ break;
+
+ case 'infographic-comparison':
+ prompt = `Comparison infographic titled "${title}". `;
+ prompt += `Compare these aspects: `;
+ prompt += data.map(d => `${d.category} - Option A: ${d.optionA}, Option B: ${d.optionB}`).join('; ') + '. ';
+ prompt += `Style: two-column layout, checkmarks for strengths, ${style}.`;
+ break;
+
+ case 'infographic-timeline':
+ prompt = `Timeline infographic titled "${title}". `;
+ prompt += `Show these milestones: `;
+ prompt += data.map(d => `${d.date}: ${d.event}`).join('; ') + '. ';
+ prompt += `Style: horizontal timeline, dated markers, ${style}.`;
+ break;
+
+ default:
+ prompt = `Infographic: ${title}. ${data.join(', ')}. Style: ${style}.`;
+ }
+
+ return prompt;
+ }
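Usage sketch for buildInfographicPrompt; the data shapes follow the switch above (plain strings for process steps, label/value pairs for data, category/optionA/optionB for comparisons, date/event for timelines). The import path is assumed:

import { buildInfographicPrompt } from './visual-generation-utils.js';

const prompt = buildInfographicPrompt({
  type: 'infographic-process',
  title: 'Release Workflow',
  data: ['Open a pull request', 'Run CI checks', 'Merge and tag', 'Deploy'],
  style: 'modern flat design',
});
// "Process flow infographic titled "Release Workflow". Show these steps in sequence:
//  Step 1: Open a pull request; Step 2: Run CI checks; ... Style: numbered steps with icons, ..."
console.log(prompt);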
+
+ /**
+ * Build structured architecture diagram prompt
+ *
+ * @param {Object} config - Diagram configuration
+ * @param {string} config.title - Diagram title
+ * @param {Array} config.components - System components
+ * @param {Array} config.connections - Component connections
+ * @param {string} config.style - Style preference
+ * @returns {string} Structured prompt
+ */
+ export function buildArchitectureDiagramPrompt(config) {
+ const { title, components = [], connections = [], style = 'isometric technical' } = config;
+
+ let prompt = `Technical architecture diagram: "${title}". `;
+ prompt += `Components: ${components.join(', ')}. `;
+ prompt += `Connections: ${connections.join('; ')}. `;
+ prompt += `Style: ${style}, labeled boxes, directional arrows, clean professional design.`;
+
+ return prompt;
+ }
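And a matching sketch for buildArchitectureDiagramPrompt, where components and connections are plain strings joined into the prompt (example values are illustrative, import path assumed):

import { buildArchitectureDiagramPrompt } from './visual-generation-utils.js';

const diagramPrompt = buildArchitectureDiagramPrompt({
  title: 'Content Publishing Pipeline',
  components: ['CLI', 'MCP server', 'WordPress API', 'Object storage'],
  connections: ['CLI to MCP server', 'MCP server to WordPress API', 'MCP server to Object storage'],
});
console.log(diagramPrompt);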
+
  /**
  * Sleep utility for retry backoff
  *
@@ -824,7 +1086,7 @@ function enhancePrompt(prompt, type) {
  * @returns {Promise<void>}
  */
  function sleep(ms) {
- return new Promise(resolve => setTimeout(resolve, ms));
+ return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
@@ -834,80 +1096,158 @@ function sleep(ms) {
  * @returns {Object} Service information
  */
  export function getServiceInfo(service) {
- const info = {
- gemini: {
- name: 'Gemini 2.5 Flash Image',
- nickname: 'Nano Banana',
- speed: 'Fast',
- cost: '$0.02/image',
- quality: 'Good',
- bestFor: 'Quick hero images, high volume',
- provider: 'Google AI (Direct)'
- },
- imagen: {
- name: 'Imagen 3',
- nickname: 'Premium Quality',
- speed: 'Medium',
- cost: '$0.03/image',
- quality: 'Excellent',
- bestFor: 'Premium hero images, photorealistic',
- provider: 'Google Vertex AI (Direct)'
- },
- dalle: {
- name: 'DALL-E 3',
- nickname: 'Creative',
- speed: 'Medium',
- cost: '$0.04-0.12/image',
- quality: 'Excellent',
- bestFor: 'Creative illustrations, concept art',
- provider: 'OpenAI (Direct)'
- },
- veo: {
- name: 'Veo 2',
- nickname: 'Video Generation',
- speed: 'Slow',
- cost: '$0.10/video (estimated)',
- quality: 'Good',
- bestFor: 'Product demos, animated diagrams',
- provider: 'Google AI (Direct)'
- },
- flux: {
- name: 'FLUX Pro v1.1 Ultra',
- nickname: 'Premium Artistic',
- speed: 'Medium',
- cost: '$0.06/image',
- quality: 'Outstanding',
- bestFor: 'Premium artistic images, highest quality',
- provider: 'Fal.ai'
- },
- 'flux-pro': {
- name: 'FLUX Pro v1.1 Ultra',
- nickname: 'Premium Artistic',
- speed: 'Medium',
- cost: '$0.06/image',
- quality: 'Outstanding',
- bestFor: 'Premium artistic images, highest quality',
- provider: 'Fal.ai'
- },
- 'flux-dev': {
- name: 'FLUX Dev',
- nickname: 'Developer Friendly',
- speed: 'Fast',
- cost: '$0.025/MP',
- quality: 'Excellent',
- bestFor: 'Developer workflows, rapid iteration',
- provider: 'Fal.ai'
- },
- veo3: {
- name: 'Veo 3',
- nickname: 'Cutting Edge Video',
- speed: 'Slow',
- cost: '$0.40/second',
- quality: 'Outstanding',
- bestFor: 'Premium video content, latest features',
- provider: 'Fal.ai'
- }
- };
-
- return info[service] || null;
+ const info = {
+ gemini: {
+ name: "Gemini 3.0 Pro Image",
+ nickname: "Nano Banana",
+ speed: "Fast",
+ cost: "$0.02/image",
+ quality: "Good",
+ bestFor: "Quick hero images, high volume",
+ provider: "Google Gemini API (API Key)",
+ model: GEMINI_IMAGE_MODEL,
+ },
+ imagen: {
+ name: "Imagen 3",
+ nickname: "Premium Quality",
+ speed: "Medium",
+ cost: "$0.03/image",
+ quality: "Excellent",
+ bestFor: "Premium hero images, photorealistic",
+ provider: "Google Gemini API (API Key)",
+ model: GEMINI_IMAGEN_MODEL,
+ },
+ // OpenAI GPT Image Models (SOTA)
+ "gpt-image-1.5": {
+ name: "GPT Image 1.5",
+ nickname: "State-of-the-Art",
+ speed: "Medium",
+ cost: "$0.02-$0.19/image",
+ quality: "Outstanding",
+ bestFor: "Best text rendering, highest quality, transparent backgrounds",
+ provider: "OpenAI",
+ model: "gpt-image-1.5",
+ features: [
+ "Best-in-class text rendering",
+ "Transparency support",
+ "Multiple quality tiers",
+ "WebP/JPEG/PNG output",
+ ],
+ qualityTiers: {
+ low: "$0.02/image (~272 tokens)",
+ medium: "$0.07/image (~1056 tokens)",
+ high: "$0.19/image (~4160 tokens)",
+ },
+ },
+ "gpt-image-1": {
+ name: "GPT Image 1",
+ nickname: "Premium Quality",
+ speed: "Medium",
+ cost: "$0.19/image",
+ quality: "Outstanding",
+ bestFor: "High quality images, text rendering",
+ provider: "OpenAI",
+ model: "gpt-image-1",
+ features: [
+ "Excellent text rendering",
+ "Multiple sizes",
+ "Transparency support",
+ ],
+ },
+ "gpt-image-1-mini": {
+ name: "GPT Image 1 Mini",
+ nickname: "Cost-Effective",
+ speed: "Fast",
+ cost: "$0.02/image",
+ quality: "Good",
+ bestFor: "Quick images, budget-conscious, high volume",
+ provider: "OpenAI",
+ model: "gpt-image-1-mini",
+ features: ["Fast generation", "Low cost", "Good quality"],
+ },
+ "flux2-pro": {
+ name: "FLUX 2 Pro",
+ nickname: "State-of-the-Art",
+ speed: "Fast",
+ cost: "$0.05/image",
+ quality: "Outstanding",
+ bestFor: "Best quality, fastest generation, lowest cost",
+ provider: "Black Forest Labs (Fal.ai)",
+ model: "flux-2/pro",
+ features: [
+ "Multi-reference (up to 10 images)",
+ "Enhanced photorealism",
+ "Complex typography",
+ "UI mockups",
+ ],
+ },
+ "flux2-flex": {
+ name: "FLUX 2 Flex",
+ nickname: "Developer Control",
+ speed: "Medium",
+ cost: "$0.04/image",
+ quality: "Outstanding",
+ bestFor: "Custom parameters, fine-tuned control",
+ provider: "Black Forest Labs (Fal.ai)",
+ model: "flux-2/flex",
+ features: [
+ "Custom inference steps",
+ "Guidance scale control",
+ "Developer-friendly",
+ ],
+ },
+ "flux2-dev": {
+ name: "FLUX 2 Dev",
+ nickname: "Open-Weight",
+ speed: "Fast",
+ cost: "$0.025/image",
+ quality: "Excellent",
+ bestFor: "Developer workflows, local deployment option",
+ provider: "Black Forest Labs (Fal.ai)",
+ model: "flux-2/dev",
+ features: [
+ "32B parameters",
+ "Open-weight model",
+ "Local deployment available",
+ ],
+ },
+ "flux-pro": {
+ name: "FLUX Pro v1.1 Ultra",
+ nickname: "Legacy Premium",
+ speed: "Medium",
+ cost: "$0.06/image",
+ quality: "Outstanding",
+ bestFor: "Premium artistic images (legacy)",
+ provider: "Fal.ai",
+ },
+ "flux-dev": {
+ name: "FLUX Dev",
+ nickname: "Legacy Developer",
+ speed: "Fast",
+ cost: "$0.025/MP",
+ quality: "Excellent",
+ bestFor: "Developer workflows (legacy)",
+ provider: "Fal.ai",
+ },
+ "nano-banana-pro": {
+ name: "Nano Banana Pro",
+ nickname: "Premium Quality",
+ speed: "Fast",
+ cost: "$0.15/image ($0.30 for 4K)",
+ quality: "Excellent",
+ bestFor: "High-quality branded content, detailed illustrations",
+ provider: "Fal.ai",
+ },
+ veo3: {
+ name: "Veo 3",
+ nickname: "Cutting Edge Video",
+ speed: "Slow",
+ cost: "$0.40/second",
+ quality: "Outstanding",
+ bestFor: "Premium video content, latest features",
+ provider: "Google (Fal.ai)",
+ },
+ };
+
+ return info[service] || null;
  }
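A lookup sketch for the reworked getServiceInfo table (import path assumed). Unknown keys return null, and entries such as dalle, veo, and flux were dropped in this version, so callers should guard the result:

import { getServiceInfo } from './visual-generation-utils.js';

const info = getServiceInfo('gpt-image-1.5');
if (info) {
  console.log(`${info.name} (${info.provider}): ${info.cost}, best for ${info.bestFor}`);
}

console.log(getServiceInfo('dalle')); // null after this change; the DALL-E entry was removed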