@weirdfingers/baseboards 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +4 -1
  2. package/dist/index.js +131 -11
  3. package/dist/index.js.map +1 -1
  4. package/package.json +1 -1
  5. package/templates/api/alembic/env.py +9 -1
  6. package/templates/api/alembic/versions/20250101_000000_initial_schema.py +107 -49
  7. package/templates/api/alembic/versions/20251022_174729_remove_provider_name_from_generations.py +7 -3
  8. package/templates/api/alembic/versions/20251023_165852_switch_to_declarative_base_and_mapping.py +57 -1
  9. package/templates/api/alembic/versions/20251202_000000_add_artifact_lineage.py +134 -0
  10. package/templates/api/alembic/versions/2025925_62735_add_seed_data_for_default_tenant.py +8 -5
  11. package/templates/api/config/generators.yaml +111 -0
  12. package/templates/api/src/boards/__init__.py +1 -1
  13. package/templates/api/src/boards/api/app.py +2 -1
  14. package/templates/api/src/boards/api/endpoints/tenant_registration.py +1 -1
  15. package/templates/api/src/boards/api/endpoints/uploads.py +150 -0
  16. package/templates/api/src/boards/auth/factory.py +1 -1
  17. package/templates/api/src/boards/dbmodels/__init__.py +8 -22
  18. package/templates/api/src/boards/generators/artifact_resolution.py +45 -12
  19. package/templates/api/src/boards/generators/implementations/fal/audio/__init__.py +16 -1
  20. package/templates/api/src/boards/generators/implementations/fal/audio/beatoven_music_generation.py +171 -0
  21. package/templates/api/src/boards/generators/implementations/fal/audio/beatoven_sound_effect_generation.py +167 -0
  22. package/templates/api/src/boards/generators/implementations/fal/audio/elevenlabs_sound_effects_v2.py +194 -0
  23. package/templates/api/src/boards/generators/implementations/fal/audio/elevenlabs_tts_eleven_v3.py +209 -0
  24. package/templates/api/src/boards/generators/implementations/fal/audio/fal_elevenlabs_tts_turbo_v2_5.py +206 -0
  25. package/templates/api/src/boards/generators/implementations/fal/audio/fal_minimax_speech_26_hd.py +237 -0
  26. package/templates/api/src/boards/generators/implementations/fal/audio/minimax_speech_2_6_turbo.py +1 -1
  27. package/templates/api/src/boards/generators/implementations/fal/image/__init__.py +30 -0
  28. package/templates/api/src/boards/generators/implementations/fal/image/clarity_upscaler.py +220 -0
  29. package/templates/api/src/boards/generators/implementations/fal/image/crystal_upscaler.py +173 -0
  30. package/templates/api/src/boards/generators/implementations/fal/image/fal_ideogram_character.py +227 -0
  31. package/templates/api/src/boards/generators/implementations/fal/image/flux_2.py +203 -0
  32. package/templates/api/src/boards/generators/implementations/fal/image/flux_2_edit.py +230 -0
  33. package/templates/api/src/boards/generators/implementations/fal/image/flux_2_pro.py +204 -0
  34. package/templates/api/src/boards/generators/implementations/fal/image/flux_2_pro_edit.py +221 -0
  35. package/templates/api/src/boards/generators/implementations/fal/image/gemini_25_flash_image.py +177 -0
  36. package/templates/api/src/boards/generators/implementations/fal/image/gpt_image_1_edit_image.py +182 -0
  37. package/templates/api/src/boards/generators/implementations/fal/image/gpt_image_1_mini.py +167 -0
  38. package/templates/api/src/boards/generators/implementations/fal/image/ideogram_character_edit.py +299 -0
  39. package/templates/api/src/boards/generators/implementations/fal/image/ideogram_v2.py +190 -0
  40. package/templates/api/src/boards/generators/implementations/fal/image/nano_banana_pro_edit.py +226 -0
  41. package/templates/api/src/boards/generators/implementations/fal/image/qwen_image.py +249 -0
  42. package/templates/api/src/boards/generators/implementations/fal/image/qwen_image_edit.py +244 -0
  43. package/templates/api/src/boards/generators/implementations/fal/video/__init__.py +42 -0
  44. package/templates/api/src/boards/generators/implementations/fal/video/bytedance_seedance_v1_pro_text_to_video.py +209 -0
  45. package/templates/api/src/boards/generators/implementations/fal/video/creatify_lipsync.py +161 -0
  46. package/templates/api/src/boards/generators/implementations/fal/video/fal_bytedance_seedance_v1_pro_image_to_video.py +222 -0
  47. package/templates/api/src/boards/generators/implementations/fal/video/fal_minimax_hailuo_02_standard_text_to_video.py +152 -0
  48. package/templates/api/src/boards/generators/implementations/fal/video/fal_pixverse_lipsync.py +197 -0
  49. package/templates/api/src/boards/generators/implementations/fal/video/fal_sora_2_text_to_video.py +173 -0
  50. package/templates/api/src/boards/generators/implementations/fal/video/infinitalk.py +221 -0
  51. package/templates/api/src/boards/generators/implementations/fal/video/kling_video_v2_5_turbo_pro_image_to_video.py +175 -0
  52. package/templates/api/src/boards/generators/implementations/fal/video/minimax_hailuo_2_3_pro_image_to_video.py +153 -0
  53. package/templates/api/src/boards/generators/implementations/fal/video/sora2_image_to_video.py +172 -0
  54. package/templates/api/src/boards/generators/implementations/fal/video/sora_2_image_to_video_pro.py +175 -0
  55. package/templates/api/src/boards/generators/implementations/fal/video/sora_2_text_to_video_pro.py +163 -0
  56. package/templates/api/src/boards/generators/implementations/fal/video/sync_lipsync_v2_pro.py +155 -0
  57. package/templates/api/src/boards/generators/implementations/fal/video/veed_lipsync.py +174 -0
  58. package/templates/api/src/boards/generators/implementations/fal/video/veo3.py +194 -0
  59. package/templates/api/src/boards/generators/implementations/fal/video/veo31_first_last_frame_to_video.py +1 -1
  60. package/templates/api/src/boards/generators/implementations/fal/video/wan_pro_image_to_video.py +158 -0
  61. package/templates/api/src/boards/graphql/access_control.py +1 -1
  62. package/templates/api/src/boards/graphql/mutations/root.py +16 -4
  63. package/templates/api/src/boards/graphql/resolvers/board.py +0 -2
  64. package/templates/api/src/boards/graphql/resolvers/generation.py +10 -233
  65. package/templates/api/src/boards/graphql/resolvers/lineage.py +381 -0
  66. package/templates/api/src/boards/graphql/resolvers/upload.py +463 -0
  67. package/templates/api/src/boards/graphql/types/generation.py +62 -26
  68. package/templates/api/src/boards/middleware.py +1 -1
  69. package/templates/api/src/boards/storage/factory.py +2 -2
  70. package/templates/api/src/boards/tenant_isolation.py +9 -9
  71. package/templates/api/src/boards/workers/actors.py +10 -1
  72. package/templates/web/package.json +1 -1
  73. package/templates/web/src/app/boards/[boardId]/page.tsx +14 -5
  74. package/templates/web/src/app/lineage/[generationId]/page.tsx +233 -0
  75. package/templates/web/src/components/boards/ArtifactPreview.tsx +20 -1
  76. package/templates/web/src/components/boards/UploadArtifact.tsx +253 -0
@@ -0,0 +1,209 @@
+ """
+ fal.ai ElevenLabs Text-to-Speech Eleven-V3 generator.
+
+ Generate high-quality speech from text using ElevenLabs' Eleven-V3 model,
+ offering natural-sounding voices with customizable parameters for stability,
+ similarity, style, and speed.
+
+ Based on Fal AI's fal-ai/elevenlabs/tts/eleven-v3 model.
+ See: https://fal.ai/models/fal-ai/elevenlabs/tts/eleven-v3
+ """
+
+ import os
+
+ from pydantic import BaseModel, Field
+
+ from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
+
+
+ class ElevenlabsTtsElevenV3Input(BaseModel):
+     """Input schema for ElevenLabs TTS Eleven-V3 generation.
+
+     The text is converted to speech using advanced AI voice synthesis with
+     customizable voice characteristics and optional word-level timestamps.
+     """
+
+     text: str = Field(
+         description="The text to convert to speech",
+         min_length=1,
+     )
+     voice: str = Field(
+         default="Rachel",
+         description=(
+             "Voice selection. Available voices: "
+             "Aria, Roger, Sarah, Laura, Charlie, George, Callum, River, Liam, "
+             "Charlotte, Alice, Matilda, Will, Jessica, Eric, Chris, Brian, "
+             "Daniel, Lily, Bill, Rachel"
+         ),
+     )
+     stability: float = Field(
+         default=0.5,
+         ge=0.0,
+         le=1.0,
+         description="Voice stability. Higher values result in more consistent output",
+     )
+     similarity_boost: float = Field(
+         default=0.75,
+         ge=0.0,
+         le=1.0,
+         description="Similarity boost for voice matching",
+     )
+     style: float = Field(
+         default=0.0,
+         ge=0.0,
+         le=1.0,
+         description="Style exaggeration. Higher values add more expressiveness",
+     )
+     speed: float = Field(
+         default=1.0,
+         ge=0.7,
+         le=1.2,
+         description="Speech rate adjustment. 1.0 is normal speed",
+     )
+     timestamps: bool = Field(
+         default=False,
+         description="Whether to return timestamps for each word",
+     )
+     previous_text: str | None = Field(
+         default=None,
+         description="Context from prior content for improved continuity",
+     )
+     next_text: str | None = Field(
+         default=None,
+         description="Context for upcoming content for improved continuity",
+     )
+     language_code: str | None = Field(
+         default=None,
+         description="ISO 639-1 language code (limited model support)",
+     )
+
+
+ class FalElevenlabsTtsElevenV3Generator(BaseGenerator):
+     """ElevenLabs Text-to-Speech Eleven-V3 generator using fal.ai."""
+
+     name = "fal-elevenlabs-tts-eleven-v3"
+     artifact_type = "audio"
+     description = (
+         "Fal: ElevenLabs TTS Eleven-V3 - "
+         "High-quality text-to-speech with natural-sounding voices and customizable parameters"
+     )
+
+     def get_input_schema(self) -> type[ElevenlabsTtsElevenV3Input]:
+         return ElevenlabsTtsElevenV3Input
+
+     async def generate(
+         self, inputs: ElevenlabsTtsElevenV3Input, context: GeneratorExecutionContext
+     ) -> GeneratorResult:
+         """Generate audio using fal.ai ElevenLabs TTS Eleven-V3 model."""
+         # Check for API key (fal-client uses FAL_KEY environment variable)
+         if not os.getenv("FAL_KEY"):
+             raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")
+
+         # Import fal_client
+         try:
+             import fal_client
+         except ImportError as e:
+             raise ImportError(
+                 "fal.ai SDK is required for FalElevenlabsTtsElevenV3Generator. "
+                 "Install with: pip install weirdfingers-boards[generators-fal]"
+             ) from e
+
+         # Prepare arguments for fal.ai API
+         arguments = {
+             "text": inputs.text,
+             "voice": inputs.voice,
+             "stability": inputs.stability,
+             "similarity_boost": inputs.similarity_boost,
+             "style": inputs.style,
+             "speed": inputs.speed,
+             "timestamps": inputs.timestamps,
+         }
+
+         # Add optional context parameters if provided
+         if inputs.previous_text is not None:
+             arguments["previous_text"] = inputs.previous_text
+         if inputs.next_text is not None:
+             arguments["next_text"] = inputs.next_text
+         if inputs.language_code is not None:
+             arguments["language_code"] = inputs.language_code
+
+         # Submit async job and get handler
+         handler = await fal_client.submit_async(
+             "fal-ai/elevenlabs/tts/eleven-v3",
+             arguments=arguments,
+         )
+
+         # Store the external job ID for tracking
+         await context.set_external_job_id(handler.request_id)
+
+         # Stream progress updates (sample every 3rd event to avoid spam)
+         from .....progress.models import ProgressUpdate
+
+         event_count = 0
+         async for event in handler.iter_events(with_logs=True):
+             event_count += 1
+
+             # Process every 3rd event to provide feedback without overwhelming
+             if event_count % 3 == 0:
+                 # Extract logs if available
+                 logs = getattr(event, "logs", None)
+                 if logs:
+                     # Join log entries into a single message
+                     if isinstance(logs, list):
+                         message = " | ".join(str(log) for log in logs if log)
+                     else:
+                         message = str(logs)
+
+                     if message:
+                         await context.publish_progress(
+                             ProgressUpdate(
+                                 job_id=handler.request_id,
+                                 status="processing",
+                                 progress=50.0,  # Approximate mid-point progress
+                                 phase="processing",
+                                 message=message,
+                             )
+                         )
+
+         # Get final result
+         result = await handler.get()
+
+         # Extract audio URL from result
+         # fal.ai returns: {"audio": {"url": "...", "content_type": "...", ...}}
+         audio_data = result.get("audio")
+         if audio_data is None:
+             raise ValueError("No audio data returned from fal.ai API")
+
+         audio_url = audio_data.get("url")
+         if not audio_url:
+             raise ValueError("Audio URL missing in fal.ai response")
+
+         # Determine format from content_type or default to mp3
+         content_type = audio_data.get("content_type", "audio/mpeg")
+         format_map = {
+             "audio/mpeg": "mp3",
+             "audio/mp3": "mp3",
+             "audio/wav": "wav",
+             "audio/ogg": "ogg",
+         }
+         audio_format = format_map.get(content_type, "mp3")
+
+         # Store audio result
+         artifact = await context.store_audio_result(
+             storage_url=audio_url,
+             format=audio_format,
+             output_index=0,
+         )
+
+         return GeneratorResult(outputs=[artifact])
+
+     async def estimate_cost(self, inputs: ElevenlabsTtsElevenV3Input) -> float:
+         """Estimate cost for ElevenLabs TTS Eleven-V3 generation.
+
+         ElevenLabs TTS Eleven-V3 costs $0.10 per 1000 characters.
+         """
+         # Calculate character count
+         char_count = len(inputs.text)
+
+         # Cost is $0.10 per 1000 characters
+         return (char_count / 1000.0) * 0.10
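
The new TTS generators in this release all follow the same submit/poll flow against fal.ai. As a rough standalone illustration (not part of the package), the sketch below drives the same eleven-v3 endpoint directly with fal_client, using only calls that appear in the generator code above (submit_async, request_id, iter_events, get) and the {"audio": {"url": ...}} response shape; it assumes FAL_KEY is exported and the fal-client dependency is installed.

# Standalone sketch of the shared submit/poll flow (assumes FAL_KEY is set).
import asyncio

import fal_client


async def main() -> None:
    # Submit the job asynchronously; the handler exposes the request ID that
    # the generators record via context.set_external_job_id().
    handler = await fal_client.submit_async(
        "fal-ai/elevenlabs/tts/eleven-v3",
        arguments={"text": "Hello from the 0.6.0 template", "voice": "Rachel"},
    )
    print("request_id:", handler.request_id)

    # The generators sample every 3rd event; here we simply print any logs.
    async for event in handler.iter_events(with_logs=True):
        logs = getattr(event, "logs", None)
        if logs:
            print(logs)

    # Fetch the final result and read the audio URL.
    result = await handler.get()
    print("audio url:", result["audio"]["url"])


asyncio.run(main())
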
@@ -0,0 +1,206 @@
+ """
+ fal.ai ElevenLabs TTS Turbo v2.5 text-to-speech generator.
+
+ Generate high-speed text-to-speech audio using ElevenLabs TTS Turbo v2.5.
+ Converts written text into spoken audio with customizable voice, speed, and prosody parameters.
+
+ Based on Fal AI's fal-ai/elevenlabs/tts/turbo-v2.5 model.
+ See: https://fal.ai/models/fal-ai/elevenlabs/tts/turbo-v2.5
+ """
+
+ import os
+
+ from pydantic import BaseModel, Field
+
+ from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
+
+
+ class FalElevenlabsTtsTurboV25Input(BaseModel):
+     """Input schema for ElevenLabs TTS Turbo v2.5 generation.
+
+     Artifact fields are automatically detected via type introspection
+     and resolved from generation IDs to artifact objects.
+     """
+
+     text: str = Field(
+         description="The text to convert to speech",
+         min_length=1,
+     )
+
+     voice: str = Field(
+         default="Rachel",
+         description=(
+             "Voice selection from predefined options (Aria, Roger, Sarah, Laura, Rachel, etc.)"
+         ),
+     )
+
+     stability: float = Field(
+         default=0.5,
+         ge=0.0,
+         le=1.0,
+         description="Voice stability (0-1)",
+     )
+
+     similarity_boost: float = Field(
+         default=0.75,
+         ge=0.0,
+         le=1.0,
+         description="Voice similarity control (0-1)",
+     )
+
+     style: float = Field(
+         default=0.0,
+         ge=0.0,
+         le=1.0,
+         description="Style exaggeration (0-1)",
+     )
+
+     speed: float = Field(
+         default=1.0,
+         ge=0.7,
+         le=1.2,
+         description="Speech tempo adjustment (0.7-1.2x)",
+     )
+
+     timestamps: bool = Field(
+         default=False,
+         description="Include word-level timing data in output",
+     )
+
+     language_code: str | None = Field(
+         default=None,
+         description="ISO 639-1 language code for language enforcement (Turbo v2.5 only)",
+     )
+
+     previous_text: str | None = Field(
+         default=None,
+         description="Prior context for speech continuity when concatenating generations",
+     )
+
+     next_text: str | None = Field(
+         default=None,
+         description="Subsequent context for speech continuity when concatenating generations",
+     )
+
+
+ class FalElevenlabsTtsTurboV25Generator(BaseGenerator):
+     """Generator for high-speed text-to-speech using ElevenLabs TTS Turbo v2.5."""
+
+     name = "fal-elevenlabs-tts-turbo-v2-5"
+     description = (
+         "Fal: ElevenLabs TTS Turbo v2.5 - "
+         "High-speed text-to-speech with customizable voices and prosody"
+     )
+     artifact_type = "audio"
+
+     def get_input_schema(self) -> type[FalElevenlabsTtsTurboV25Input]:
+         """Return the input schema for this generator."""
+         return FalElevenlabsTtsTurboV25Input
+
+     async def generate(
+         self, inputs: FalElevenlabsTtsTurboV25Input, context: GeneratorExecutionContext
+     ) -> GeneratorResult:
+         """Generate audio using fal.ai ElevenLabs TTS Turbo v2.5."""
+         # Check for API key
+         if not os.getenv("FAL_KEY"):
+             raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")
+
+         # Import fal_client
+         try:
+             import fal_client
+         except ImportError as e:
+             raise ImportError(
+                 "fal.ai SDK is required for FalElevenlabsTtsTurboV25Generator. "
+                 "Install with: pip install weirdfingers-boards[generators-fal]"
+             ) from e
+
+         # Prepare arguments for fal.ai API
+         arguments = {
+             "text": inputs.text,
+             "voice": inputs.voice,
+             "stability": inputs.stability,
+             "similarity_boost": inputs.similarity_boost,
+             "style": inputs.style,
+             "speed": inputs.speed,
+             "timestamps": inputs.timestamps,
+         }
+
+         # Add optional fields only if provided
+         if inputs.language_code is not None:
+             arguments["language_code"] = inputs.language_code
+         if inputs.previous_text is not None:
+             arguments["previous_text"] = inputs.previous_text
+         if inputs.next_text is not None:
+             arguments["next_text"] = inputs.next_text
+
+         # Submit async job
+         handler = await fal_client.submit_async(
+             "fal-ai/elevenlabs/tts/turbo-v2.5",
+             arguments=arguments,
+         )
+
+         # Store external job ID
+         await context.set_external_job_id(handler.request_id)
+
+         # Stream progress updates
+         from .....progress.models import ProgressUpdate
+
+         event_count = 0
+         async for event in handler.iter_events(with_logs=True):
+             event_count += 1
+             # Sample every 3rd event to avoid spam
+             if event_count % 3 == 0:
+                 # Extract logs if available
+                 logs = getattr(event, "logs", None)
+                 if logs:
+                     # Join log entries into a single message
+                     if isinstance(logs, list):
+                         message = " | ".join(str(log) for log in logs if log)
+                     else:
+                         message = str(logs)
+
+                     if message:
+                         await context.publish_progress(
+                             ProgressUpdate(
+                                 job_id=handler.request_id,
+                                 status="processing",
+                                 progress=50.0,
+                                 phase="processing",
+                                 message=message,
+                             )
+                         )
+
+         # Get final result
+         result = await handler.get()
+
+         # Extract audio URL from result
+         # fal.ai returns: {"audio": {"url": "...", "content_type": "...", ...}}
+         audio_data = result.get("audio")
+         if audio_data is None:
+             raise ValueError("No audio data returned from fal.ai API")
+
+         audio_url = audio_data.get("url")
+         if not audio_url:
+             raise ValueError("Audio URL missing in fal.ai response")
+
+         # Store audio result
+         artifact = await context.store_audio_result(
+             storage_url=audio_url,
+             format="mp3",  # ElevenLabs TTS returns MP3 format
+             output_index=0,
+         )
+
+         return GeneratorResult(outputs=[artifact])
+
+     async def estimate_cost(self, inputs: FalElevenlabsTtsTurboV25Input) -> float:
+         """Estimate cost for ElevenLabs TTS Turbo v2.5 generation.
+
+         ElevenLabs TTS Turbo v2.5 pricing is typically based on character count.
+         Using a conservative estimate of $0.001 per character for turbo models.
+         """
+         # Calculate character count
+         char_count = len(inputs.text)
+
+         # Estimated cost: $0.001 per character (adjust based on actual pricing)
+         # This is a placeholder - actual pricing should be verified
+         return char_count * 0.001
@@ -0,0 +1,237 @@
+ """
+ Text-to-speech generation using Minimax Speech 2.6-HD.
+
+ Based on Fal AI's fal-ai/minimax/speech-2.6-hd model.
+ See: https://fal.ai/models/fal-ai/minimax/speech-2.6-hd
+ """
+
+ import os
+ from typing import Literal
+
+ from pydantic import BaseModel, Field
+
+ from ....base import BaseGenerator, GeneratorExecutionContext, GeneratorResult
+
+
+ class VoiceSetting(BaseModel):
+     """Voice settings for speech synthesis."""
+
+     voice_id: str = Field(
+         default="Wise_Woman",
+         description="Voice ID from predefined voices (e.g., Wise_Woman, Young_Man, etc.)",
+     )
+     speed: float = Field(
+         default=1.0,
+         ge=0.5,
+         le=2.0,
+         description="Speech speed multiplier (0.5-2.0)",
+     )
+     vol: float = Field(
+         default=1.0,
+         ge=0.01,
+         le=10.0,
+         description="Volume level (0.01-10)",
+     )
+     pitch: int = Field(
+         default=0,
+         ge=-12,
+         le=12,
+         description="Pitch adjustment in semitones (-12 to 12)",
+     )
+     emotion: str | None = Field(
+         default=None,
+         description=(
+             "Emotion for speech (happy, sad, angry, fearful, disgusted, surprised, neutral)"
+         ),
+     )
+     english_normalization: bool = Field(
+         default=False,
+         description="Enable English text normalization",
+     )
+
+
+ class AudioSetting(BaseModel):
+     """Audio output settings."""
+
+     format: Literal["mp3", "pcm", "flac"] = Field(
+         default="mp3",
+         description="Output audio format",
+     )
+     sample_rate: Literal[8000, 16000, 22050, 24000, 32000, 44100] = Field(
+         default=32000,
+         description="Sample rate in Hz",
+     )
+     channel: Literal[1, 2] = Field(
+         default=1,
+         description="Number of audio channels (1=mono, 2=stereo)",
+     )
+     bitrate: Literal[32000, 64000, 128000, 256000] = Field(
+         default=128000,
+         description="Audio bitrate in bits per second",
+     )
+
+
+ class NormalizationSetting(BaseModel):
+     """Audio normalization settings."""
+
+     enabled: bool = Field(
+         default=True,
+         description="Enable audio normalization",
+     )
+     target_loudness: float = Field(
+         default=-18.0,
+         ge=-70.0,
+         le=-10.0,
+         description="Target loudness in LUFS (-70 to -10)",
+     )
+     target_range: float = Field(
+         default=8.0,
+         ge=0.0,
+         le=20.0,
+         description="Target loudness range in LU (0-20)",
+     )
+     target_peak: float = Field(
+         default=-0.5,
+         ge=-3.0,
+         le=0.0,
+         description="Target peak level in dBTP (-3 to 0)",
+     )
+
+
+ class FalMinimaxSpeech26HdInput(BaseModel):
+     """Input schema for Fal Minimax Speech 2.6-HD generator."""
+
+     prompt: str = Field(
+         description=(
+             "Text to convert to speech. Paragraph breaks should be marked with newline characters."
+         ),
+         min_length=1,
+         max_length=10000,
+     )
+     language_boost: str | None = Field(
+         default=None,
+         description=(
+             "Language boost option. Supports: Chinese, English, Arabic, Russian, Spanish, "
+             "French, Portuguese, German, Turkish, Dutch, Ukrainian, Vietnamese, Indonesian, "
+             "Japanese, Italian, Korean, Thai, Polish, Romanian, Greek, Czech, Finnish, Hindi, "
+             "Bulgarian, Danish, Hebrew, Malay, Slovak, Swedish, Croatian, Hungarian, "
+             "Norwegian, Slovenian, Catalan, Nynorsk, Afrikaans"
+         ),
+     )
+     output_format: Literal["hex", "url"] = Field(
+         default="url",
+         description=(
+             "Output format for audio data (url returns a downloadable link, hex returns raw data)"
+         ),
+     )
+     voice_setting: VoiceSetting = Field(
+         default_factory=VoiceSetting,
+         description="Voice customization settings",
+     )
+     audio_setting: AudioSetting = Field(
+         default_factory=AudioSetting,
+         description="Audio output format settings",
+     )
+     normalization_setting: NormalizationSetting = Field(
+         default_factory=NormalizationSetting,
+         description="Audio normalization settings",
+     )
+
+
+ class FalMinimaxSpeech26HdGenerator(BaseGenerator):
+     """Generator for text-to-speech using Minimax Speech 2.6-HD."""
+
+     name = "fal-minimax-speech-26-hd"
+     description = (
+         "High-quality text-to-speech generation with extensive voice customization options"
+     )
+     artifact_type = "audio"
+
+     def get_input_schema(self) -> type[FalMinimaxSpeech26HdInput]:
+         """Return the input schema for this generator."""
+         return FalMinimaxSpeech26HdInput
+
+     async def generate(
+         self, inputs: FalMinimaxSpeech26HdInput, context: GeneratorExecutionContext
+     ) -> GeneratorResult:
+         """Generate audio using fal.ai minimax/speech-2.6-hd."""
+         # Check for API key
+         if not os.getenv("FAL_KEY"):
+             raise ValueError("API configuration invalid. Missing FAL_KEY environment variable")
+
+         # Import fal_client
+         try:
+             import fal_client
+         except ImportError as e:
+             raise ImportError(
+                 "fal.ai SDK is required for FalMinimaxSpeech26HdGenerator. "
+                 "Install with: pip install weirdfingers-boards[generators-fal]"
+             ) from e
+
+         # Prepare arguments for fal.ai API
+         arguments: dict = {
+             "prompt": inputs.prompt,
+             "output_format": inputs.output_format,
+             "voice_setting": inputs.voice_setting.model_dump(exclude_none=True),
+             "audio_setting": inputs.audio_setting.model_dump(),
+             "normalization_setting": inputs.normalization_setting.model_dump(),
+         }
+
+         # Only add language_boost if specified
+         if inputs.language_boost:
+             arguments["language_boost"] = inputs.language_boost
+
+         # Submit async job
+         handler = await fal_client.submit_async(
+             "fal-ai/minimax/speech-2.6-hd",
+             arguments=arguments,
+         )
+
+         # Store external job ID
+         await context.set_external_job_id(handler.request_id)
+
+         # Stream progress updates
+         from .....progress.models import ProgressUpdate
+
+         event_count = 0
+         async for _event in handler.iter_events(with_logs=True):
+             event_count += 1
+             # Sample every 3rd event to avoid spam
+             if event_count % 3 == 0:
+                 await context.publish_progress(
+                     ProgressUpdate(
+                         job_id=handler.request_id,
+                         status="processing",
+                         progress=50.0,
+                         phase="processing",
+                     )
+                 )
+
+         # Get final result
+         result = await handler.get()
+
+         # Extract audio output
+         audio_data = result.get("audio")
+         if audio_data is None:
+             raise ValueError("No audio data returned from API")
+
+         if not isinstance(audio_data, dict):
+             raise ValueError(f"Unexpected audio data type: {type(audio_data)}")
+
+         audio_url = audio_data.get("url")
+         if not audio_url:
+             raise ValueError("Audio URL missing")
+
+         artifact = await context.store_audio_result(
+             storage_url=audio_url,
+             format=inputs.audio_setting.format,
+             output_index=0,
+         )
+
+         return GeneratorResult(outputs=[artifact])
+
+     async def estimate_cost(self, inputs: FalMinimaxSpeech26HdInput) -> float:
+         """Estimate cost for this generation in USD."""
+         # Minimax Speech 2.6-HD pricing (estimated at $0.015 per generation)
+         # This is a reasonable estimate for TTS models
+         return 0.015
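
The three new TTS generators estimate cost with different heuristics: eleven-v3 bills per 1,000 characters, turbo-v2.5 applies a per-character rate that its own comments flag as an unverified placeholder, and speech-2.6-hd returns a flat per-generation figure. A quick worked comparison for a 500-character input, using the rates exactly as they appear in the code above (sketch only):

# Worked cost comparison for a 500-character prompt (rates copied from the
# estimate_cost() implementations above; turbo-v2.5 is a placeholder rate).
chars = 500
eleven_v3 = (chars / 1000.0) * 0.10   # 0.05 USD
turbo_v2_5 = chars * 0.001            # 0.50 USD (placeholder, unverified)
minimax_26_hd = 0.015                 # flat estimate per generation
print(eleven_v3, turbo_v2_5, minimax_26_hd)
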
@@ -73,7 +73,7 @@ class MinimaxSpeech26TurboInput(BaseModel):
 
      prompt: str = Field(
          description=(
-             "Text to convert to speech " "(supports pause markers <#x#> with 0.01-99.99 seconds)"
+             "Text to convert to speech (supports pause markers <#x#> with 0.01-99.99 seconds)"
          ),
          min_length=1,
          max_length=10000,