@alien-lobster-buffet/tts-conductor-fal 0.2.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Cole Reed
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,106 @@
1
+ # @alien-lobster-buffet/tts-conductor-fal
2
+
3
+ [fal.ai](https://fal.ai) provider bindings for the
4
+ [TTS Conductor](https://github.com/ichabodcole/tts-conductor) ecosystem.
5
+
6
+ fal.ai is a **gateway fronting many TTS engines**, each with its own input and
7
+ voice schema. This package models that as a single `fal` provider id that you
8
+ parameterize with a `model` at construction — so the marketplace lives at
9
+ construction time while each provider _instance_ is still one engine, fully
10
+ honoring the conductor's `TtsProvider` contract (one `caps`, one `generate`, one
11
+ optional `voiceCatalog`).
12
+
13
+ ## Install
14
+
15
+ ```sh
16
+ npm install @alien-lobster-buffet/tts-conductor-core @alien-lobster-buffet/tts-conductor-fal
17
+ ```
18
+
19
+ Requires a fal API key (`FAL_KEY`).
20
+
21
+ ## Quickstart
22
+
23
+ ```ts
24
+ import { createTtsConductor } from "@alien-lobster-buffet/tts-conductor-core";
25
+ import { falProviderFactory } from "@alien-lobster-buffet/tts-conductor-fal";
26
+
27
+ const conductor = createTtsConductor({ pauseTable: {}, maxPauseSeconds: 30 });
28
+ conductor.registerProvider(falProviderFactory);
29
+
30
+ const provider = conductor.createProvider("fal", {
31
+ apiKey: process.env.FAL_KEY!,
32
+ model: "fal-ai/minimax/speech-02-hd",
33
+ voice: { kind: "preset", id: "Wise_Woman" },
34
+ });
35
+
36
+ const { audio, duration, providerMeta } = await conductor.generateFull(
37
+ "Hello there. [PAUSE:2s] Welcome.",
38
+ provider,
39
+ );
40
+ ```
41
+
42
+ ## Starter models
43
+
44
+ | `model` | text key | voice mechanism | duration |
45
+ | ---------------------------------- | -------- | -------------------------------------------------------------------------------- | ---------------------- |
46
+ | `fal-ai/minimax/speech-02-hd` | `text` | `{ kind: 'preset', id }` → `voice_setting` object | native (`duration_ms`) |
47
+ | `fal-ai/gemini-3.1-flash-tts` | `prompt` | `{ kind: 'preset', id }` (30-voice enum) or `{ kind: 'multiSpeaker', speakers }` | ffprobe |
48
+ | `fal-ai/chatterbox/text-to-speech` | `text` | `{ kind: 'clone', audioUrl }` (no preset voices) | ffprobe |
49
+ | `fal-ai/elevenlabs/tts/turbo-v2.5` | `text` | `{ kind: 'preset', id }` | ffprobe |
50
+
51
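+ Pick the model at construction (an instance is bound to one model). `voice` and `params` set there are defaults that a single call can override — here on a provider bound to `fal-ai/elevenlabs/tts/turbo-v2.5`: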
52
+
53
+ ```ts
54
+ await provider.generate(chunk, {
55
+ overrides: {
56
+ voice: { kind: "preset", id: "Aria" },
57
+ params: { stability: 0.3 },
58
+ },
59
+ });
60
+ ```
61
+
62
+ `params` carries the model's scalar knobs (e.g. minimax `voiceSetting`, gemini
63
+ `styleInstructions` / `temperature`, chatterbox `exaggeration` / `cfg`, 11labs
64
+ `stability` / `similarity_boost` / `speed`). The minimax, gemini, and chatterbox
65
+ descriptors forward only the keys they recognize; the elevenlabs descriptor passes `params` through to fal as-is, so use fal's own wire key names there.
66
+
67
+ ## Voice discovery
68
+
69
+ Only `fal-ai/gemini-3.1-flash-tts` exposes a `voiceCatalog` — its 30 preset
70
+ voices are the one schema-enumerable set. minimax and elevenlabs-on-fal take a
71
+ voice id as a free string (pass a known id), and chatterbox clones from
72
+ `audio_url`; all three leave `voiceCatalog` undefined.
73
+
74
+ ```ts
75
+ if (provider.voiceCatalog) {
76
+ const voices = await provider.voiceCatalog.listVoices({ search: "kore" });
77
+ }
78
+ ```
79
+
80
+ > Gemini voices carry no per-voice language metadata (the model is multilingual
81
+ > via the `languageCode` param), so `listVoices({ language })` filters them all
82
+ > out. Filter by `search` instead.
83
+
84
+ ## Cost reconciliation
85
+
86
+ fal bills asynchronously. Each chunk's fal `request_id` is surfaced on the
87
+ generation result via core's generic `providerMeta` channel — collect them from
88
+ the final result and reconcile against fal's billing-events API:
89
+
90
+ ```ts
91
+ const result = await conductor.generateFull(text, provider);
92
+ const requestIds =
93
+ result.providerMeta?.map((m) => m?.request_id).filter(Boolean) ?? [];
94
+ ```
95
+
96
+ ## Notes
97
+
98
+ - **Multi-speaker (gemini) is single-chunk / short-form for now.** fal needs the
99
+ full `speakers[]` on every request and the prompt's `Alias:` line-prefixes to
100
+ survive chunking; long multi-speaker scripts aren't supported yet.
101
+ - fal engines don't render SSML breaks — inter-chunk pauses become stitched
102
+ silence segments via the core orchestrator.
103
+
104
+ ## License
105
+
106
+ MIT
@@ -0,0 +1,188 @@
1
+ import { ProviderCapabilities, TtsProviderFactory, VoiceCatalog, VoiceCatalogEntry } from "@alien-lobster-buffet/tts-conductor-core";
2
+
3
+ //#region src/types.d.ts
4
+ /**
5
+ * fal endpoint ids supported by this provider. Extended as models are added;
6
+ * each id must have a matching entry in the descriptor registry.
7
+ */
8
+ type FalModelId = 'fal-ai/minimax/speech-02-hd' | 'fal-ai/gemini-3.1-flash-tts' | 'fal-ai/chatterbox/text-to-speech' | 'fal-ai/elevenlabs/tts/turbo-v2.5';
9
+ /** One speaker in a multi-speaker request (gemini `speakers[]`). */
10
+ interface FalSpeaker {
11
+ /** Alias used as a line prefix in the prompt (e.g. "Host:"). → SpeakerConfig.speaker_id */
12
+ speakerId: string;
13
+ /** A preset voice name. → SpeakerConfig.voice */
14
+ voiceId: string;
15
+ }
16
+ /**
17
+ * Polymorphic voice selection spanning fal's mechanisms. Object-voice (minimax
18
+ * `voice_setting`) is not a separate kind — it's a `preset` id that the
19
+ * descriptor nests into an object alongside its scalar params.
20
+ */
21
+ type FalVoiceSelection = {
22
+ kind: 'preset';
23
+ id: string;
24
+ } | {
25
+ kind: 'multiSpeaker';
26
+ speakers: FalSpeaker[];
27
+ } | {
28
+ kind: 'clone';
29
+ audioUrl: string;
30
+ };
31
+ /**
32
+ * Normalized, model-agnostic input the provider assembles per call before a
33
+ * descriptor maps it onto a specific model's wire schema.
34
+ */
35
+ interface CanonicalTtsInput {
36
+ /** Plain text for this chunk — already stripped of core's `<speak>` wrapper. */
37
+ text: string;
38
+ /** Resolved voice selection (per-call override ?? construction default). */
39
+ voice?: FalVoiceSelection;
40
+ /**
41
+ * Resolved model-specific scalar knobs (speed, temperature, stability, …).
42
+ * Opaque at this layer. Hand-written descriptors whitelist the keys they forward, so a
43
+ * typo'd param is a silent no-op there; `flatBuildInput`-based descriptors forward every key as-is.
44
+ */
45
+ params?: Record<string, unknown>;
46
+ }
47
+ /**
48
+ * Where a model's audio lives in its fal response: a URL to fetch plus an
49
+ * optional mime type. All four starter models return `audio` as a fal `File`
50
+ * (`{ url, content_type? }`), so the provider's shared, abort-aware fetch turns
51
+ * this into bytes — descriptors only need to *locate* it.
52
+ */
53
+ interface FalAudioLocation {
54
+ url: string;
55
+ mimeType?: string;
56
+ }
57
+ /**
58
+ * Per-model adapter. The marketplace nature of fal is localized here: one
59
+ * descriptor per `endpointId`, all sharing the provider's `fal.subscribe` /
60
+ * fetch / abort plumbing.
61
+ */
62
+ interface FalModelDescriptor {
63
+ endpointId: FalModelId;
64
+ /**
65
+ * Per-model capabilities (incl. `maxCharsPerRequest`). fal engines don't
66
+ * render SSML breaks, so `maxInlineBreakSeconds` is always `null` — long
67
+ * pauses become stitched silence segments via the core orchestrator.
68
+ */
69
+ caps: ProviderCapabilities;
70
+ /** Map canonical input → this model's fal wire input (text key, voice encoding, defaults). */
71
+ buildInput(input: CanonicalTtsInput): Record<string, unknown>;
72
+ /** Locate the audio URL (+ mime) in this model's fal response. */
73
+ extractAudio(data: unknown): FalAudioLocation;
74
+ /**
75
+ * Pull duration (seconds) from the response when the model returns it. Only
76
+ * minimax does (`duration_ms`); the others omit this and let core's ffprobe
77
+ * derive duration.
78
+ */
79
+ extractDuration?(data: unknown): number | undefined;
80
+ /** Enumerable speaker list, for models that expose one. */
81
+ voiceCatalog?: VoiceCatalog;
82
+ }
83
+ //#endregion
84
+ //#region src/descriptors/chatterbox.d.ts
85
+ declare const chatterboxTextToSpeech: FalModelDescriptor;
86
+ //#endregion
87
+ //#region src/descriptors/elevenlabs.d.ts
88
+ /**
89
+ * `fal-ai/elevenlabs/tts/turbo-v2.5` — the flat model: `text` key, `voice` id
90
+ * (default Rachel), and 11labs scalar knobs (`stability` / `similarity_boost` /
91
+ * `style` / `speed` / `previous_text` / `next_text` / `language_code` /
92
+ * `apply_text_normalization`) forwarded via `params`. `voice` is a free string
93
+ * with documented examples, not a schema enum, so no `voiceCatalog`. Returns
94
+ * `.mp3`; no duration field → core ffprobe fallback.
95
+ */
96
+ declare const elevenlabsTurboV25: FalModelDescriptor;
97
+ //#endregion
98
+ //#region src/descriptors/flatBuildInput.d.ts
99
+ interface FlatBuildInputOptions {
100
+ /** Wire key the chunk text maps to (`text` for most, `prompt` for gemini). */
101
+ textKey: 'text' | 'prompt';
102
+ /** Wire key a `preset` voice id maps to. */
103
+ voiceKey: string;
104
+ /** Static input merged in first (e.g. an output_format or default voice). */
105
+ defaults?: Record<string, unknown>;
106
+ }
107
+ /**
108
+ * Produces a `buildInput` for flat models — text at `textKey`, a `preset` voice
109
+ * id at `voiceKey`, scalar `params` merged on top. Keeps one-line ergonomics for
110
+ * structurally-simple models without a separate declarative code path; models
111
+ * with object/multi-speaker/clone voice encoding hand-write `buildInput` instead.
112
+ */
113
+ declare function flatBuildInput(opts: FlatBuildInputOptions): (input: CanonicalTtsInput) => Record<string, unknown>;
114
+ //#endregion
115
+ //#region src/descriptors/gemini.d.ts
116
+ /**
117
+ * `fal-ai/gemini-3.1-flash-tts` — uses the `prompt` key (not `text`).
118
+ * Single-speaker uses the `voice` enum (default Kore); multi-speaker maps the
119
+ * canonical `speakers` to fal `SpeakerConfig[]` (`{ speaker_id, voice }`) and
120
+ * omits `voice`. `style_instructions` / `language_code` / `temperature` ride
121
+ * `params`. No duration field → core ffprobe fallback.
122
+ *
123
+ * **Multi-speaker + chunking caveat:** fal expects every request to carry the
124
+ * full `speakers[]` AND the prompt to keep its `Alias:` line-prefixes. A naive
125
+ * length-chunker can split mid-turn, so long multi-speaker scripts are not yet
126
+ * supported — treat multi-speaker as single-chunk / short-form for now.
127
+ */
128
+ declare const gemini31FlashTts: FalModelDescriptor;
129
+ //#endregion
130
+ //#region src/descriptors/minimax.d.ts
131
+ /**
132
+ * `fal-ai/minimax/speech-02-hd` — object-voice. The preset voice id nests into a
133
+ * `voice_setting` object alongside scalar knobs (vol/speed/pitch/emotion via
134
+ * `params.voiceSetting`). `output_format` is forced to `url` so the response is
135
+ * the schema-confirmed `audio.url` fetch shape shared by all four models (hex
136
+ * inline bytes are a possible later optimization). minimax is the only starter
137
+ * model that returns a duration (`duration_ms`).
138
+ */
139
+ declare const minimaxSpeech02Hd: FalModelDescriptor;
140
+ //#endregion
141
+ //#region src/descriptors/index.d.ts
142
+ /** Registry of every supported fal model, keyed by its `endpointId`. */
143
+ declare const FAL_DESCRIPTORS: Record<FalModelId, FalModelDescriptor>;
144
+ //#endregion
145
+ //#region src/falProvider.d.ts
146
+ /**
147
+ * Construction options for the single `fal` provider id. `model` selects the
148
+ * engine (the marketplace lives here, at construction); the returned provider
149
+ * instance is bound to that one model. `voice` / `params` are per-instance
150
+ * defaults that per-call overrides can replace.
151
+ */
152
+ interface FalProviderOptions {
153
+ apiKey: string;
154
+ model: FalModelId;
155
+ voice?: FalVoiceSelection;
156
+ params?: Record<string, unknown>;
157
+ }
158
+ /** Per-call overrides — vary the voice or scalar params for a single `generate`. */
159
+ interface FalCallOverrides {
160
+ voice?: FalVoiceSelection;
161
+ params?: Record<string, unknown>;
162
+ }
163
+ declare module '@alien-lobster-buffet/tts-conductor-core' {
164
+ interface TtsProviderRegistry {
165
+ fal: FalProviderOptions;
166
+ }
167
+ interface TtsProviderCallOverridesRegistry {
168
+ fal: FalCallOverrides;
169
+ }
170
+ }
171
+ declare const falProviderFactory: TtsProviderFactory<'fal', FalCallOverrides>;
172
+ //#endregion
173
+ //#region src/voiceCatalog.d.ts
174
+ /**
175
+ * Build a `VoiceCatalog` from a fixed list of entries, applying the
176
+ * cross-provider client-side filter baseline (case-insensitive substring search;
177
+ * language prefix-match; gender equality; custom-only). Used for fal models
178
+ * whose voices are a schema-enumerable set.
179
+ */
180
+ declare function staticVoiceCatalog<TRaw = unknown>(entries: ReadonlyArray<VoiceCatalogEntry<TRaw>>): VoiceCatalog<TRaw>;
181
+ /** Raw shape for a gemini voice entry — just the preset name. */
182
+ interface GeminiRawVoice {
183
+ name: string;
184
+ }
185
+ declare const geminiVoiceCatalog: VoiceCatalog<GeminiRawVoice>;
186
+ //#endregion
187
+ export { type CanonicalTtsInput, FAL_DESCRIPTORS, type FalAudioLocation, type FalCallOverrides, type FalModelDescriptor, type FalModelId, type FalProviderOptions, type FalSpeaker, type FalVoiceSelection, type FlatBuildInputOptions, type GeminiRawVoice, chatterboxTextToSpeech, elevenlabsTurboV25, falProviderFactory, flatBuildInput, gemini31FlashTts, geminiVoiceCatalog, minimaxSpeech02Hd, staticVoiceCatalog };
188
+ //# sourceMappingURL=index.d.mts.map
package/dist/index.mjs ADDED
@@ -0,0 +1,329 @@
1
+ import { TtsError } from "@alien-lobster-buffet/tts-conductor-core";
2
+ import { createFalClient } from "@fal-ai/client";
3
+ //#region src/descriptors/shared.ts
4
/**
 * Find the downloadable audio file inside a fal TTS response payload.
 *
 * Reads `data.audio.url` and `data.audio.content_type`. The URL is returned
 * untouched; turning it into bytes is left to the caller.
 *
 * @param {unknown} data - Response payload to inspect; may be null/undefined.
 * @param {string} defaultMime - Mime type used when `audio.content_type` is not a string.
 * @returns {{ url: string, mimeType: string }} Location of the audio file.
 * @throws {TtsError} When `audio.url` is missing, empty, or not a string.
 */
function locateAudio(data, defaultMime) {
  const file = data?.audio;
  const rawUrl = file?.url;
  // Guard first: anything other than a non-empty string is unusable.
  if (typeof rawUrl !== "string" || rawUrl === "") {
    throw new TtsError("[fal] response did not contain an audio.url");
  }
  const declaredType = file?.content_type;
  const mimeType = typeof declaredType === "string" ? declaredType : defaultMime;
  return { url: rawUrl, mimeType };
}
20
+ //#endregion
21
+ //#region src/descriptors/chatterbox.ts
22
/** Generation knobs that are copied from `params` onto the chatterbox wire input. */
const CHATTERBOX_PARAM_KEYS = ["exaggeration", "cfg", "temperature", "seed"];

/**
 * Descriptor for `fal-ai/chatterbox/text-to-speech`.
 *
 * The chunk text is sent under `text`. A `clone` voice selection contributes
 * its `audioUrl` as `audio_url`; any other (or no) voice selection adds nothing.
 * Only the keys in `CHATTERBOX_PARAM_KEYS` are forwarded from `params`, and only
 * when their value is not `undefined`. There is no `extractDuration` and no
 * `voiceCatalog` on this descriptor; audio defaults to `audio/wav` when the
 * response does not state a content type.
 */
const chatterboxTextToSpeech = {
  endpointId: "fal-ai/chatterbox/text-to-speech",
  caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },
  buildInput(input) {
    const knobs = input.params ?? {};
    const cloneSource =
      input.voice?.kind === "clone" ? { audio_url: input.voice.audioUrl } : {};
    const forwarded = Object.fromEntries(
      CHATTERBOX_PARAM_KEYS
        .filter((key) => knobs[key] !== undefined)
        .map((key) => [key, knobs[key]]),
    );
    return { text: input.text, ...cloneSource, ...forwarded };
  },
  extractAudio: (data) => locateAudio(data, "audio/wav"),
};
50
+ //#endregion
51
+ //#region src/descriptors/flatBuildInput.ts
52
/**
 * Produces a `buildInput` for flat models: `opts.defaults` first, then a
 * `preset` voice id at `opts.voiceKey`, then the caller's `params`, and finally
 * the chunk text at `opts.textKey`.
 *
 * Merge rules:
 * - `params` are forwarded as-is (no key whitelist), so they may override a
 *   default or the preset voice.
 * - `params` entries whose value is `undefined` are skipped, so an unset knob
 *   never erases a default or the selected voice.
 * - The chunk text is written last and always wins; a stray `text` / `prompt`
 *   key in `params` cannot replace the text being synthesized.
 *
 * Voice selections other than `preset` are ignored here; models with
 * object/multi-speaker/clone voice encoding hand-write `buildInput` instead.
 *
 * @param {{ textKey: string, voiceKey: string, defaults?: Record<string, unknown> }} opts
 *   Wire key for the text, wire key for a preset voice id, and optional static input.
 * @returns {(input: { text: string, voice?: object, params?: Record<string, unknown> }) => Record<string, unknown>}
 *   Function mapping canonical input to the model's wire input.
 */
function flatBuildInput(opts) {
  return (input) => {
    const presetVoice =
      input.voice?.kind === "preset" ? { [opts.voiceKey]: input.voice.id } : {};
    // fromEntries defines plain data properties, so unusual keys stay ordinary keys.
    const definedParams = Object.fromEntries(
      Object.entries(input.params ?? {}).filter(([, value]) => value !== undefined),
    );
    return {
      ...opts.defaults,
      ...presetVoice,
      ...definedParams,
      // Written last on purpose: the chunk text is authoritative.
      [opts.textKey]: input.text,
    };
  };
}
66
+ //#endregion
67
+ //#region src/descriptors/elevenlabs.ts
68
/**
 * Descriptor for `fal-ai/elevenlabs/tts/turbo-v2.5`, the flat model.
 *
 * Wire input is assembled by `flatBuildInput`: the chunk goes under `text`, a
 * `preset` voice id goes under `voice` (falling back to the static default
 * `"Rachel"`), and `params` are merged in by that helper. No `extractDuration`
 * and no `voiceCatalog` are defined; audio defaults to `audio/mpeg` when the
 * response does not state a content type.
 */
const elevenlabsTurboV25 = {
  endpointId: "fal-ai/elevenlabs/tts/turbo-v2.5",
  caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 2000 },
  buildInput: flatBuildInput({ textKey: "text", voiceKey: "voice", defaults: { voice: "Rachel" } }),
  extractAudio: (data) => locateAudio(data, "audio/mpeg"),
};
89
+ //#endregion
90
+ //#region src/voiceCatalog.ts
91
/**
 * Wrap a fixed list of voice entries in a `VoiceCatalog`-shaped object whose
 * `listVoices(query)` filters client-side.
 *
 * Filters, each applied only when its query field is truthy:
 * - `search`: case-insensitive substring match against the string values among
 *   `name`, `description`, and the values of `labels`.
 * - `language`: case-insensitive prefix match against any entry in `languages`.
 * - `gender`: case-insensitive equality with `gender`.
 * - `customOnly`: keeps entries whose `custom` is exactly `true`.
 *
 * @param {ReadonlyArray<object>} entries - Voice entries to serve.
 * @returns {{ listVoices(query?: object): Promise<object[]> }} Catalog over `entries`;
 *   every call resolves to a fresh array.
 */
function staticVoiceCatalog(entries) {
  return {
    async listVoices(query) {
      // Collect one predicate per active filter, in the order they are checked.
      const predicates = [];

      if (query?.search) {
        const needle = query.search.toLowerCase();
        predicates.push((voice) => {
          const fields = [voice.name, voice.description, ...Object.values(voice.labels ?? {})];
          return fields.some(
            (field) => typeof field === "string" && field.toLowerCase().includes(needle),
          );
        });
      }

      if (query?.language) {
        const prefix = query.language.toLowerCase();
        predicates.push((voice) =>
          voice.languages.some((code) => code.toLowerCase().startsWith(prefix)),
        );
      }

      if (query?.gender) {
        const wanted = query.gender.toLowerCase();
        predicates.push((voice) => voice.gender?.toLowerCase() === wanted);
      }

      if (query?.customOnly) {
        predicates.push((voice) => voice.custom === true);
      }

      return [...entries].filter((voice) => predicates.every((accepts) => accepts(voice)));
    },
  };
}
120
/**
 * Static catalog of the preset voice names listed here. Each entry uses the
 * name as both `id` and `name`, carries an empty `languages` list, and exposes
 * `{ name }` as its `raw` payload.
 */
const geminiVoiceCatalog = staticVoiceCatalog(
  Array.from(
    [
      "Achernar", "Achird", "Algenib", "Algieba", "Alnilam",
      "Aoede", "Autonoe", "Callirrhoe", "Charon", "Despina",
      "Enceladus", "Erinome", "Fenrir", "Gacrux", "Iapetus",
      "Kore", "Laomedeia", "Leda", "Orus", "Pulcherrima",
      "Puck", "Rasalgethi", "Sadachbia", "Sadaltager", "Schedar",
      "Sulafat", "Umbriel", "Vindemiatrix", "Zephyr", "Zubenelgenubi",
    ],
    (voiceName) => ({
      id: voiceName,
      name: voiceName,
      languages: [],
      raw: { name: voiceName },
    }),
  ),
);
157
+ //#endregion
158
+ //#region src/descriptors/gemini.ts
159
/**
 * Descriptor for `fal-ai/gemini-3.1-flash-tts`.
 *
 * The chunk text is sent under `prompt` (not `text`) and `output_format` is
 * always `"mp3"`. Three `params` keys are forwarded under their wire names when
 * not `undefined`: `styleInstructions` → `style_instructions`, `languageCode` →
 * `language_code`, and `temperature`.
 *
 * Voice handling:
 * - `multiSpeaker`: `speakers` become `{ speaker_id, voice }` objects under
 *   `speakers`, and no `voice` key is written.
 * - `preset`: the id is written to `voice`.
 * - anything else (including no voice): `voice` is `"Kore"`.
 *
 * No `extractDuration` is defined. Audio defaults to `audio/mpeg` when the
 * response does not state a content type.
 */
const gemini31FlashTts = {
  endpointId: "fal-ai/gemini-3.1-flash-tts",
  caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },
  buildInput(input) {
    const knobs = input.params ?? {};
    const wire = { prompt: input.text, output_format: "mp3" };

    // [canonical param name, wire key] pairs, copied only when a value is set.
    const forwardedKnobs = [
      ["styleInstructions", "style_instructions"],
      ["languageCode", "language_code"],
      ["temperature", "temperature"],
    ];
    for (const [paramName, wireKey] of forwardedKnobs) {
      if (knobs[paramName] !== undefined) wire[wireKey] = knobs[paramName];
    }

    const selection = input.voice;
    switch (selection?.kind) {
      case "multiSpeaker":
        wire.speakers = selection.speakers.map((speaker) => ({
          speaker_id: speaker.speakerId,
          voice: speaker.voiceId,
        }));
        break;
      case "preset":
        wire.voice = selection.id;
        break;
      default:
        wire.voice = "Kore";
    }
    return wire;
  },
  extractAudio: (data) => locateAudio(data, "audio/mpeg"),
  voiceCatalog: geminiVoiceCatalog,
};
196
+ //#endregion
197
+ //#region src/descriptors/minimax.ts
198
/**
 * Descriptor for `fal-ai/minimax/speech-02-hd` (object-voice model).
 *
 * `buildInput` nests the voice into a `voice_setting` object: the fields of
 * `params.voiceSetting` are copied in, then `voice_id` is resolved as
 * preset voice id → `params.voiceSetting.voice_id` → `"Wise_Woman"`.
 * `output_format` is always `"url"`. `params.audioSetting` and
 * `params.languageBoost` are forwarded as `audio_setting` / `language_boost`
 * when not `undefined`.
 *
 * `params.voiceSetting` is only honored when it is a plain (non-array) object;
 * any other value is ignored rather than being spread into `voice_setting`
 * (spreading a string would add one key per character).
 *
 * `extractDuration` converts the response's `duration_ms` to seconds. It
 * returns `undefined` for a missing payload or a `duration_ms` that is not a
 * finite number, instead of throwing or returning NaN.
 */
const minimaxSpeech02Hd = {
  endpointId: "fal-ai/minimax/speech-02-hd",
  caps: {
    maxInlineBreakSeconds: null,
    maxCharsPerRequest: 5000,
  },
  buildInput(input) {
    const params = input.params ?? {};
    const rawSetting = params.voiceSetting;
    const voiceSetting =
      rawSetting !== null && typeof rawSetting === "object" && !Array.isArray(rawSetting)
        ? rawSetting
        : {};
    const presetId = input.voice?.kind === "preset" ? input.voice.id : undefined;
    const wire = {
      text: input.text,
      voice_setting: {
        ...voiceSetting,
        // An explicit preset selection beats a voice_id tucked into params.
        voice_id: presetId ?? voiceSetting.voice_id ?? "Wise_Woman",
      },
      output_format: "url",
    };
    if (params.audioSetting !== undefined) wire.audio_setting = params.audioSetting;
    if (params.languageBoost !== undefined) wire.language_boost = params.languageBoost;
    return wire;
  },
  extractAudio: (data) => locateAudio(data, "audio/mpeg"),
  extractDuration(data) {
    const ms = data?.duration_ms;
    // Number.isFinite is false for non-numbers, NaN, and ±Infinity.
    return Number.isFinite(ms) ? ms / 1000 : undefined;
  },
};
234
+ //#endregion
235
+ //#region src/descriptors/index.ts
236
/**
 * Lookup table from fal endpoint id to its model descriptor. Keys are taken
 * from each descriptor's own `endpointId`, so the key and the descriptor it
 * maps to cannot drift apart.
 */
const FAL_DESCRIPTORS = Object.fromEntries(
  [minimaxSpeech02Hd, gemini31FlashTts, chatterboxTextToSpeech, elevenlabsTurboV25].map(
    (descriptor) => [descriptor.endpointId, descriptor],
  ),
);
243
+ //#endregion
244
+ //#region src/falProvider.ts
245
/**
 * Remove a leading `<speak …>` tag and a trailing `</speak>` tag from a chunk,
 * then trim surrounding whitespace. Matching is case-insensitive.
 *
 * The opening tag must be exactly `speak`, optionally followed by attributes:
 * a tag that merely starts with "speak" (`<speaker>`, `<speakers …>`) is part
 * of the text and is left alone. Whitespace before the closing `>` of
 * `</speak >` is tolerated. Only the outermost wrapper at the very start/end of
 * the string is removed; markup inside the text is untouched.
 *
 * @param {string} input - Chunk text, possibly wrapped in `<speak>…</speak>`.
 * @returns {string} The chunk without the wrapper, trimmed.
 */
function stripSpeak(input) {
  return input
    .replace(/^\s*<speak(?:\s[^>]*)?>/i, "")
    .replace(/<\/speak\s*>\s*$/i, "")
    .trim();
}
253
/**
 * Tell whether a caught value is an abort signal's error.
 *
 * @param {unknown} error - Value caught from a `try` block.
 * @returns {boolean} `true` only for `Error` instances named `"AbortError"`.
 */
function isAbortError(error) {
  if (!(error instanceof Error)) return false;
  return error.name === "AbortError";
}
256
/**
 * Provider instance bound to one fal model.
 *
 * The constructor looks the descriptor up in `FAL_DESCRIPTORS` by
 * `options.model` and mirrors its `caps` and `voiceCatalog` onto the instance.
 * `generate` builds the model's wire input through the descriptor, calls
 * `client.subscribe`, downloads the audio URL found in the response, and
 * returns the bytes with the fal request id attached as `providerMeta`.
 */
var FalProvider = class {
  /**
   * @param {{ id: string, config: { logger?: object } }} ctx - Context supplying the provider id and an optional logger.
   * @param {{ model: string, voice?: object, params?: Record<string, unknown> }} options - Construction options; `voice`/`params` act as per-instance defaults.
   * @param {{ subscribe: Function }} client - fal client used for `subscribe` calls.
   */
  constructor(ctx, options, client) {
    const descriptor = FAL_DESCRIPTORS[options.model];
    Object.assign(this, {
      ctx,
      options,
      client,
      id: ctx.id,
      descriptor,
      caps: descriptor.caps,
      voiceCatalog: descriptor.voiceCatalog,
    });
  }

  /**
   * Synthesize one chunk.
   *
   * @param {string} chunk - Chunk text; a `<speak>` wrapper is stripped before use.
   * @param {{ signal?: AbortSignal, overrides?: { voice?: object, params?: Record<string, unknown> } }} [options]
   *   Abort signal plus per-call voice/params overrides. Override params are
   *   shallow-merged over the construction params; an override voice replaces
   *   the construction voice.
   * @returns {Promise<object>} `{ audio, mimeType, duration?, size, providerMeta }`;
   *   `duration` is present only when the descriptor extracted one.
   * @throws Abort errors are rethrown as-is (or raised as an `AbortError`
   *   `DOMException` when the signal is aborted); `TtsError`s pass through;
   *   anything else is wrapped in a `TtsError` with the original as `cause`.
   */
  async generate(chunk, options) {
    const { signal, overrides } = options ?? {};
    signal?.throwIfAborted();

    const { descriptor } = this;
    const { model } = this.options;

    // Keep params undefined when neither side supplied any.
    const hasParams = Boolean(this.options.params || overrides?.params);
    const params = hasParams ? { ...this.options.params, ...overrides?.params } : undefined;

    const wireInput = descriptor.buildInput({
      text: stripSpeak(chunk),
      voice: overrides?.voice ?? this.options.voice,
      params,
    });

    const logger = this.ctx.config.logger;
    logger?.info?.("[fal] subscribe start", { model });

    try {
      const subscribeOptions = signal
        ? { input: wireInput, abortSignal: signal }
        : { input: wireInput };
      const result = await this.client.subscribe(descriptor.endpointId, subscribeOptions);

      const location = descriptor.extractAudio(result.data);
      const audio = await this.fetchAudio(location.url, signal);
      const duration = descriptor.extractDuration?.(result.data);

      logger?.info?.("[fal] generation done", {
        model,
        bytes: audio.length,
        requestId: result.requestId,
      });

      const generated = { audio, mimeType: location.mimeType };
      if (duration !== undefined) generated.duration = duration;
      generated.size = audio.length;
      generated.providerMeta = { request_id: result.requestId };
      return generated;
    } catch (error) {
      // Order matters: abort detection first, then already-classified errors.
      if (isAbortError(error)) throw error;
      if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
      if (error instanceof TtsError) throw error;

      const detail = error instanceof Error ? error.message : String(error);
      const mapped = new TtsError(`fal generation failed: ${detail}`, { cause: error });
      logger?.error?.("[fal] generation error", { message: mapped.message });
      throw mapped;
    }
  }

  /**
   * Download a model's `audio.url` into a Buffer, honoring the abort signal.
   *
   * @param {string} url - Audio URL taken from the fal response.
   * @param {AbortSignal} [signal] - Passed to `fetch` when provided.
   * @returns {Promise<Buffer>} The response body bytes.
   * @throws {TtsError} When the HTTP response is not ok; carries `statusCode`.
   */
  async fetchAudio(url, signal) {
    const init = signal ? { signal } : {};
    const response = await fetch(url, init);
    if (response.ok) return Buffer.from(await response.arrayBuffer());
    throw new TtsError(`fal audio fetch failed (${response.status}): ${url}`, {
      statusCode: response.status,
    });
  }
};
318
/**
 * Factory registered under the provider id `"fal"`.
 *
 * `create` validates the construction options and returns a `FalProvider`
 * bound to the requested model, backed by a fal client created with the given
 * API key.
 *
 * Validation:
 * - Missing options or a falsy `apiKey` → `Error("fal provider requires an apiKey")`.
 * - A `model` that is not an own key of `FAL_DESCRIPTORS` →
 *   `Error('fal provider: unknown model "…"')`. The own-key check matters:
 *   a plain bracket lookup is also truthy for names inherited from
 *   `Object.prototype` (`"constructor"`, `"toString"`, …), which would let an
 *   invalid model through and fail later inside `generate`.
 */
const falProviderFactory = {
  id: "fal",
  create(ctx, options) {
    if (!options?.apiKey) throw new Error("fal provider requires an apiKey");
    const model = options.model;
    if (!model || !Object.hasOwn(FAL_DESCRIPTORS, model)) {
      throw new Error(`fal provider: unknown model "${model}"`);
    }
    return new FalProvider(ctx, options, createFalClient({ credentials: options.apiKey }));
  },
};
326
+ //#endregion
327
+ export { FAL_DESCRIPTORS, chatterboxTextToSpeech, elevenlabsTurboV25, falProviderFactory, flatBuildInput, gemini31FlashTts, geminiVoiceCatalog, minimaxSpeech02Hd, staticVoiceCatalog };
328
+
329
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../src/descriptors/shared.ts","../src/descriptors/chatterbox.ts","../src/descriptors/flatBuildInput.ts","../src/descriptors/elevenlabs.ts","../src/voiceCatalog.ts","../src/descriptors/gemini.ts","../src/descriptors/minimax.ts","../src/descriptors/index.ts","../src/falProvider.ts"],"sourcesContent":["import { TtsError } from '@alien-lobster-buffet/tts-conductor-core';\nimport type { FalAudioLocation } from '../types';\n\n/**\n * Locate the audio URL (+ mime) in a fal TTS response. All four starter models\n * return `audio` as a fal `File` (`{ url, content_type? }`) when\n * `output_format` is url-based — the provider's shared fetch turns the URL into\n * bytes. Throws a {@link TtsError} if the response carries no usable `audio.url`\n * (already-classified, so the provider passes it through without re-wrapping).\n */\nexport function locateAudio(data: unknown, defaultMime: string): FalAudioLocation {\n const audio = (data as { audio?: { url?: unknown; content_type?: unknown } } | null | undefined)\n ?.audio;\n const url = typeof audio?.url === 'string' ? audio.url : undefined;\n if (!url) {\n throw new TtsError('[fal] response did not contain an audio.url');\n }\n const mimeType = typeof audio?.content_type === 'string' ? audio.content_type : defaultMime;\n return { url, mimeType };\n}\n","import type { FalModelDescriptor } from '../types';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/chatterbox/text-to-speech` — voice cloning, no voice id. A `clone`\n * selection supplies `audio_url` (a plain URL string); omitting it lets fal use\n * its demo voice. Generation knobs (`exaggeration` / `cfg` / `temperature` /\n * `seed`) ride `params`. Returns `.wav`; no duration field → core ffprobe\n * fallback. 
No `voiceCatalog` — there are no enumerable speakers.\n */\nconst CHATTERBOX_PARAM_KEYS = ['exaggeration', 'cfg', 'temperature', 'seed'] as const;\n\nexport const chatterboxTextToSpeech: FalModelDescriptor = {\n endpointId: 'fal-ai/chatterbox/text-to-speech',\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },\n buildInput(input) {\n const params = input.params ?? {};\n const wire: Record<string, unknown> = { text: input.text };\n if (input.voice?.kind === 'clone') wire.audio_url = input.voice.audioUrl;\n for (const key of CHATTERBOX_PARAM_KEYS) {\n if (params[key] !== undefined) wire[key] = params[key];\n }\n return wire;\n },\n extractAudio: (data) => locateAudio(data, 'audio/wav'),\n};\n","import type { CanonicalTtsInput } from '../types';\n\nexport interface FlatBuildInputOptions {\n /** Wire key the chunk text maps to (`text` for most, `prompt` for gemini). */\n textKey: 'text' | 'prompt';\n /** Wire key a `preset` voice id maps to. */\n voiceKey: string;\n /** Static input merged in first (e.g. an output_format or default voice). */\n defaults?: Record<string, unknown>;\n}\n\n/**\n * Produces a `buildInput` for flat models — text at `textKey`, a `preset` voice\n * id at `voiceKey`, scalar `params` merged on top. Keeps one-line ergonomics for\n * structurally-simple models without a separate declarative code path; models\n * with object/multi-speaker/clone voice encoding hand-write `buildInput` instead.\n */\nexport function flatBuildInput(opts: FlatBuildInputOptions) {\n return (input: CanonicalTtsInput): Record<string, unknown> => ({\n ...opts.defaults,\n [opts.textKey]: input.text,\n ...(input.voice?.kind === 'preset' ? { [opts.voiceKey]: input.voice.id } : {}),\n ...(input.params ?? 
{}),\n });\n}\n","import type { FalModelDescriptor } from '../types';\nimport { flatBuildInput } from './flatBuildInput';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/elevenlabs/tts/turbo-v2.5` — the flat model: `text` key, `voice` id\n * (default Rachel), and 11labs scalar knobs (`stability` / `similarity_boost` /\n * `style` / `speed` / `previous_text` / `next_text` / `language_code` /\n * `apply_text_normalization`) forwarded via `params`. `voice` is a free string\n * with documented examples, not a schema enum, so no `voiceCatalog`. Returns\n * `.mp3`; no duration field → core ffprobe fallback.\n */\nexport const elevenlabsTurboV25: FalModelDescriptor = {\n endpointId: 'fal-ai/elevenlabs/tts/turbo-v2.5',\n // No schema char limit; 2000 is a deliberate narration default (a bit above\n // the direct 11labs adapter's 1200) balancing progress granularity against\n // request count. Soft — raise per job via BuildAudioOptions.maxCharsPerRequest.\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 2000 },\n buildInput: flatBuildInput({ textKey: 'text', voiceKey: 'voice', defaults: { voice: 'Rachel' } }),\n extractAudio: (data) => locateAudio(data, 'audio/mpeg'),\n};\n","import type {\n VoiceCatalog,\n VoiceCatalogEntry,\n VoiceCatalogQuery,\n} from '@alien-lobster-buffet/tts-conductor-core';\n\n/**\n * Build a `VoiceCatalog` from a fixed list of entries, applying the\n * cross-provider client-side filter baseline (case-insensitive substring search;\n * language prefix-match; gender equality; custom-only). 
Used for fal models\n * whose voices are a schema-enumerable set.\n */\nexport function staticVoiceCatalog<TRaw = unknown>(\n entries: ReadonlyArray<VoiceCatalogEntry<TRaw>>,\n): VoiceCatalog<TRaw> {\n return {\n async listVoices(query?: VoiceCatalogQuery): Promise<VoiceCatalogEntry<TRaw>[]> {\n let result = [...entries];\n if (query?.search) {\n const q = query.search.toLowerCase();\n result = result.filter((v) =>\n [v.name, v.description, ...Object.values(v.labels ?? {})]\n .filter((s): s is string => typeof s === 'string')\n .some((s) => s.toLowerCase().includes(q)),\n );\n }\n if (query?.language) {\n const lang = query.language.toLowerCase();\n result = result.filter((v) => v.languages.some((l) => l.toLowerCase().startsWith(lang)));\n }\n if (query?.gender) {\n const g = query.gender.toLowerCase();\n result = result.filter((v) => v.gender?.toLowerCase() === g);\n }\n if (query?.customOnly) {\n result = result.filter((v) => v.custom === true);\n }\n return result;\n },\n };\n}\n\n/** Raw shape for a gemini voice entry — just the preset name. */\nexport interface GeminiRawVoice {\n name: string;\n}\n\n/**\n * The 30 gemini-3.1-flash-tts preset voices, straight from the model's OpenAPI\n * `voice` enum (authoritative). 
gemini is multilingual via `language_code`\n * rather than per-voice, so `languages` is left empty.\n */\nconst GEMINI_VOICE_NAMES = [\n 'Achernar',\n 'Achird',\n 'Algenib',\n 'Algieba',\n 'Alnilam',\n 'Aoede',\n 'Autonoe',\n 'Callirrhoe',\n 'Charon',\n 'Despina',\n 'Enceladus',\n 'Erinome',\n 'Fenrir',\n 'Gacrux',\n 'Iapetus',\n 'Kore',\n 'Laomedeia',\n 'Leda',\n 'Orus',\n 'Pulcherrima',\n 'Puck',\n 'Rasalgethi',\n 'Sadachbia',\n 'Sadaltager',\n 'Schedar',\n 'Sulafat',\n 'Umbriel',\n 'Vindemiatrix',\n 'Zephyr',\n 'Zubenelgenubi',\n] as const;\n\nexport const geminiVoiceCatalog: VoiceCatalog<GeminiRawVoice> = staticVoiceCatalog(\n GEMINI_VOICE_NAMES.map((name) => ({ id: name, name, languages: [], raw: { name } })),\n);\n","import type { FalModelDescriptor } from '../types';\nimport { geminiVoiceCatalog } from '../voiceCatalog';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/gemini-3.1-flash-tts` — uses the `prompt` key (not `text`).\n * Single-speaker uses the `voice` enum (default Kore); multi-speaker maps the\n * canonical `speakers` to fal `SpeakerConfig[]` (`{ speaker_id, voice }`) and\n * omits `voice`. `style_instructions` / `language_code` / `temperature` ride\n * `params`. No duration field → core ffprobe fallback.\n *\n * **Multi-speaker + chunking caveat:** fal expects every request to carry the\n * full `speakers[]` AND the prompt to keep its `Alias:` line-prefixes. A naive\n * length-chunker can split mid-turn, so long multi-speaker scripts are not yet\n * supported — treat multi-speaker as single-chunk / short-form for now.\n */\nexport const gemini31FlashTts: FalModelDescriptor = {\n endpointId: 'fal-ai/gemini-3.1-flash-tts',\n // Schema allows up to 50000. Default 5000 favors progress granularity and a\n // small per-failure blast radius for narration. 
NB: smaller chunks mean more\n // fal.subscribe calls → more request_ids to reconcile + more stitch seams\n // (fal cost is ~per-request, not per-char), so long-form consumers should\n // raise this per job via BuildAudioOptions.maxCharsPerRequest (it's a soft\n // default, not a ceiling).\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },\n buildInput(input) {\n const params = input.params ?? {};\n const wire: Record<string, unknown> = {\n prompt: input.text,\n output_format: 'mp3',\n };\n if (params.styleInstructions !== undefined) wire.style_instructions = params.styleInstructions;\n if (params.languageCode !== undefined) wire.language_code = params.languageCode;\n if (params.temperature !== undefined) wire.temperature = params.temperature;\n\n if (input.voice?.kind === 'multiSpeaker') {\n wire.speakers = input.voice.speakers.map((s) => ({\n speaker_id: s.speakerId,\n voice: s.voiceId,\n }));\n } else {\n wire.voice = input.voice?.kind === 'preset' ? input.voice.id : 'Kore';\n }\n return wire;\n },\n extractAudio: (data) => locateAudio(data, 'audio/mpeg'),\n voiceCatalog: geminiVoiceCatalog,\n};\n","import type { FalModelDescriptor } from '../types';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/minimax/speech-02-hd` — object-voice. The preset voice id nests into a\n * `voice_setting` object alongside scalar knobs (vol/speed/pitch/emotion via\n * `params.voiceSetting`). `output_format` is forced to `url` so the response is\n * the schema-confirmed `audio.url` fetch shape shared by all four models (hex\n * inline bytes are a possible later optimization). minimax is the only starter\n * model that returns a duration (`duration_ms`).\n */\nexport const minimaxSpeech02Hd: FalModelDescriptor = {\n endpointId: 'fal-ai/minimax/speech-02-hd',\n // Schema hard limit is 5000 chars; used directly as the chunk budget.\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },\n buildInput(input) {\n const params = input.params ?? 
{};\n const voiceSetting = (params.voiceSetting as Record<string, unknown> | undefined) ?? {};\n const presetId = input.voice?.kind === 'preset' ? input.voice.id : undefined;\n const wire: Record<string, unknown> = {\n text: input.text,\n voice_setting: {\n ...voiceSetting,\n // The explicit canonical voice wins over a voice_id smuggled into\n // params.voiceSetting; fall back to that, then the model default.\n voice_id: presetId ?? voiceSetting.voice_id ?? 'Wise_Woman',\n },\n output_format: 'url',\n };\n if (params.audioSetting !== undefined) wire.audio_setting = params.audioSetting;\n if (params.languageBoost !== undefined) wire.language_boost = params.languageBoost;\n return wire;\n },\n extractAudio: (data) => locateAudio(data, 'audio/mpeg'),\n extractDuration(data) {\n const ms = (data as { duration_ms?: unknown }).duration_ms;\n return typeof ms === 'number' ? ms / 1000 : undefined;\n },\n // minimax voice_id is a free string with documented examples (not a schema\n // enum), so there's no authoritative list to enumerate — voiceCatalog omitted.\n};\n","import type { FalModelDescriptor, FalModelId } from '../types';\nimport { chatterboxTextToSpeech } from './chatterbox';\nimport { elevenlabsTurboV25 } from './elevenlabs';\nimport { gemini31FlashTts } from './gemini';\nimport { minimaxSpeech02Hd } from './minimax';\n\nexport { chatterboxTextToSpeech } from './chatterbox';\nexport { elevenlabsTurboV25 } from './elevenlabs';\nexport { type FlatBuildInputOptions, flatBuildInput } from './flatBuildInput';\nexport { gemini31FlashTts } from './gemini';\nexport { minimaxSpeech02Hd } from './minimax';\n\n/** Registry of every supported fal model, keyed by its `endpointId`. 
*/\nexport const FAL_DESCRIPTORS: Record<FalModelId, FalModelDescriptor> = {\n 'fal-ai/minimax/speech-02-hd': minimaxSpeech02Hd,\n 'fal-ai/gemini-3.1-flash-tts': gemini31FlashTts,\n 'fal-ai/chatterbox/text-to-speech': chatterboxTextToSpeech,\n 'fal-ai/elevenlabs/tts/turbo-v2.5': elevenlabsTurboV25,\n};\n","import type {\n GenerateCallOptions,\n GenerationResult,\n ProviderCapabilities,\n TtsProvider,\n TtsProviderContext,\n TtsProviderFactory,\n VoiceCatalog,\n} from '@alien-lobster-buffet/tts-conductor-core';\nimport { TtsError } from '@alien-lobster-buffet/tts-conductor-core';\nimport { createFalClient, type FalClient } from '@fal-ai/client';\nimport { FAL_DESCRIPTORS } from './descriptors';\nimport type { CanonicalTtsInput, FalModelDescriptor, FalModelId, FalVoiceSelection } from './types';\n\n/**\n * Construction options for the single `fal` provider id. `model` selects the\n * engine (the marketplace lives here, at construction); the returned provider\n * instance is bound to that one model. `voice` / `params` are per-instance\n * defaults that per-call overrides can replace.\n */\nexport interface FalProviderOptions {\n apiKey: string;\n model: FalModelId;\n voice?: FalVoiceSelection;\n params?: Record<string, unknown>;\n}\n\n/** Per-call overrides — vary the voice or scalar params for a single `generate`. */\nexport interface FalCallOverrides {\n voice?: FalVoiceSelection;\n params?: Record<string, unknown>;\n}\n\ndeclare module '@alien-lobster-buffet/tts-conductor-core' {\n interface TtsProviderRegistry {\n fal: FalProviderOptions;\n }\n interface TtsProviderCallOverridesRegistry {\n fal: FalCallOverrides;\n }\n}\n\n/**\n * Remove core's `<speak>…</speak>` wrapper, which the orchestrator adds to every\n * chunk. 
fal engines take plain text/prompt, not SSML (and the descriptors\n * declare `maxInlineBreakSeconds: null`, so the chunker emits no `<break>` tags).\n */\nfunction stripSpeak(input: string): string {\n return input\n .replace(/^\\s*<speak[^>]*>/i, '')\n .replace(/<\\/speak>\\s*$/i, '')\n .trim();\n}\n\nfunction isAbortError(error: unknown): boolean {\n return error instanceof Error && error.name === 'AbortError';\n}\n\nclass FalProvider implements TtsProvider<FalCallOverrides> {\n readonly id: string;\n readonly caps: ProviderCapabilities;\n readonly voiceCatalog?: VoiceCatalog;\n private readonly descriptor: FalModelDescriptor;\n\n constructor(\n private readonly ctx: TtsProviderContext,\n private readonly options: FalProviderOptions,\n // Instance-local fal client — credentials are scoped to this provider, not\n // a global singleton, so multiple providers with different keys don't race.\n private readonly client: FalClient,\n ) {\n this.id = ctx.id;\n this.descriptor = FAL_DESCRIPTORS[options.model];\n this.caps = this.descriptor.caps;\n this.voiceCatalog = this.descriptor.voiceCatalog;\n }\n\n async generate(\n chunk: string,\n options?: GenerateCallOptions<FalCallOverrides>,\n ): Promise<GenerationResult> {\n const signal = options?.signal;\n signal?.throwIfAborted();\n\n const overrides = options?.overrides;\n const mergedParams =\n this.options.params || overrides?.params\n ? { ...this.options.params, ...overrides?.params }\n : undefined;\n const canonical: CanonicalTtsInput = {\n text: stripSpeak(chunk),\n voice: overrides?.voice ?? this.options.voice,\n params: mergedParams,\n };\n\n const input = this.descriptor.buildInput(canonical);\n const logger = this.ctx.config.logger;\n logger?.info?.('[fal] subscribe start', { model: this.options.model });\n\n try {\n const result = await this.client.subscribe(this.descriptor.endpointId, {\n input,\n ...(signal ? 
{ abortSignal: signal } : {}),\n });\n\n const { url, mimeType } = this.descriptor.extractAudio(result.data);\n const audio = await this.fetchAudio(url, signal);\n const duration = this.descriptor.extractDuration?.(result.data);\n\n logger?.info?.('[fal] generation done', {\n model: this.options.model,\n bytes: audio.length,\n requestId: result.requestId,\n });\n\n return {\n audio,\n mimeType,\n ...(duration !== undefined ? { duration } : {}),\n size: audio.length,\n // Opaque per-chunk metadata for core; consumers reconcile fal cost from\n // the aggregated request_ids on the final result.\n providerMeta: { request_id: result.requestId },\n };\n } catch (error) {\n // Aborts are not failures — propagate so consumers can distinguish\n // cancellation from a real error (BullMQ workers, retry logic, etc.).\n if (isAbortError(error)) throw error;\n // If the call was cancelled, surface a clean AbortError rather than\n // re-throwing whatever the SDK happened to raise after the abort — that\n // would otherwise mislabel a real API error as a cancellation.\n if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');\n // A TtsError raised below (e.g. a failed fetch) is already classified.\n if (error instanceof TtsError) throw error;\n const message = error instanceof Error ? error.message : String(error);\n const mapped = new TtsError(`fal generation failed: ${message}`, { cause: error });\n logger?.error?.('[fal] generation error', { message: mapped.message });\n throw mapped;\n }\n }\n\n /** Shared, abort-aware fetch turning a model's `audio.url` into bytes. */\n private async fetchAudio(url: string, signal?: AbortSignal): Promise<Buffer> {\n const res = await fetch(url, signal ? 
{ signal } : {});\n if (!res.ok) {\n throw new TtsError(`fal audio fetch failed (${res.status}): ${url}`, {\n statusCode: res.status,\n });\n }\n return Buffer.from(await res.arrayBuffer());\n }\n}\n\nexport const falProviderFactory: TtsProviderFactory<'fal', FalCallOverrides> = {\n id: 'fal',\n create(ctx: TtsProviderContext, options: FalProviderOptions) {\n if (!options.apiKey) {\n throw new Error('fal provider requires an apiKey');\n }\n if (!options.model || !FAL_DESCRIPTORS[options.model]) {\n throw new Error(`fal provider: unknown model \"${options.model}\"`);\n }\n // Instance-local client — credentials scoped here, no global mutation.\n const client = createFalClient({ credentials: options.apiKey });\n return new FalProvider(ctx, options, client);\n },\n};\n"],"mappings":";;;;;;;;;;AAUA,SAAgB,YAAY,MAAe,aAAuC;CAChF,MAAM,QAAS,MACX;CACJ,MAAM,MAAM,OAAO,OAAO,QAAQ,WAAW,MAAM,MAAM,KAAA;CACzD,IAAI,CAAC,KACH,MAAM,IAAI,SAAS,6CAA6C;CAGlE,OAAO;EAAE;EAAK,UADG,OAAO,OAAO,iBAAiB,WAAW,MAAM,eAAe;CACzD;AACzB;;;;;;;;;;ACTA,MAAM,wBAAwB;CAAC;CAAgB;CAAO;CAAe;AAAM;AAE3E,MAAa,yBAA6C;CACxD,YAAY;CACZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,WAAW,OAAO;EAChB,MAAM,SAAS,MAAM,UAAU,CAAC;EAChC,MAAM,OAAgC,EAAE,MAAM,MAAM,KAAK;EACzD,IAAI,MAAM,OAAO,SAAS,SAAS,KAAK,YAAY,MAAM,MAAM;EAChE,KAAK,MAAM,OAAO,uBAChB,IAAI,OAAO,SAAS,KAAA,GAAW,KAAK,OAAO,OAAO;EAEpD,OAAO;CACT;CACA,eAAe,SAAS,YAAY,MAAM,WAAW;AACvD;;;;;;;;;ACRA,SAAgB,eAAe,MAA6B;CAC1D,QAAQ,WAAuD;EAC7D,GAAG,KAAK;GACP,KAAK,UAAU,MAAM;EACtB,GAAI,MAAM,OAAO,SAAS,WAAW,GAAG,KAAK,WAAW,MAAM,MAAM,GAAG,IAAI,CAAC;EAC5E,GAAI,MAAM,UAAU,CAAC;CACvB;AACF;;;;;;;;;;;ACZA,MAAa,qBAAyC;CACpD,YAAY;CAIZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,YAAY,eAAe;EAAE,SAAS;EAAQ,UAAU;EAAS,UAAU,EAAE,OAAO,SAAS;CAAE,CAAC;CAChG,eAAe,SAAS,YAAY,MAAM,YAAY;AACxD;;;;;;;;;ACRA,SAAgB,mBACd,SACoB;CACpB,OAAO,EACL,MAAM,WAAW,OAA+D;EAC9E,IAAI,SAAS,CAAC,GAAG,OAAO;EACxB,IAAI,OAAO,QAAQ;GACjB,MAAM,IAAI,MAAM,OAAO,YAAY;GACnC,SAAS,OAAO,QAAQ,MACtB;IAAC,EAAE;IAAM,EAAE;IAAa,GAAG,OAAO,OAAO,EAAE,UAAU,CAAC,CAAC;GAAC
,EACrD,QAAQ,MAAmB,OAAO,MAAM,QAAQ,EAChD,MAAM,MAAM,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC,CAC5C;EACF;EACA,IAAI,OAAO,UAAU;GACnB,MAAM,OAAO,MAAM,SAAS,YAAY;GACxC,SAAS,OAAO,QAAQ,MAAM,EAAE,UAAU,MAAM,MAAM,EAAE,YAAY,EAAE,WAAW,IAAI,CAAC,CAAC;EACzF;EACA,IAAI,OAAO,QAAQ;GACjB,MAAM,IAAI,MAAM,OAAO,YAAY;GACnC,SAAS,OAAO,QAAQ,MAAM,EAAE,QAAQ,YAAY,MAAM,CAAC;EAC7D;EACA,IAAI,OAAO,YACT,SAAS,OAAO,QAAQ,MAAM,EAAE,WAAW,IAAI;EAEjD,OAAO;CACT,EACF;AACF;AA6CA,MAAa,qBAAmD,mBAC9D;CAjCA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;AAIA,EAAmB,KAAK,UAAU;CAAE,IAAI;CAAM;CAAM,WAAW,CAAC;CAAG,KAAK,EAAE,KAAK;AAAE,EAAE,CACrF;;;;;;;;;;;;;;;ACvEA,MAAa,mBAAuC;CAClD,YAAY;CAOZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,WAAW,OAAO;EAChB,MAAM,SAAS,MAAM,UAAU,CAAC;EAChC,MAAM,OAAgC;GACpC,QAAQ,MAAM;GACd,eAAe;EACjB;EACA,IAAI,OAAO,sBAAsB,KAAA,GAAW,KAAK,qBAAqB,OAAO;EAC7E,IAAI,OAAO,iBAAiB,KAAA,GAAW,KAAK,gBAAgB,OAAO;EACnE,IAAI,OAAO,gBAAgB,KAAA,GAAW,KAAK,cAAc,OAAO;EAEhE,IAAI,MAAM,OAAO,SAAS,gBACxB,KAAK,WAAW,MAAM,MAAM,SAAS,KAAK,OAAO;GAC/C,YAAY,EAAE;GACd,OAAO,EAAE;EACX,EAAE;OAEF,KAAK,QAAQ,MAAM,OAAO,SAAS,WAAW,MAAM,MAAM,KAAK;EAEjE,OAAO;CACT;CACA,eAAe,SAAS,YAAY,MAAM,YAAY;CACtD,cAAc;AAChB;;;;;;;;;;;ACpCA,MAAa,oBAAwC;CACnD,YAAY;CAEZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,WAAW,OAAO;EAChB,MAAM,SAAS,MAAM,UAAU,CAAC;EAChC,MAAM,eAAgB,OAAO,gBAAwD,CAAC;EACtF,MAAM,WAAW,MAAM,OAAO,SAAS,WAAW,MAAM,MAAM,KAAK,KAAA;EACnE,MAAM,OAAgC;GACpC,MAAM,MAAM;GACZ,eAAe;IACb,GAAG;IAGH,UAAU,YAAY,aAAa,YAAY;GACjD;GACA,eAAe;EACjB;EACA,IAAI,OAAO,iBAAiB,KAAA,GAAW,KAAK,gBAAgB,OAAO;EACnE,IAAI,OAAO,kBAAkB,KAAA,GAAW,KAAK,iBAAiB,OAAO;EACrE,OAAO;CACT;CACA,eAAe,SAAS,YAAY,MAAM,YAAY;CACtD,gBAAgB,MAAM;EACpB,MAAM,KAAM,KAAmC;EAC/C,OAAO,OAAO,OAAO,WAAW,KAAK,MAAO,KAAA;CAC9C;AAGF;;;;AC3BA,MAAa,kBAA0D;CACrE,+BAA+B;CAC/B,+BAA+B;CAC/B,oCAAoC;CACpC,oCAAoC;AACtC;;;;;;;;AC6BA,SAAS,WAAW,OAAuB;CACzC,OAAO,MACJ,QAAQ,qBAAqB,EAAE,EAC/B,QAAQ,kBAAkB,EAAE,EAC5B,KAAK;AACV;AAEA,SAAS,aAAa,OAAyB;CAC7C,OAAO,iBAAiB,SA
AS,MAAM,SAAS;AAClD;AAEA,IAAM,cAAN,MAA2D;CAMzD,YACE,KACA,SAGA,QACA;EALiB,KAAA,MAAA;EACA,KAAA,UAAA;EAGA,KAAA,SAAA;EAEjB,KAAK,KAAK,IAAI;EACd,KAAK,aAAa,gBAAgB,QAAQ;EAC1C,KAAK,OAAO,KAAK,WAAW;EAC5B,KAAK,eAAe,KAAK,WAAW;CACtC;CAEA,MAAM,SACJ,OACA,SAC2B;EAC3B,MAAM,SAAS,SAAS;EACxB,QAAQ,eAAe;EAEvB,MAAM,YAAY,SAAS;EAC3B,MAAM,eACJ,KAAK,QAAQ,UAAU,WAAW,SAC9B;GAAE,GAAG,KAAK,QAAQ;GAAQ,GAAG,WAAW;EAAO,IAC/C,KAAA;EACN,MAAM,YAA+B;GACnC,MAAM,WAAW,KAAK;GACtB,OAAO,WAAW,SAAS,KAAK,QAAQ;GACxC,QAAQ;EACV;EAEA,MAAM,QAAQ,KAAK,WAAW,WAAW,SAAS;EAClD,MAAM,SAAS,KAAK,IAAI,OAAO;EAC/B,QAAQ,OAAO,yBAAyB,EAAE,OAAO,KAAK,QAAQ,MAAM,CAAC;EAErE,IAAI;GACF,MAAM,SAAS,MAAM,KAAK,OAAO,UAAU,KAAK,WAAW,YAAY;IACrE;IACA,GAAI,SAAS,EAAE,aAAa,OAAO,IAAI,CAAC;GAC1C,CAAC;GAED,MAAM,EAAE,KAAK,aAAa,KAAK,WAAW,aAAa,OAAO,IAAI;GAClE,MAAM,QAAQ,MAAM,KAAK,WAAW,KAAK,MAAM;GAC/C,MAAM,WAAW,KAAK,WAAW,kBAAkB,OAAO,IAAI;GAE9D,QAAQ,OAAO,yBAAyB;IACtC,OAAO,KAAK,QAAQ;IACpB,OAAO,MAAM;IACb,WAAW,OAAO;GACpB,CAAC;GAED,OAAO;IACL;IACA;IACA,GAAI,aAAa,KAAA,IAAY,EAAE,SAAS,IAAI,CAAC;IAC7C,MAAM,MAAM;IAGZ,cAAc,EAAE,YAAY,OAAO,UAAU;GAC/C;EACF,SAAS,OAAO;GAGd,IAAI,aAAa,KAAK,GAAG,MAAM;GAI/B,IAAI,QAAQ,SAAS,MAAM,IAAI,aAAa,WAAW,YAAY;GAEnE,IAAI,iBAAiB,UAAU,MAAM;GAErC,MAAM,SAAS,IAAI,SAAS,0BADZ,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,KACJ,EAAE,OAAO,MAAM,CAAC;GACjF,QAAQ,QAAQ,0BAA0B,EAAE,SAAS,OAAO,QAAQ,CAAC;GACrE,MAAM;EACR;CACF;;CAGA,MAAc,WAAW,KAAa,QAAuC;EAC3E,MAAM,MAAM,MAAM,MAAM,KAAK,SAAS,EAAE,OAAO,IAAI,CAAC,CAAC;EACrD,IAAI,CAAC,IAAI,IACP,MAAM,IAAI,SAAS,2BAA2B,IAAI,OAAO,KAAK,OAAO,EACnE,YAAY,IAAI,OAClB,CAAC;EAEH,OAAO,OAAO,KAAK,MAAM,IAAI,YAAY,CAAC;CAC5C;AACF;AAEA,MAAa,qBAAkE;CAC7E,IAAI;CACJ,OAAO,KAAyB,SAA6B;EAC3D,IAAI,CAAC,QAAQ,QACX,MAAM,IAAI,MAAM,iCAAiC;EAEnD,IAAI,CAAC,QAAQ,SAAS,CAAC,gBAAgB,QAAQ,QAC7C,MAAM,IAAI,MAAM,gCAAgC,QAAQ,MAAM,EAAE;EAIlE,OAAO,IAAI,YAAY,KAAK,SADb,gBAAgB,EAAE,aAAa,QAAQ,OAAO,CACnB,CAAC;CAC7C;AACF"}
package/package.json ADDED
@@ -0,0 +1,59 @@
1
+ {
2
+ "name": "@alien-lobster-buffet/tts-conductor-fal",
3
+ "version": "0.2.0-alpha.0",
4
+ "description": "fal.ai provider bindings for the TTS Conductor ecosystem",
5
+ "keywords": [
6
+ "tts",
7
+ "fal",
8
+ "fal-ai",
9
+ "provider"
10
+ ],
11
+ "license": "MIT",
12
+ "author": "Cole Reed <alienlobsterbuffet.dev@gmail.com>",
13
+ "homepage": "https://github.com/ichabodcole/tts-conductor#readme",
14
+ "bugs": {
15
+ "url": "https://github.com/ichabodcole/tts-conductor/issues"
16
+ },
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "git+https://github.com/ichabodcole/tts-conductor.git",
20
+ "directory": "packages/tts-provider-fal"
21
+ },
22
+ "type": "module",
23
+ "exports": {
24
+ ".": {
25
+ "import": "./dist/index.mjs",
26
+ "types": "./dist/index.d.mts"
27
+ }
28
+ },
29
+ "main": "./dist/index.mjs",
30
+ "types": "./dist/index.d.mts",
31
+ "files": [
32
+ "dist",
33
+ "LICENSE",
34
+ "README.md"
35
+ ],
36
+ "engines": {
37
+ "node": ">=18"
38
+ },
39
+ "publishConfig": {
40
+ "access": "public"
41
+ },
42
+ "scripts": {
43
+ "build": "tsdown",
44
+ "clean": "tsdown --clean",
45
+ "dev": "tsdown --watch",
46
+ "prepublishOnly": "tsdown",
47
+ "test": "vitest --run --config ../../vitest.config.ts",
48
+ "typecheck": "tsc --noEmit"
49
+ },
50
+ "peerDependencies": {
51
+ "@alien-lobster-buffet/tts-conductor-core": "^0.2.0-alpha.1"
52
+ },
53
+ "dependencies": {
54
+ "@fal-ai/client": "^1.10.1"
55
+ },
56
+ "devDependencies": {
57
+ "@alien-lobster-buffet/tts-conductor-core": "^0.2.0-alpha.1"
58
+ }
59
+ }