@alien-lobster-buffet/tts-conductor-fal 0.2.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +106 -0
- package/dist/index.d.mts +188 -0
- package/dist/index.mjs +329 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +59 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Cole Reed
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# @alien-lobster-buffet/tts-conductor-fal
|
|
2
|
+
|
|
3
|
+
[fal.ai](https://fal.ai) provider bindings for the
|
|
4
|
+
[TTS Conductor](https://github.com/ichabodcole/tts-conductor) ecosystem.
|
|
5
|
+
|
|
6
|
+
fal.ai is a **gateway fronting many TTS engines**, each with its own input and
|
|
7
|
+
voice schema. This package models that as a single `fal` provider id that you
|
|
8
|
+
parameterize with a `model` at construction — so the marketplace lives at
|
|
9
|
+
construction time while each provider _instance_ is still one engine, fully
|
|
10
|
+
honoring the conductor's `TtsProvider` contract (one `caps`, one `generate`, one
|
|
11
|
+
optional `voiceCatalog`).
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
```sh
|
|
16
|
+
npm install @alien-lobster-buffet/tts-conductor-core @alien-lobster-buffet/tts-conductor-fal
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Requires a fal API key (`FAL_KEY`).
|
|
20
|
+
|
|
21
|
+
## Quickstart
|
|
22
|
+
|
|
23
|
+
```ts
|
|
24
|
+
import { createTtsConductor } from "@alien-lobster-buffet/tts-conductor-core";
|
|
25
|
+
import { falProviderFactory } from "@alien-lobster-buffet/tts-conductor-fal";
|
|
26
|
+
|
|
27
|
+
const conductor = createTtsConductor({ pauseTable: {}, maxPauseSeconds: 30 });
|
|
28
|
+
conductor.registerProvider(falProviderFactory);
|
|
29
|
+
|
|
30
|
+
const provider = conductor.createProvider("fal", {
|
|
31
|
+
apiKey: process.env.FAL_KEY!,
|
|
32
|
+
model: "fal-ai/minimax/speech-02-hd",
|
|
33
|
+
voice: { kind: "preset", id: "Wise_Woman" },
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
const { audio, duration, providerMeta } = await conductor.generateFull(
|
|
37
|
+
"Hello there. [PAUSE:2s] Welcome.",
|
|
38
|
+
provider,
|
|
39
|
+
);
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Starter models
|
|
43
|
+
|
|
44
|
+
| `model` | text key | voice mechanism | duration |
|
|
45
|
+
| ---------------------------------- | -------- | -------------------------------------------------------------------------------- | ---------------------- |
|
|
46
|
+
| `fal-ai/minimax/speech-02-hd` | `text` | `{ kind: 'preset', id }` → `voice_setting` object | native (`duration_ms`) |
|
|
47
|
+
| `fal-ai/gemini-3.1-flash-tts` | `prompt` | `{ kind: 'preset', id }` (30-voice enum) or `{ kind: 'multiSpeaker', speakers }` | ffprobe |
|
|
48
|
+
| `fal-ai/chatterbox/text-to-speech` | `text` | `{ kind: 'clone', audioUrl }` (no preset voices) | ffprobe |
|
|
49
|
+
| `fal-ai/elevenlabs/tts/turbo-v2.5` | `text` | `{ kind: 'preset', id }` | ffprobe |
|
|
50
|
+
|
|
51
|
+
Switch model and voice at construction, or vary them per call:
|
|
52
|
+
|
|
53
|
+
```ts
|
|
54
|
+
await provider.generate(chunk, {
|
|
55
|
+
overrides: {
|
|
56
|
+
voice: { kind: "preset", id: "Aria" },
|
|
57
|
+
params: { stability: 0.3 },
|
|
58
|
+
},
|
|
59
|
+
});
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
`params` carries the model's scalar knobs (e.g. minimax `voiceSetting`, gemini
|
|
63
|
+
`styleInstructions` / `temperature`, chatterbox `exaggeration` / `cfg`, 11labs
|
|
64
|
+
`stability` / `similarity_boost` / `speed`). The minimax, gemini, and chatterbox
|
|
65
|
+
descriptors forward only the keys they recognize; the flat elevenlabs descriptor forwards every `params` key as-is.
|
|
66
|
+
|
|
67
|
+
## Voice discovery
|
|
68
|
+
|
|
69
|
+
Only `fal-ai/gemini-3.1-flash-tts` exposes a `voiceCatalog` — its 30 preset
|
|
70
|
+
voices are the one schema-enumerable set. minimax and elevenlabs-on-fal take a
|
|
71
|
+
voice id as a free string (pass a known id), and chatterbox clones from
|
|
72
|
+
`audio_url`; all three leave `voiceCatalog` undefined.
|
|
73
|
+
|
|
74
|
+
```ts
|
|
75
|
+
if (provider.voiceCatalog) {
|
|
76
|
+
const voices = await provider.voiceCatalog.listVoices({ search: "kore" });
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
> Gemini voices carry no per-voice language metadata (the model is multilingual
|
|
81
|
+
> via the `languageCode` param), so `listVoices({ language })` filters them all
|
|
82
|
+
> out. Filter by `search` instead.
|
|
83
|
+
|
|
84
|
+
## Cost reconciliation
|
|
85
|
+
|
|
86
|
+
fal bills asynchronously. Each chunk's fal `request_id` is surfaced on the
|
|
87
|
+
generation result via core's generic `providerMeta` channel — collect them from
|
|
88
|
+
the final result and reconcile against fal's billing-events API:
|
|
89
|
+
|
|
90
|
+
```ts
|
|
91
|
+
const result = await conductor.generateFull(text, provider);
|
|
92
|
+
const requestIds =
|
|
93
|
+
result.providerMeta?.map((m) => m?.request_id).filter(Boolean) ?? [];
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Notes
|
|
97
|
+
|
|
98
|
+
- **Multi-speaker (gemini) is single-chunk / short-form for now.** fal needs the
|
|
99
|
+
full `speakers[]` on every request and the prompt's `Alias:` line-prefixes to
|
|
100
|
+
survive chunking; long multi-speaker scripts aren't supported yet.
|
|
101
|
+
- fal engines don't render SSML breaks — inter-chunk pauses become stitched
|
|
102
|
+
silence segments via the core orchestrator.
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
MIT
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { ProviderCapabilities, TtsProviderFactory, VoiceCatalog, VoiceCatalogEntry } from "@alien-lobster-buffet/tts-conductor-core";
|
|
2
|
+
|
|
3
|
+
//#region src/types.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* fal endpoint ids supported by this provider. Extended as models are added;
|
|
6
|
+
* each id must have a matching entry in the descriptor registry.
|
|
7
|
+
*/
|
|
8
|
+
type FalModelId = 'fal-ai/minimax/speech-02-hd' | 'fal-ai/gemini-3.1-flash-tts' | 'fal-ai/chatterbox/text-to-speech' | 'fal-ai/elevenlabs/tts/turbo-v2.5';
|
|
9
|
+
/** One speaker in a multi-speaker request (gemini `speakers[]`). */
|
|
10
|
+
interface FalSpeaker {
|
|
11
|
+
/** Alias used as a line prefix in the prompt (e.g. "Host:"). → SpeakerConfig.speaker_id */
|
|
12
|
+
speakerId: string;
|
|
13
|
+
/** A preset voice name. → SpeakerConfig.voice */
|
|
14
|
+
voiceId: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Polymorphic voice selection spanning fal's mechanisms. Object-voice (minimax
|
|
18
|
+
* `voice_setting`) is not a separate kind — it's a `preset` id that the
|
|
19
|
+
* descriptor nests into an object alongside its scalar params.
|
|
20
|
+
*/
|
|
21
|
+
type FalVoiceSelection = {
|
|
22
|
+
kind: 'preset';
|
|
23
|
+
id: string;
|
|
24
|
+
} | {
|
|
25
|
+
kind: 'multiSpeaker';
|
|
26
|
+
speakers: FalSpeaker[];
|
|
27
|
+
} | {
|
|
28
|
+
kind: 'clone';
|
|
29
|
+
audioUrl: string;
|
|
30
|
+
};
|
|
31
|
+
/**
|
|
32
|
+
* Normalized, model-agnostic input the provider assembles per call before a
|
|
33
|
+
* descriptor maps it onto a specific model's wire schema.
|
|
34
|
+
*/
|
|
35
|
+
interface CanonicalTtsInput {
|
|
36
|
+
/** Plain text for this chunk — already stripped of core's `<speak>` wrapper. */
|
|
37
|
+
text: string;
|
|
38
|
+
/** Resolved voice selection (per-call override ?? construction default). */
|
|
39
|
+
voice?: FalVoiceSelection;
|
|
40
|
+
/**
|
|
41
|
+
* Resolved model-specific scalar knobs (speed, temperature, stability, …).
|
|
42
|
+
* Opaque at this layer. Hand-written descriptors whitelist the keys they forward, so a
|
|
43
|
+
* typo'd param is a silent no-op there; `flatBuildInput`-based descriptors forward every key unfiltered.
|
|
44
|
+
*/
|
|
45
|
+
params?: Record<string, unknown>;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Where a model's audio lives in its fal response: a URL to fetch plus an
|
|
49
|
+
* optional mime type. All four starter models return `audio` as a fal `File`
|
|
50
|
+
* (`{ url, content_type? }`), so the provider's shared, abort-aware fetch turns
|
|
51
|
+
* this into bytes — descriptors only need to *locate* it.
|
|
52
|
+
*/
|
|
53
|
+
interface FalAudioLocation {
|
|
54
|
+
url: string;
|
|
55
|
+
mimeType?: string;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Per-model adapter. The marketplace nature of fal is localized here: one
|
|
59
|
+
* descriptor per `endpointId`, all sharing the provider's `fal.subscribe` /
|
|
60
|
+
* fetch / abort plumbing.
|
|
61
|
+
*/
|
|
62
|
+
interface FalModelDescriptor {
|
|
63
|
+
endpointId: FalModelId;
|
|
64
|
+
/**
|
|
65
|
+
* Per-model capabilities (incl. `maxCharsPerRequest`). fal engines don't
|
|
66
|
+
* render SSML breaks, so `maxInlineBreakSeconds` is always `null` — long
|
|
67
|
+
* pauses become stitched silence segments via the core orchestrator.
|
|
68
|
+
*/
|
|
69
|
+
caps: ProviderCapabilities;
|
|
70
|
+
/** Map canonical input → this model's fal wire input (text key, voice encoding, defaults). */
|
|
71
|
+
buildInput(input: CanonicalTtsInput): Record<string, unknown>;
|
|
72
|
+
/** Locate the audio URL (+ mime) in this model's fal response. */
|
|
73
|
+
extractAudio(data: unknown): FalAudioLocation;
|
|
74
|
+
/**
|
|
75
|
+
* Pull duration (seconds) from the response when the model returns it. Only
|
|
76
|
+
* minimax does (`duration_ms`); the others omit this and let core's ffprobe
|
|
77
|
+
* derive duration.
|
|
78
|
+
*/
|
|
79
|
+
extractDuration?(data: unknown): number | undefined;
|
|
80
|
+
/** Enumerable speaker list, for models that expose one. */
|
|
81
|
+
voiceCatalog?: VoiceCatalog;
|
|
82
|
+
}
|
|
83
|
+
//#endregion
|
|
84
|
+
//#region src/descriptors/chatterbox.d.ts
|
|
85
|
+
declare const chatterboxTextToSpeech: FalModelDescriptor;
|
|
86
|
+
//#endregion
|
|
87
|
+
//#region src/descriptors/elevenlabs.d.ts
|
|
88
|
+
/**
|
|
89
|
+
* `fal-ai/elevenlabs/tts/turbo-v2.5` — the flat model: `text` key, `voice` id
|
|
90
|
+
* (default Rachel), and 11labs scalar knobs (`stability` / `similarity_boost` /
|
|
91
|
+
* `style` / `speed` / `previous_text` / `next_text` / `language_code` /
|
|
92
|
+
* `apply_text_normalization`) forwarded via `params`. `voice` is a free string
|
|
93
|
+
* with documented examples, not a schema enum, so no `voiceCatalog`. Returns
|
|
94
|
+
* `.mp3`; no duration field → core ffprobe fallback.
|
|
95
|
+
*/
|
|
96
|
+
declare const elevenlabsTurboV25: FalModelDescriptor;
|
|
97
|
+
//#endregion
|
|
98
|
+
//#region src/descriptors/flatBuildInput.d.ts
|
|
99
|
+
interface FlatBuildInputOptions {
|
|
100
|
+
/** Wire key the chunk text maps to (`text` for most, `prompt` for gemini). */
|
|
101
|
+
textKey: 'text' | 'prompt';
|
|
102
|
+
/** Wire key a `preset` voice id maps to. */
|
|
103
|
+
voiceKey: string;
|
|
104
|
+
/** Static input merged in first (e.g. an output_format or default voice). */
|
|
105
|
+
defaults?: Record<string, unknown>;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Produces a `buildInput` for flat models — text at `textKey`, a `preset` voice
|
|
109
|
+
* id at `voiceKey`, scalar `params` merged on top. Keeps one-line ergonomics for
|
|
110
|
+
* structurally-simple models without a separate declarative code path; models
|
|
111
|
+
* with object/multi-speaker/clone voice encoding hand-write `buildInput` instead.
|
|
112
|
+
*/
|
|
113
|
+
declare function flatBuildInput(opts: FlatBuildInputOptions): (input: CanonicalTtsInput) => Record<string, unknown>;
|
|
114
|
+
//#endregion
|
|
115
|
+
//#region src/descriptors/gemini.d.ts
|
|
116
|
+
/**
|
|
117
|
+
* `fal-ai/gemini-3.1-flash-tts` — uses the `prompt` key (not `text`).
|
|
118
|
+
* Single-speaker uses the `voice` enum (default Kore); multi-speaker maps the
|
|
119
|
+
* canonical `speakers` to fal `SpeakerConfig[]` (`{ speaker_id, voice }`) and
|
|
120
|
+
* omits `voice`. `style_instructions` / `language_code` / `temperature` ride
|
|
121
|
+
* `params`. No duration field → core ffprobe fallback.
|
|
122
|
+
*
|
|
123
|
+
* **Multi-speaker + chunking caveat:** fal expects every request to carry the
|
|
124
|
+
* full `speakers[]` AND the prompt to keep its `Alias:` line-prefixes. A naive
|
|
125
|
+
* length-chunker can split mid-turn, so long multi-speaker scripts are not yet
|
|
126
|
+
* supported — treat multi-speaker as single-chunk / short-form for now.
|
|
127
|
+
*/
|
|
128
|
+
declare const gemini31FlashTts: FalModelDescriptor;
|
|
129
|
+
//#endregion
|
|
130
|
+
//#region src/descriptors/minimax.d.ts
|
|
131
|
+
/**
|
|
132
|
+
* `fal-ai/minimax/speech-02-hd` — object-voice. The preset voice id nests into a
|
|
133
|
+
* `voice_setting` object alongside scalar knobs (vol/speed/pitch/emotion via
|
|
134
|
+
* `params.voiceSetting`). `output_format` is forced to `url` so the response is
|
|
135
|
+
* the schema-confirmed `audio.url` fetch shape shared by all four models (hex
|
|
136
|
+
* inline bytes are a possible later optimization). minimax is the only starter
|
|
137
|
+
* model that returns a duration (`duration_ms`).
|
|
138
|
+
*/
|
|
139
|
+
declare const minimaxSpeech02Hd: FalModelDescriptor;
|
|
140
|
+
//#endregion
|
|
141
|
+
//#region src/descriptors/index.d.ts
|
|
142
|
+
/** Registry of every supported fal model, keyed by its `endpointId`. */
|
|
143
|
+
declare const FAL_DESCRIPTORS: Record<FalModelId, FalModelDescriptor>;
|
|
144
|
+
//#endregion
|
|
145
|
+
//#region src/falProvider.d.ts
|
|
146
|
+
/**
|
|
147
|
+
* Construction options for the single `fal` provider id. `model` selects the
|
|
148
|
+
* engine (the marketplace lives here, at construction); the returned provider
|
|
149
|
+
* instance is bound to that one model. `voice` / `params` are per-instance
|
|
150
|
+
* defaults that per-call overrides can replace.
|
|
151
|
+
*/
|
|
152
|
+
interface FalProviderOptions {
|
|
153
|
+
apiKey: string;
|
|
154
|
+
model: FalModelId;
|
|
155
|
+
voice?: FalVoiceSelection;
|
|
156
|
+
params?: Record<string, unknown>;
|
|
157
|
+
}
|
|
158
|
+
/** Per-call overrides — vary the voice or scalar params for a single `generate`. */
|
|
159
|
+
interface FalCallOverrides {
|
|
160
|
+
voice?: FalVoiceSelection;
|
|
161
|
+
params?: Record<string, unknown>;
|
|
162
|
+
}
|
|
163
|
+
declare module '@alien-lobster-buffet/tts-conductor-core' {
|
|
164
|
+
interface TtsProviderRegistry {
|
|
165
|
+
fal: FalProviderOptions;
|
|
166
|
+
}
|
|
167
|
+
interface TtsProviderCallOverridesRegistry {
|
|
168
|
+
fal: FalCallOverrides;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
declare const falProviderFactory: TtsProviderFactory<'fal', FalCallOverrides>;
|
|
172
|
+
//#endregion
|
|
173
|
+
//#region src/voiceCatalog.d.ts
|
|
174
|
+
/**
|
|
175
|
+
* Build a `VoiceCatalog` from a fixed list of entries, applying the
|
|
176
|
+
* cross-provider client-side filter baseline (case-insensitive substring search;
|
|
177
|
+
* language prefix-match; gender equality; custom-only). Used for fal models
|
|
178
|
+
* whose voices are a schema-enumerable set.
|
|
179
|
+
*/
|
|
180
|
+
declare function staticVoiceCatalog<TRaw = unknown>(entries: ReadonlyArray<VoiceCatalogEntry<TRaw>>): VoiceCatalog<TRaw>;
|
|
181
|
+
/** Raw shape for a gemini voice entry — just the preset name. */
|
|
182
|
+
interface GeminiRawVoice {
|
|
183
|
+
name: string;
|
|
184
|
+
}
|
|
185
|
+
declare const geminiVoiceCatalog: VoiceCatalog<GeminiRawVoice>;
|
|
186
|
+
//#endregion
|
|
187
|
+
export { type CanonicalTtsInput, FAL_DESCRIPTORS, type FalAudioLocation, type FalCallOverrides, type FalModelDescriptor, type FalModelId, type FalProviderOptions, type FalSpeaker, type FalVoiceSelection, type FlatBuildInputOptions, type GeminiRawVoice, chatterboxTextToSpeech, elevenlabsTurboV25, falProviderFactory, flatBuildInput, gemini31FlashTts, geminiVoiceCatalog, minimaxSpeech02Hd, staticVoiceCatalog };
|
|
188
|
+
//# sourceMappingURL=index.d.mts.map
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
import { TtsError } from "@alien-lobster-buffet/tts-conductor-core";
|
|
2
|
+
import { createFalClient } from "@fal-ai/client";
|
|
3
|
+
//#region src/descriptors/shared.ts
|
|
4
|
+
/**
 * Find the downloadable audio file in a fal TTS response.
 *
 * Reads `data.audio.url` and `data.audio.content_type`; the caller is expected
 * to fetch the URL to obtain the bytes.
 *
 * @param {unknown} data - Raw fal response payload (may be null/undefined).
 * @param {string} defaultMime - Mime type to report when `audio.content_type`
 *   is not a string.
 * @returns {{ url: string, mimeType: string }} The audio URL and its mime type.
 * @throws {TtsError} When `audio.url` is missing, not a string, or empty.
 */
function locateAudio(data, defaultMime) {
  const file = data?.audio;
  // An empty string is treated the same as a missing URL.
  if (typeof file?.url !== "string" || file.url === "") {
    throw new TtsError("[fal] response did not contain an audio.url");
  }
  const { url, content_type: contentType } = file;
  return {
    url,
    mimeType: typeof contentType === "string" ? contentType : defaultMime
  };
}
|
|
20
|
+
//#endregion
|
|
21
|
+
//#region src/descriptors/chatterbox.ts
|
|
22
|
+
/**
 * Descriptor for `fal-ai/chatterbox/text-to-speech`.
 *
 * The chunk text goes out under `text`. A `clone` voice selection contributes
 * its `audioUrl` as `audio_url`; any other (or no) selection adds no voice
 * field. Only the generation knobs named in `CHATTERBOX_PARAM_KEYS` are copied
 * from `params`, and only when they are not `undefined`. The audio mime type
 * falls back to `audio/wav`; no `extractDuration` or `voiceCatalog` is defined.
 */
const CHATTERBOX_PARAM_KEYS = ["exaggeration", "cfg", "temperature", "seed"];
const chatterboxTextToSpeech = {
  endpointId: "fal-ai/chatterbox/text-to-speech",
  caps: {
    maxInlineBreakSeconds: null,
    maxCharsPerRequest: 5000
  },
  /** Map the canonical input onto chatterbox's wire input. */
  buildInput(input) {
    const knobs = input.params ?? {};
    const payload = { text: input.text };
    if (input.voice?.kind === "clone") {
      payload.audio_url = input.voice.audioUrl;
    }
    // Copy whitelisted knobs in declaration order, skipping unset ones.
    const forwarded = CHATTERBOX_PARAM_KEYS
      .filter((name) => knobs[name] !== undefined)
      .map((name) => [name, knobs[name]]);
    return Object.assign(payload, Object.fromEntries(forwarded));
  },
  /** Locate the audio file in the response, defaulting the mime to WAV. */
  extractAudio(data) {
    return locateAudio(data, "audio/wav");
  }
};
|
|
50
|
+
//#endregion
|
|
51
|
+
//#region src/descriptors/flatBuildInput.ts
|
|
52
|
+
/**
 * Produces a `buildInput` for flat models: `opts.defaults` first, then the
 * chunk text at `opts.textKey`, then a `preset` voice id at `opts.voiceKey`,
 * then every defined `params` entry merged on top.
 *
 * `params` entries whose value is `undefined` are skipped, so an unset knob
 * (e.g. `{ voice: undefined }`) cannot erase the text, the voice, or a
 * default. This matches the `!== undefined` checks in the hand-written
 * descriptors. Keys are not whitelisted: any defined key is forwarded.
 *
 * @param {{ textKey: string, voiceKey: string, defaults?: Record<string, unknown> }} opts
 *   Wire key for the text, wire key for a preset voice id, and optional
 *   static input merged in first.
 * @returns {(input: { text: string, voice?: object, params?: Record<string, unknown> }) => Record<string, unknown>}
 *   A function mapping the canonical input onto the model's wire input.
 */
function flatBuildInput(opts) {
  return (input) => {
    const wire = {
      ...opts.defaults,
      [opts.textKey]: input.text
    };
    if (input.voice?.kind === "preset") {
      wire[opts.voiceKey] = input.voice.id;
    }
    for (const [key, value] of Object.entries(input.params ?? {})) {
      // Skip unset knobs so they never overwrite text, voice, or defaults.
      if (value !== undefined) wire[key] = value;
    }
    return wire;
  };
}
|
|
66
|
+
//#endregion
|
|
67
|
+
//#region src/descriptors/elevenlabs.ts
|
|
68
|
+
/**
 * Descriptor for `fal-ai/elevenlabs/tts/turbo-v2.5`, built on
 * `flatBuildInput`: chunk text under `text`, a `preset` voice id under
 * `voice` (defaulting to "Rachel"), and `params` merged on top by the flat
 * builder. The audio mime type falls back to `audio/mpeg`; no
 * `extractDuration` or `voiceCatalog` is defined on this descriptor.
 */
const elevenlabsTurboV25 = {
  endpointId: "fal-ai/elevenlabs/tts/turbo-v2.5",
  caps: {
    maxInlineBreakSeconds: null,
    maxCharsPerRequest: 2000
  },
  buildInput: flatBuildInput({
    defaults: { voice: "Rachel" },
    textKey: "text",
    voiceKey: "voice"
  }),
  /** Locate the audio file in the response, defaulting the mime to MP3. */
  extractAudio(data) {
    return locateAudio(data, "audio/mpeg");
  }
};
|
|
89
|
+
//#endregion
|
|
90
|
+
//#region src/voiceCatalog.ts
|
|
91
|
+
/**
 * Wrap a fixed list of voice entries in a `VoiceCatalog`-shaped object whose
 * `listVoices(query)` filters the list client-side.
 *
 * Filters are applied in this order, each only when its query field is truthy:
 * - `search`: case-insensitive substring match against the entry's `name`,
 *   `description`, and every string value in `labels`.
 * - `language`: case-insensitive prefix match against any of the entry's
 *   `languages`.
 * - `gender`: case-insensitive equality with the entry's `gender`.
 * - `customOnly`: keeps only entries whose `custom` is exactly `true`.
 *
 * @param {Iterable<object>} entries - Voice entries; copied on every call.
 * @returns {{ listVoices(query?: object): Promise<object[]> }} The catalog.
 */
function staticVoiceCatalog(entries) {
  const matchesSearch = (voice, needle) =>
    [voice.name, voice.description, ...Object.values(voice.labels ?? {})]
      .some((field) => typeof field === "string" && field.toLowerCase().includes(needle));

  return {
    async listVoices(query) {
      let voices = [...entries];
      if (query?.search) {
        const needle = query.search.toLowerCase();
        voices = voices.filter((voice) => matchesSearch(voice, needle));
      }
      if (query?.language) {
        const prefix = query.language.toLowerCase();
        voices = voices.filter((voice) =>
          voice.languages.some((code) => code.toLowerCase().startsWith(prefix)));
      }
      if (query?.gender) {
        const wanted = query.gender.toLowerCase();
        voices = voices.filter((voice) => voice.gender?.toLowerCase() === wanted);
      }
      if (query?.customOnly) {
        voices = voices.filter((voice) => voice.custom === true);
      }
      return voices;
    }
  };
}

/**
 * Catalog of the gemini preset voice names. Each entry uses the name as both
 * `id` and `name`, has an empty `languages` list (so a `language` query
 * matches nothing), and carries `{ name }` as its raw payload.
 */
const geminiVoiceCatalog = staticVoiceCatalog(
  Array.from(
    [
      "Achernar",
      "Achird",
      "Algenib",
      "Algieba",
      "Alnilam",
      "Aoede",
      "Autonoe",
      "Callirrhoe",
      "Charon",
      "Despina",
      "Enceladus",
      "Erinome",
      "Fenrir",
      "Gacrux",
      "Iapetus",
      "Kore",
      "Laomedeia",
      "Leda",
      "Orus",
      "Pulcherrima",
      "Puck",
      "Rasalgethi",
      "Sadachbia",
      "Sadaltager",
      "Schedar",
      "Sulafat",
      "Umbriel",
      "Vindemiatrix",
      "Zephyr",
      "Zubenelgenubi"
    ],
    (name) => ({ id: name, name, languages: [], raw: { name } })
  )
);
|
|
157
|
+
//#endregion
|
|
158
|
+
//#region src/descriptors/gemini.ts
|
|
159
|
+
/**
 * Descriptor for `fal-ai/gemini-3.1-flash-tts`.
 *
 * The chunk text is sent under `prompt` and `output_format` is always "mp3".
 * `params.styleInstructions`, `params.languageCode` and `params.temperature`
 * are forwarded as `style_instructions`, `language_code` and `temperature`
 * when they are not `undefined`. A `multiSpeaker` selection is sent as
 * `speakers` (`{ speaker_id, voice }` per speaker) with no top-level `voice`;
 * otherwise `voice` is the `preset` id, or "Kore" when no preset is selected.
 * The audio mime type falls back to `audio/mpeg`; no `extractDuration` is
 * defined. The catalog of preset voices is exposed as `voiceCatalog`.
 */
const gemini31FlashTts = {
  endpointId: "fal-ai/gemini-3.1-flash-tts",
  caps: {
    maxInlineBreakSeconds: null,
    maxCharsPerRequest: 5000
  },
  /** Map the canonical input onto gemini's wire input. */
  buildInput(input) {
    const { styleInstructions, languageCode, temperature } = input.params ?? {};
    const selection = input.voice;
    const payload = {
      prompt: input.text,
      output_format: "mp3"
    };
    const optionalFields = [
      ["style_instructions", styleInstructions],
      ["language_code", languageCode],
      ["temperature", temperature]
    ];
    for (const [wireKey, value] of optionalFields) {
      if (value !== undefined) payload[wireKey] = value;
    }
    if (selection?.kind === "multiSpeaker") {
      // Multi-speaker requests carry `speakers` instead of a single `voice`.
      payload.speakers = selection.speakers.map(({ speakerId, voiceId }) => ({
        speaker_id: speakerId,
        voice: voiceId
      }));
      return payload;
    }
    payload.voice = selection?.kind === "preset" ? selection.id : "Kore";
    return payload;
  },
  /** Locate the audio file in the response, defaulting the mime to MP3. */
  extractAudio(data) {
    return locateAudio(data, "audio/mpeg");
  },
  voiceCatalog: geminiVoiceCatalog
};
|
|
196
|
+
//#endregion
|
|
197
|
+
//#region src/descriptors/minimax.ts
|
|
198
|
+
/**
 * Descriptor for `fal-ai/minimax/speech-02-hd`.
 *
 * The voice is sent as a `voice_setting` object: the entries of
 * `params.voiceSetting` plus a `voice_id`, resolved as the `preset` selection
 * id, else `params.voiceSetting.voice_id`, else "Wise_Woman". `output_format`
 * is always "url". `params.audioSetting` and `params.languageBoost` are
 * forwarded as `audio_setting` and `language_boost` when not `undefined`.
 * The audio mime type falls back to `audio/mpeg`. This descriptor also
 * defines `extractDuration`, which reads `duration_ms` from the response.
 */
const minimaxSpeech02Hd = {
  endpointId: "fal-ai/minimax/speech-02-hd",
  caps: {
    maxInlineBreakSeconds: null,
    maxCharsPerRequest: 5e3
  },
  /** Map the canonical input onto minimax's wire input. */
  buildInput(input) {
    const params = input.params ?? {};
    const voiceSetting = params.voiceSetting ?? {};
    const presetId = input.voice?.kind === "preset" ? input.voice.id : undefined;
    const wire = {
      text: input.text,
      voice_setting: {
        ...voiceSetting,
        // An explicit preset selection wins over a voice_id inside voiceSetting.
        voice_id: presetId ?? voiceSetting.voice_id ?? "Wise_Woman"
      },
      output_format: "url"
    };
    if (params.audioSetting !== undefined) wire.audio_setting = params.audioSetting;
    if (params.languageBoost !== undefined) wire.language_boost = params.languageBoost;
    return wire;
  },
  /** Locate the audio file in the response, defaulting the mime to MP3. */
  extractAudio: (data) => locateAudio(data, "audio/mpeg"),
  /**
   * Convert the response's `duration_ms` to seconds.
   *
   * @param {unknown} data - Raw fal response payload (may be null/undefined).
   * @returns {number | undefined} Duration in seconds, or `undefined` when
   *   the payload is nullish or `duration_ms` is not a number.
   */
  extractDuration(data) {
    // Null-safe, like locateAudio: a nullish payload yields "no duration".
    const ms = data?.duration_ms;
    return typeof ms === "number" ? ms / 1e3 : undefined;
  }
};
|
|
234
|
+
//#endregion
|
|
235
|
+
//#region src/descriptors/index.ts
|
|
236
|
+
/**
 * Lookup table of every supported fal model descriptor. Each descriptor is
 * registered under its own `endpointId`, in the order listed here.
 */
const FAL_DESCRIPTORS = Object.fromEntries(
  [
    minimaxSpeech02Hd,
    gemini31FlashTts,
    chatterboxTextToSpeech,
    elevenlabsTurboV25
  ].map((descriptor) => [descriptor.endpointId, descriptor])
);
|
|
243
|
+
//#endregion
|
|
244
|
+
//#region src/falProvider.ts
|
|
245
|
+
/**
 * Strip a surrounding `<speak …>` / `</speak>` pair from a chunk and trim the
 * result. The opening tag is removed only at the start of the string
 * (leading whitespace allowed) and the closing tag only at the end (trailing
 * whitespace allowed); both matches are case-insensitive. Text without the
 * wrapper is returned trimmed and otherwise unchanged.
 *
 * @param {string} input - Chunk text, possibly wrapped in `<speak>`.
 * @returns {string} The unwrapped, trimmed text.
 */
function stripSpeak(input) {
  const openingTag = /^\s*<speak[^>]*>/i;
  const closingTag = /<\/speak>\s*$/i;
  const withoutOpening = input.replace(openingTag, "");
  const withoutClosing = withoutOpening.replace(closingTag, "");
  return withoutClosing.trim();
}
|
|
253
|
+
/**
 * Report whether a thrown value is an `Error` instance named "AbortError".
 *
 * @param {unknown} error - The caught value.
 * @returns {boolean} `true` only for `Error` instances whose `name` is
 *   exactly "AbortError".
 */
function isAbortError(error) {
  if (!(error instanceof Error)) return false;
  return error.name === "AbortError";
}
|
|
256
|
+
/**
 * Provider instance bound to one fal model. The descriptor for
 * `options.model` is looked up in `FAL_DESCRIPTORS` at construction; `caps`
 * and `voiceCatalog` are taken from that descriptor and `id` from `ctx.id`.
 */
class FalProvider {
  /**
   * @param {object} ctx - Provider context; `ctx.id` and `ctx.config.logger` are read.
   * @param {object} options - Construction options (`model`, optional `voice` / `params`).
   * @param {object} client - fal client exposing `subscribe(endpointId, options)`.
   */
  constructor(ctx, options, client) {
    this.ctx = ctx;
    this.options = options;
    this.client = client;
    this.id = ctx.id;
    const descriptor = FAL_DESCRIPTORS[options.model];
    this.descriptor = descriptor;
    this.caps = descriptor.caps;
    this.voiceCatalog = descriptor.voiceCatalog;
  }

  /**
   * Generate audio for one chunk.
   *
   * Per-call `overrides.voice` replaces the construction-time voice, and
   * `overrides.params` are merged over the construction-time params. The
   * chunk has its `<speak>` wrapper stripped before the descriptor builds
   * the wire input.
   *
   * @param {string} chunk - Chunk text.
   * @param {{ signal?: AbortSignal, overrides?: { voice?: object, params?: object } }} [options]
   * @returns {Promise<object>} `{ audio, mimeType, duration?, size, providerMeta }`,
   *   where `providerMeta.request_id` is the fal request id.
   * @throws An abort error when the signal is aborted, a `TtsError` as-is,
   *   or any other failure wrapped in a `TtsError` with the original as `cause`.
   */
  async generate(chunk, options) {
    const { signal, overrides } = options ?? {};
    signal?.throwIfAborted();

    const instanceParams = this.options.params;
    const callParams = overrides?.params;
    // Stays undefined when neither side supplied params.
    let params;
    if (instanceParams || callParams) {
      params = { ...instanceParams, ...callParams };
    }

    const wireInput = this.descriptor.buildInput({
      text: stripSpeak(chunk),
      voice: overrides?.voice ?? this.options.voice,
      params
    });
    const logger = this.ctx.config.logger;
    logger?.info?.("[fal] subscribe start", { model: this.options.model });

    try {
      const subscribeOptions = { input: wireInput };
      if (signal) subscribeOptions.abortSignal = signal;
      const result = await this.client.subscribe(this.descriptor.endpointId, subscribeOptions);

      const location = this.descriptor.extractAudio(result.data);
      const audio = await this.fetchAudio(location.url, signal);
      const duration = this.descriptor.extractDuration?.(result.data);
      logger?.info?.("[fal] generation done", {
        model: this.options.model,
        bytes: audio.length,
        requestId: result.requestId
      });

      const generated = { audio, mimeType: location.mimeType };
      // `duration` is only present when the descriptor could extract one.
      if (duration !== undefined) generated.duration = duration;
      generated.size = audio.length;
      generated.providerMeta = { request_id: result.requestId };
      return generated;
    } catch (error) {
      // Order matters: a genuine abort error passes through, then any
      // failure seen after the signal aborted is reported as an abort,
      // and only then are already-classified TtsErrors passed through.
      if (isAbortError(error)) throw error;
      if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
      if (error instanceof TtsError) throw error;
      const detail = error instanceof Error ? error.message : String(error);
      const mapped = new TtsError(`fal generation failed: ${detail}`, { cause: error });
      logger?.error?.("[fal] generation error", { message: mapped.message });
      throw mapped;
    }
  }

  /**
   * Download a model's `audio.url` and return its bytes.
   *
   * @param {string} url - URL to fetch.
   * @param {AbortSignal} [signal] - Passed to `fetch` when provided.
   * @returns {Promise<Buffer>} The response body.
   * @throws {TtsError} When the response status is not ok (carries `statusCode`).
   */
  async fetchAudio(url, signal) {
    const init = signal ? { signal } : {};
    const response = await fetch(url, init);
    if (!response.ok) {
      throw new TtsError(`fal audio fetch failed (${response.status}): ${url}`, { statusCode: response.status });
    }
    const body = await response.arrayBuffer();
    return Buffer.from(body);
  }
}
|
|
318
|
+
/**
 * Factory registered under the provider id "fal".
 *
 * `create` validates the options and returns a `FalProvider` bound to the
 * requested model, with a fal client created from `options.apiKey`.
 */
const falProviderFactory = {
  id: "fal",
  /**
   * @param {object} ctx - Provider context handed through to `FalProvider`.
   * @param {{ apiKey: string, model: string, voice?: object, params?: object }} options
   * @returns {FalProvider} A provider instance for `options.model`.
   * @throws {Error} When `apiKey` is missing or `model` is not a registered
   *   descriptor key.
   */
  create(ctx, options) {
    if (!options.apiKey) throw new Error("fal provider requires an apiKey");
    // Own-property check: a plain `FAL_DESCRIPTORS[model]` lookup is truthy for
    // inherited keys such as "toString" or "constructor", which would slip
    // through validation and produce a provider with no descriptor.
    if (!options.model || !Object.hasOwn(FAL_DESCRIPTORS, options.model)) {
      throw new Error(`fal provider: unknown model "${options.model}"`);
    }
    return new FalProvider(ctx, options, createFalClient({ credentials: options.apiKey }));
  }
};
|
|
326
|
+
//#endregion
|
|
327
|
+
export { FAL_DESCRIPTORS, chatterboxTextToSpeech, elevenlabsTurboV25, falProviderFactory, flatBuildInput, gemini31FlashTts, geminiVoiceCatalog, minimaxSpeech02Hd, staticVoiceCatalog };
|
|
328
|
+
|
|
329
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../src/descriptors/shared.ts","../src/descriptors/chatterbox.ts","../src/descriptors/flatBuildInput.ts","../src/descriptors/elevenlabs.ts","../src/voiceCatalog.ts","../src/descriptors/gemini.ts","../src/descriptors/minimax.ts","../src/descriptors/index.ts","../src/falProvider.ts"],"sourcesContent":["import { TtsError } from '@alien-lobster-buffet/tts-conductor-core';\nimport type { FalAudioLocation } from '../types';\n\n/**\n * Locate the audio URL (+ mime) in a fal TTS response. All four starter models\n * return `audio` as a fal `File` (`{ url, content_type? }`) when\n * `output_format` is url-based — the provider's shared fetch turns the URL into\n * bytes. Throws a {@link TtsError} if the response carries no usable `audio.url`\n * (already-classified, so the provider passes it through without re-wrapping).\n */\nexport function locateAudio(data: unknown, defaultMime: string): FalAudioLocation {\n const audio = (data as { audio?: { url?: unknown; content_type?: unknown } } | null | undefined)\n ?.audio;\n const url = typeof audio?.url === 'string' ? audio.url : undefined;\n if (!url) {\n throw new TtsError('[fal] response did not contain an audio.url');\n }\n const mimeType = typeof audio?.content_type === 'string' ? audio.content_type : defaultMime;\n return { url, mimeType };\n}\n","import type { FalModelDescriptor } from '../types';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/chatterbox/text-to-speech` — voice cloning, no voice id. A `clone`\n * selection supplies `audio_url` (a plain URL string); omitting it lets fal use\n * its demo voice. Generation knobs (`exaggeration` / `cfg` / `temperature` /\n * `seed`) ride `params`. Returns `.wav`; no duration field → core ffprobe\n * fallback. 
No `voiceCatalog` — there are no enumerable speakers.\n */\nconst CHATTERBOX_PARAM_KEYS = ['exaggeration', 'cfg', 'temperature', 'seed'] as const;\n\nexport const chatterboxTextToSpeech: FalModelDescriptor = {\n endpointId: 'fal-ai/chatterbox/text-to-speech',\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },\n buildInput(input) {\n const params = input.params ?? {};\n const wire: Record<string, unknown> = { text: input.text };\n if (input.voice?.kind === 'clone') wire.audio_url = input.voice.audioUrl;\n for (const key of CHATTERBOX_PARAM_KEYS) {\n if (params[key] !== undefined) wire[key] = params[key];\n }\n return wire;\n },\n extractAudio: (data) => locateAudio(data, 'audio/wav'),\n};\n","import type { CanonicalTtsInput } from '../types';\n\nexport interface FlatBuildInputOptions {\n /** Wire key the chunk text maps to (`text` for most, `prompt` for gemini). */\n textKey: 'text' | 'prompt';\n /** Wire key a `preset` voice id maps to. */\n voiceKey: string;\n /** Static input merged in first (e.g. an output_format or default voice). */\n defaults?: Record<string, unknown>;\n}\n\n/**\n * Produces a `buildInput` for flat models — text at `textKey`, a `preset` voice\n * id at `voiceKey`, scalar `params` merged on top. Keeps one-line ergonomics for\n * structurally-simple models without a separate declarative code path; models\n * with object/multi-speaker/clone voice encoding hand-write `buildInput` instead.\n */\nexport function flatBuildInput(opts: FlatBuildInputOptions) {\n return (input: CanonicalTtsInput): Record<string, unknown> => ({\n ...opts.defaults,\n [opts.textKey]: input.text,\n ...(input.voice?.kind === 'preset' ? { [opts.voiceKey]: input.voice.id } : {}),\n ...(input.params ?? 
{}),\n });\n}\n","import type { FalModelDescriptor } from '../types';\nimport { flatBuildInput } from './flatBuildInput';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/elevenlabs/tts/turbo-v2.5` — the flat model: `text` key, `voice` id\n * (default Rachel), and 11labs scalar knobs (`stability` / `similarity_boost` /\n * `style` / `speed` / `previous_text` / `next_text` / `language_code` /\n * `apply_text_normalization`) forwarded via `params`. `voice` is a free string\n * with documented examples, not a schema enum, so no `voiceCatalog`. Returns\n * `.mp3`; no duration field → core ffprobe fallback.\n */\nexport const elevenlabsTurboV25: FalModelDescriptor = {\n endpointId: 'fal-ai/elevenlabs/tts/turbo-v2.5',\n // No schema char limit; 2000 is a deliberate narration default (a bit above\n // the direct 11labs adapter's 1200) balancing progress granularity against\n // request count. Soft — raise per job via BuildAudioOptions.maxCharsPerRequest.\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 2000 },\n buildInput: flatBuildInput({ textKey: 'text', voiceKey: 'voice', defaults: { voice: 'Rachel' } }),\n extractAudio: (data) => locateAudio(data, 'audio/mpeg'),\n};\n","import type {\n VoiceCatalog,\n VoiceCatalogEntry,\n VoiceCatalogQuery,\n} from '@alien-lobster-buffet/tts-conductor-core';\n\n/**\n * Build a `VoiceCatalog` from a fixed list of entries, applying the\n * cross-provider client-side filter baseline (case-insensitive substring search;\n * language prefix-match; gender equality; custom-only). 
Used for fal models\n * whose voices are a schema-enumerable set.\n */\nexport function staticVoiceCatalog<TRaw = unknown>(\n entries: ReadonlyArray<VoiceCatalogEntry<TRaw>>,\n): VoiceCatalog<TRaw> {\n return {\n async listVoices(query?: VoiceCatalogQuery): Promise<VoiceCatalogEntry<TRaw>[]> {\n let result = [...entries];\n if (query?.search) {\n const q = query.search.toLowerCase();\n result = result.filter((v) =>\n [v.name, v.description, ...Object.values(v.labels ?? {})]\n .filter((s): s is string => typeof s === 'string')\n .some((s) => s.toLowerCase().includes(q)),\n );\n }\n if (query?.language) {\n const lang = query.language.toLowerCase();\n result = result.filter((v) => v.languages.some((l) => l.toLowerCase().startsWith(lang)));\n }\n if (query?.gender) {\n const g = query.gender.toLowerCase();\n result = result.filter((v) => v.gender?.toLowerCase() === g);\n }\n if (query?.customOnly) {\n result = result.filter((v) => v.custom === true);\n }\n return result;\n },\n };\n}\n\n/** Raw shape for a gemini voice entry — just the preset name. */\nexport interface GeminiRawVoice {\n name: string;\n}\n\n/**\n * The 30 gemini-3.1-flash-tts preset voices, straight from the model's OpenAPI\n * `voice` enum (authoritative). 
gemini is multilingual via `language_code`\n * rather than per-voice, so `languages` is left empty.\n */\nconst GEMINI_VOICE_NAMES = [\n 'Achernar',\n 'Achird',\n 'Algenib',\n 'Algieba',\n 'Alnilam',\n 'Aoede',\n 'Autonoe',\n 'Callirrhoe',\n 'Charon',\n 'Despina',\n 'Enceladus',\n 'Erinome',\n 'Fenrir',\n 'Gacrux',\n 'Iapetus',\n 'Kore',\n 'Laomedeia',\n 'Leda',\n 'Orus',\n 'Pulcherrima',\n 'Puck',\n 'Rasalgethi',\n 'Sadachbia',\n 'Sadaltager',\n 'Schedar',\n 'Sulafat',\n 'Umbriel',\n 'Vindemiatrix',\n 'Zephyr',\n 'Zubenelgenubi',\n] as const;\n\nexport const geminiVoiceCatalog: VoiceCatalog<GeminiRawVoice> = staticVoiceCatalog(\n GEMINI_VOICE_NAMES.map((name) => ({ id: name, name, languages: [], raw: { name } })),\n);\n","import type { FalModelDescriptor } from '../types';\nimport { geminiVoiceCatalog } from '../voiceCatalog';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/gemini-3.1-flash-tts` — uses the `prompt` key (not `text`).\n * Single-speaker uses the `voice` enum (default Kore); multi-speaker maps the\n * canonical `speakers` to fal `SpeakerConfig[]` (`{ speaker_id, voice }`) and\n * omits `voice`. `style_instructions` / `language_code` / `temperature` ride\n * `params`. No duration field → core ffprobe fallback.\n *\n * **Multi-speaker + chunking caveat:** fal expects every request to carry the\n * full `speakers[]` AND the prompt to keep its `Alias:` line-prefixes. A naive\n * length-chunker can split mid-turn, so long multi-speaker scripts are not yet\n * supported — treat multi-speaker as single-chunk / short-form for now.\n */\nexport const gemini31FlashTts: FalModelDescriptor = {\n endpointId: 'fal-ai/gemini-3.1-flash-tts',\n // Schema allows up to 50000. Default 5000 favors progress granularity and a\n // small per-failure blast radius for narration. 
NB: smaller chunks mean more\n // fal.subscribe calls → more request_ids to reconcile + more stitch seams\n // (fal cost is ~per-request, not per-char), so long-form consumers should\n // raise this per job via BuildAudioOptions.maxCharsPerRequest (it's a soft\n // default, not a ceiling).\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },\n buildInput(input) {\n const params = input.params ?? {};\n const wire: Record<string, unknown> = {\n prompt: input.text,\n output_format: 'mp3',\n };\n if (params.styleInstructions !== undefined) wire.style_instructions = params.styleInstructions;\n if (params.languageCode !== undefined) wire.language_code = params.languageCode;\n if (params.temperature !== undefined) wire.temperature = params.temperature;\n\n if (input.voice?.kind === 'multiSpeaker') {\n wire.speakers = input.voice.speakers.map((s) => ({\n speaker_id: s.speakerId,\n voice: s.voiceId,\n }));\n } else {\n wire.voice = input.voice?.kind === 'preset' ? input.voice.id : 'Kore';\n }\n return wire;\n },\n extractAudio: (data) => locateAudio(data, 'audio/mpeg'),\n voiceCatalog: geminiVoiceCatalog,\n};\n","import type { FalModelDescriptor } from '../types';\nimport { locateAudio } from './shared';\n\n/**\n * `fal-ai/minimax/speech-02-hd` — object-voice. The preset voice id nests into a\n * `voice_setting` object alongside scalar knobs (vol/speed/pitch/emotion via\n * `params.voiceSetting`). `output_format` is forced to `url` so the response is\n * the schema-confirmed `audio.url` fetch shape shared by all four models (hex\n * inline bytes are a possible later optimization). minimax is the only starter\n * model that returns a duration (`duration_ms`).\n */\nexport const minimaxSpeech02Hd: FalModelDescriptor = {\n endpointId: 'fal-ai/minimax/speech-02-hd',\n // Schema hard limit is 5000 chars; used directly as the chunk budget.\n caps: { maxInlineBreakSeconds: null, maxCharsPerRequest: 5000 },\n buildInput(input) {\n const params = input.params ?? 
{};\n const voiceSetting = (params.voiceSetting as Record<string, unknown> | undefined) ?? {};\n const presetId = input.voice?.kind === 'preset' ? input.voice.id : undefined;\n const wire: Record<string, unknown> = {\n text: input.text,\n voice_setting: {\n ...voiceSetting,\n // The explicit canonical voice wins over a voice_id smuggled into\n // params.voiceSetting; fall back to that, then the model default.\n voice_id: presetId ?? voiceSetting.voice_id ?? 'Wise_Woman',\n },\n output_format: 'url',\n };\n if (params.audioSetting !== undefined) wire.audio_setting = params.audioSetting;\n if (params.languageBoost !== undefined) wire.language_boost = params.languageBoost;\n return wire;\n },\n extractAudio: (data) => locateAudio(data, 'audio/mpeg'),\n extractDuration(data) {\n const ms = (data as { duration_ms?: unknown }).duration_ms;\n return typeof ms === 'number' ? ms / 1000 : undefined;\n },\n // minimax voice_id is a free string with documented examples (not a schema\n // enum), so there's no authoritative list to enumerate — voiceCatalog omitted.\n};\n","import type { FalModelDescriptor, FalModelId } from '../types';\nimport { chatterboxTextToSpeech } from './chatterbox';\nimport { elevenlabsTurboV25 } from './elevenlabs';\nimport { gemini31FlashTts } from './gemini';\nimport { minimaxSpeech02Hd } from './minimax';\n\nexport { chatterboxTextToSpeech } from './chatterbox';\nexport { elevenlabsTurboV25 } from './elevenlabs';\nexport { type FlatBuildInputOptions, flatBuildInput } from './flatBuildInput';\nexport { gemini31FlashTts } from './gemini';\nexport { minimaxSpeech02Hd } from './minimax';\n\n/** Registry of every supported fal model, keyed by its `endpointId`. 
*/\nexport const FAL_DESCRIPTORS: Record<FalModelId, FalModelDescriptor> = {\n 'fal-ai/minimax/speech-02-hd': minimaxSpeech02Hd,\n 'fal-ai/gemini-3.1-flash-tts': gemini31FlashTts,\n 'fal-ai/chatterbox/text-to-speech': chatterboxTextToSpeech,\n 'fal-ai/elevenlabs/tts/turbo-v2.5': elevenlabsTurboV25,\n};\n","import type {\n GenerateCallOptions,\n GenerationResult,\n ProviderCapabilities,\n TtsProvider,\n TtsProviderContext,\n TtsProviderFactory,\n VoiceCatalog,\n} from '@alien-lobster-buffet/tts-conductor-core';\nimport { TtsError } from '@alien-lobster-buffet/tts-conductor-core';\nimport { createFalClient, type FalClient } from '@fal-ai/client';\nimport { FAL_DESCRIPTORS } from './descriptors';\nimport type { CanonicalTtsInput, FalModelDescriptor, FalModelId, FalVoiceSelection } from './types';\n\n/**\n * Construction options for the single `fal` provider id. `model` selects the\n * engine (the marketplace lives here, at construction); the returned provider\n * instance is bound to that one model. `voice` / `params` are per-instance\n * defaults that per-call overrides can replace.\n */\nexport interface FalProviderOptions {\n apiKey: string;\n model: FalModelId;\n voice?: FalVoiceSelection;\n params?: Record<string, unknown>;\n}\n\n/** Per-call overrides — vary the voice or scalar params for a single `generate`. */\nexport interface FalCallOverrides {\n voice?: FalVoiceSelection;\n params?: Record<string, unknown>;\n}\n\ndeclare module '@alien-lobster-buffet/tts-conductor-core' {\n interface TtsProviderRegistry {\n fal: FalProviderOptions;\n }\n interface TtsProviderCallOverridesRegistry {\n fal: FalCallOverrides;\n }\n}\n\n/**\n * Remove core's `<speak>…</speak>` wrapper, which the orchestrator adds to every\n * chunk. 
fal engines take plain text/prompt, not SSML (and the descriptors\n * declare `maxInlineBreakSeconds: null`, so the chunker emits no `<break>` tags).\n */\nfunction stripSpeak(input: string): string {\n return input\n .replace(/^\\s*<speak[^>]*>/i, '')\n .replace(/<\\/speak>\\s*$/i, '')\n .trim();\n}\n\nfunction isAbortError(error: unknown): boolean {\n return error instanceof Error && error.name === 'AbortError';\n}\n\nclass FalProvider implements TtsProvider<FalCallOverrides> {\n readonly id: string;\n readonly caps: ProviderCapabilities;\n readonly voiceCatalog?: VoiceCatalog;\n private readonly descriptor: FalModelDescriptor;\n\n constructor(\n private readonly ctx: TtsProviderContext,\n private readonly options: FalProviderOptions,\n // Instance-local fal client — credentials are scoped to this provider, not\n // a global singleton, so multiple providers with different keys don't race.\n private readonly client: FalClient,\n ) {\n this.id = ctx.id;\n this.descriptor = FAL_DESCRIPTORS[options.model];\n this.caps = this.descriptor.caps;\n this.voiceCatalog = this.descriptor.voiceCatalog;\n }\n\n async generate(\n chunk: string,\n options?: GenerateCallOptions<FalCallOverrides>,\n ): Promise<GenerationResult> {\n const signal = options?.signal;\n signal?.throwIfAborted();\n\n const overrides = options?.overrides;\n const mergedParams =\n this.options.params || overrides?.params\n ? { ...this.options.params, ...overrides?.params }\n : undefined;\n const canonical: CanonicalTtsInput = {\n text: stripSpeak(chunk),\n voice: overrides?.voice ?? this.options.voice,\n params: mergedParams,\n };\n\n const input = this.descriptor.buildInput(canonical);\n const logger = this.ctx.config.logger;\n logger?.info?.('[fal] subscribe start', { model: this.options.model });\n\n try {\n const result = await this.client.subscribe(this.descriptor.endpointId, {\n input,\n ...(signal ? 
{ abortSignal: signal } : {}),\n });\n\n const { url, mimeType } = this.descriptor.extractAudio(result.data);\n const audio = await this.fetchAudio(url, signal);\n const duration = this.descriptor.extractDuration?.(result.data);\n\n logger?.info?.('[fal] generation done', {\n model: this.options.model,\n bytes: audio.length,\n requestId: result.requestId,\n });\n\n return {\n audio,\n mimeType,\n ...(duration !== undefined ? { duration } : {}),\n size: audio.length,\n // Opaque per-chunk metadata for core; consumers reconcile fal cost from\n // the aggregated request_ids on the final result.\n providerMeta: { request_id: result.requestId },\n };\n } catch (error) {\n // Aborts are not failures — propagate so consumers can distinguish\n // cancellation from a real error (BullMQ workers, retry logic, etc.).\n if (isAbortError(error)) throw error;\n // If the call was cancelled, surface a clean AbortError rather than\n // re-throwing whatever the SDK happened to raise after the abort — that\n // would otherwise mislabel a real API error as a cancellation.\n if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');\n // A TtsError raised below (e.g. a failed fetch) is already classified.\n if (error instanceof TtsError) throw error;\n const message = error instanceof Error ? error.message : String(error);\n const mapped = new TtsError(`fal generation failed: ${message}`, { cause: error });\n logger?.error?.('[fal] generation error', { message: mapped.message });\n throw mapped;\n }\n }\n\n /** Shared, abort-aware fetch turning a model's `audio.url` into bytes. */\n private async fetchAudio(url: string, signal?: AbortSignal): Promise<Buffer> {\n const res = await fetch(url, signal ? 
{ signal } : {});\n if (!res.ok) {\n throw new TtsError(`fal audio fetch failed (${res.status}): ${url}`, {\n statusCode: res.status,\n });\n }\n return Buffer.from(await res.arrayBuffer());\n }\n}\n\nexport const falProviderFactory: TtsProviderFactory<'fal', FalCallOverrides> = {\n id: 'fal',\n create(ctx: TtsProviderContext, options: FalProviderOptions) {\n if (!options.apiKey) {\n throw new Error('fal provider requires an apiKey');\n }\n if (!options.model || !FAL_DESCRIPTORS[options.model]) {\n throw new Error(`fal provider: unknown model \"${options.model}\"`);\n }\n // Instance-local client — credentials scoped here, no global mutation.\n const client = createFalClient({ credentials: options.apiKey });\n return new FalProvider(ctx, options, client);\n },\n};\n"],"mappings":";;;;;;;;;;AAUA,SAAgB,YAAY,MAAe,aAAuC;CAChF,MAAM,QAAS,MACX;CACJ,MAAM,MAAM,OAAO,OAAO,QAAQ,WAAW,MAAM,MAAM,KAAA;CACzD,IAAI,CAAC,KACH,MAAM,IAAI,SAAS,6CAA6C;CAGlE,OAAO;EAAE;EAAK,UADG,OAAO,OAAO,iBAAiB,WAAW,MAAM,eAAe;CACzD;AACzB;;;;;;;;;;ACTA,MAAM,wBAAwB;CAAC;CAAgB;CAAO;CAAe;AAAM;AAE3E,MAAa,yBAA6C;CACxD,YAAY;CACZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,WAAW,OAAO;EAChB,MAAM,SAAS,MAAM,UAAU,CAAC;EAChC,MAAM,OAAgC,EAAE,MAAM,MAAM,KAAK;EACzD,IAAI,MAAM,OAAO,SAAS,SAAS,KAAK,YAAY,MAAM,MAAM;EAChE,KAAK,MAAM,OAAO,uBAChB,IAAI,OAAO,SAAS,KAAA,GAAW,KAAK,OAAO,OAAO;EAEpD,OAAO;CACT;CACA,eAAe,SAAS,YAAY,MAAM,WAAW;AACvD;;;;;;;;;ACRA,SAAgB,eAAe,MAA6B;CAC1D,QAAQ,WAAuD;EAC7D,GAAG,KAAK;GACP,KAAK,UAAU,MAAM;EACtB,GAAI,MAAM,OAAO,SAAS,WAAW,GAAG,KAAK,WAAW,MAAM,MAAM,GAAG,IAAI,CAAC;EAC5E,GAAI,MAAM,UAAU,CAAC;CACvB;AACF;;;;;;;;;;;ACZA,MAAa,qBAAyC;CACpD,YAAY;CAIZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,YAAY,eAAe;EAAE,SAAS;EAAQ,UAAU;EAAS,UAAU,EAAE,OAAO,SAAS;CAAE,CAAC;CAChG,eAAe,SAAS,YAAY,MAAM,YAAY;AACxD;;;;;;;;;ACRA,SAAgB,mBACd,SACoB;CACpB,OAAO,EACL,MAAM,WAAW,OAA+D;EAC9E,IAAI,SAAS,CAAC,GAAG,OAAO;EACxB,IAAI,OAAO,QAAQ;GACjB,MAAM,IAAI,MAAM,OAAO,YAAY;GACnC,SAAS,OAAO,QAAQ,MACtB;IAAC,EAAE;IAAM,EAAE;IAAa,GAAG,OAAO,OAAO,EAAE,UAAU,CAAC,CAAC;GAAC
,EACrD,QAAQ,MAAmB,OAAO,MAAM,QAAQ,EAChD,MAAM,MAAM,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC,CAC5C;EACF;EACA,IAAI,OAAO,UAAU;GACnB,MAAM,OAAO,MAAM,SAAS,YAAY;GACxC,SAAS,OAAO,QAAQ,MAAM,EAAE,UAAU,MAAM,MAAM,EAAE,YAAY,EAAE,WAAW,IAAI,CAAC,CAAC;EACzF;EACA,IAAI,OAAO,QAAQ;GACjB,MAAM,IAAI,MAAM,OAAO,YAAY;GACnC,SAAS,OAAO,QAAQ,MAAM,EAAE,QAAQ,YAAY,MAAM,CAAC;EAC7D;EACA,IAAI,OAAO,YACT,SAAS,OAAO,QAAQ,MAAM,EAAE,WAAW,IAAI;EAEjD,OAAO;CACT,EACF;AACF;AA6CA,MAAa,qBAAmD,mBAC9D;CAjCA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;AAIA,EAAmB,KAAK,UAAU;CAAE,IAAI;CAAM;CAAM,WAAW,CAAC;CAAG,KAAK,EAAE,KAAK;AAAE,EAAE,CACrF;;;;;;;;;;;;;;;ACvEA,MAAa,mBAAuC;CAClD,YAAY;CAOZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,WAAW,OAAO;EAChB,MAAM,SAAS,MAAM,UAAU,CAAC;EAChC,MAAM,OAAgC;GACpC,QAAQ,MAAM;GACd,eAAe;EACjB;EACA,IAAI,OAAO,sBAAsB,KAAA,GAAW,KAAK,qBAAqB,OAAO;EAC7E,IAAI,OAAO,iBAAiB,KAAA,GAAW,KAAK,gBAAgB,OAAO;EACnE,IAAI,OAAO,gBAAgB,KAAA,GAAW,KAAK,cAAc,OAAO;EAEhE,IAAI,MAAM,OAAO,SAAS,gBACxB,KAAK,WAAW,MAAM,MAAM,SAAS,KAAK,OAAO;GAC/C,YAAY,EAAE;GACd,OAAO,EAAE;EACX,EAAE;OAEF,KAAK,QAAQ,MAAM,OAAO,SAAS,WAAW,MAAM,MAAM,KAAK;EAEjE,OAAO;CACT;CACA,eAAe,SAAS,YAAY,MAAM,YAAY;CACtD,cAAc;AAChB;;;;;;;;;;;ACpCA,MAAa,oBAAwC;CACnD,YAAY;CAEZ,MAAM;EAAE,uBAAuB;EAAM,oBAAoB;CAAK;CAC9D,WAAW,OAAO;EAChB,MAAM,SAAS,MAAM,UAAU,CAAC;EAChC,MAAM,eAAgB,OAAO,gBAAwD,CAAC;EACtF,MAAM,WAAW,MAAM,OAAO,SAAS,WAAW,MAAM,MAAM,KAAK,KAAA;EACnE,MAAM,OAAgC;GACpC,MAAM,MAAM;GACZ,eAAe;IACb,GAAG;IAGH,UAAU,YAAY,aAAa,YAAY;GACjD;GACA,eAAe;EACjB;EACA,IAAI,OAAO,iBAAiB,KAAA,GAAW,KAAK,gBAAgB,OAAO;EACnE,IAAI,OAAO,kBAAkB,KAAA,GAAW,KAAK,iBAAiB,OAAO;EACrE,OAAO;CACT;CACA,eAAe,SAAS,YAAY,MAAM,YAAY;CACtD,gBAAgB,MAAM;EACpB,MAAM,KAAM,KAAmC;EAC/C,OAAO,OAAO,OAAO,WAAW,KAAK,MAAO,KAAA;CAC9C;AAGF;;;;AC3BA,MAAa,kBAA0D;CACrE,+BAA+B;CAC/B,+BAA+B;CAC/B,oCAAoC;CACpC,oCAAoC;AACtC;;;;;;;;AC6BA,SAAS,WAAW,OAAuB;CACzC,OAAO,MACJ,QAAQ,qBAAqB,EAAE,EAC/B,QAAQ,kBAAkB,EAAE,EAC5B,KAAK;AACV;AAEA,SAAS,aAAa,OAAyB;CAC7C,OAAO,iBAAiB,SA
AS,MAAM,SAAS;AAClD;AAEA,IAAM,cAAN,MAA2D;CAMzD,YACE,KACA,SAGA,QACA;EALiB,KAAA,MAAA;EACA,KAAA,UAAA;EAGA,KAAA,SAAA;EAEjB,KAAK,KAAK,IAAI;EACd,KAAK,aAAa,gBAAgB,QAAQ;EAC1C,KAAK,OAAO,KAAK,WAAW;EAC5B,KAAK,eAAe,KAAK,WAAW;CACtC;CAEA,MAAM,SACJ,OACA,SAC2B;EAC3B,MAAM,SAAS,SAAS;EACxB,QAAQ,eAAe;EAEvB,MAAM,YAAY,SAAS;EAC3B,MAAM,eACJ,KAAK,QAAQ,UAAU,WAAW,SAC9B;GAAE,GAAG,KAAK,QAAQ;GAAQ,GAAG,WAAW;EAAO,IAC/C,KAAA;EACN,MAAM,YAA+B;GACnC,MAAM,WAAW,KAAK;GACtB,OAAO,WAAW,SAAS,KAAK,QAAQ;GACxC,QAAQ;EACV;EAEA,MAAM,QAAQ,KAAK,WAAW,WAAW,SAAS;EAClD,MAAM,SAAS,KAAK,IAAI,OAAO;EAC/B,QAAQ,OAAO,yBAAyB,EAAE,OAAO,KAAK,QAAQ,MAAM,CAAC;EAErE,IAAI;GACF,MAAM,SAAS,MAAM,KAAK,OAAO,UAAU,KAAK,WAAW,YAAY;IACrE;IACA,GAAI,SAAS,EAAE,aAAa,OAAO,IAAI,CAAC;GAC1C,CAAC;GAED,MAAM,EAAE,KAAK,aAAa,KAAK,WAAW,aAAa,OAAO,IAAI;GAClE,MAAM,QAAQ,MAAM,KAAK,WAAW,KAAK,MAAM;GAC/C,MAAM,WAAW,KAAK,WAAW,kBAAkB,OAAO,IAAI;GAE9D,QAAQ,OAAO,yBAAyB;IACtC,OAAO,KAAK,QAAQ;IACpB,OAAO,MAAM;IACb,WAAW,OAAO;GACpB,CAAC;GAED,OAAO;IACL;IACA;IACA,GAAI,aAAa,KAAA,IAAY,EAAE,SAAS,IAAI,CAAC;IAC7C,MAAM,MAAM;IAGZ,cAAc,EAAE,YAAY,OAAO,UAAU;GAC/C;EACF,SAAS,OAAO;GAGd,IAAI,aAAa,KAAK,GAAG,MAAM;GAI/B,IAAI,QAAQ,SAAS,MAAM,IAAI,aAAa,WAAW,YAAY;GAEnE,IAAI,iBAAiB,UAAU,MAAM;GAErC,MAAM,SAAS,IAAI,SAAS,0BADZ,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,KACJ,EAAE,OAAO,MAAM,CAAC;GACjF,QAAQ,QAAQ,0BAA0B,EAAE,SAAS,OAAO,QAAQ,CAAC;GACrE,MAAM;EACR;CACF;;CAGA,MAAc,WAAW,KAAa,QAAuC;EAC3E,MAAM,MAAM,MAAM,MAAM,KAAK,SAAS,EAAE,OAAO,IAAI,CAAC,CAAC;EACrD,IAAI,CAAC,IAAI,IACP,MAAM,IAAI,SAAS,2BAA2B,IAAI,OAAO,KAAK,OAAO,EACnE,YAAY,IAAI,OAClB,CAAC;EAEH,OAAO,OAAO,KAAK,MAAM,IAAI,YAAY,CAAC;CAC5C;AACF;AAEA,MAAa,qBAAkE;CAC7E,IAAI;CACJ,OAAO,KAAyB,SAA6B;EAC3D,IAAI,CAAC,QAAQ,QACX,MAAM,IAAI,MAAM,iCAAiC;EAEnD,IAAI,CAAC,QAAQ,SAAS,CAAC,gBAAgB,QAAQ,QAC7C,MAAM,IAAI,MAAM,gCAAgC,QAAQ,MAAM,EAAE;EAIlE,OAAO,IAAI,YAAY,KAAK,SADb,gBAAgB,EAAE,aAAa,QAAQ,OAAO,CACnB,CAAC;CAC7C;AACF"}
|
package/package.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@alien-lobster-buffet/tts-conductor-fal",
|
|
3
|
+
"version": "0.2.0-alpha.0",
|
|
4
|
+
"description": "fal.ai provider bindings for the TTS Conductor ecosystem",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"tts",
|
|
7
|
+
"fal",
|
|
8
|
+
"fal-ai",
|
|
9
|
+
"provider"
|
|
10
|
+
],
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"author": "Cole Reed <alienlobsterbuffet.dev@gmail.com>",
|
|
13
|
+
"homepage": "https://github.com/ichabodcole/tts-conductor#readme",
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/ichabodcole/tts-conductor/issues"
|
|
16
|
+
},
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "git+https://github.com/ichabodcole/tts-conductor.git",
|
|
20
|
+
"directory": "packages/tts-provider-fal"
|
|
21
|
+
},
|
|
22
|
+
"type": "module",
|
|
23
|
+
"exports": {
|
|
24
|
+
".": {
|
|
25
|
+
"import": "./dist/index.mjs",
|
|
26
|
+
"types": "./dist/index.d.mts"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"main": "./dist/index.mjs",
|
|
30
|
+
"types": "./dist/index.d.mts",
|
|
31
|
+
"files": [
|
|
32
|
+
"dist",
|
|
33
|
+
"LICENSE",
|
|
34
|
+
"README.md"
|
|
35
|
+
],
|
|
36
|
+
"engines": {
|
|
37
|
+
"node": ">=18"
|
|
38
|
+
},
|
|
39
|
+
"publishConfig": {
|
|
40
|
+
"access": "public"
|
|
41
|
+
},
|
|
42
|
+
"scripts": {
|
|
43
|
+
"build": "tsdown",
|
|
44
|
+
"clean": "tsdown --clean",
|
|
45
|
+
"dev": "tsdown --watch",
|
|
46
|
+
"prepublishOnly": "tsdown",
|
|
47
|
+
"test": "vitest --run --config ../../vitest.config.ts",
|
|
48
|
+
"typecheck": "tsc --noEmit"
|
|
49
|
+
},
|
|
50
|
+
"peerDependencies": {
|
|
51
|
+
"@alien-lobster-buffet/tts-conductor-core": "^0.2.0-alpha.1"
|
|
52
|
+
},
|
|
53
|
+
"dependencies": {
|
|
54
|
+
"@fal-ai/client": "^1.10.1"
|
|
55
|
+
},
|
|
56
|
+
"devDependencies": {
|
|
57
|
+
"@alien-lobster-buffet/tts-conductor-core": "^0.2.0-alpha.1"
|
|
58
|
+
}
|
|
59
|
+
}
|