react-native-tts-kit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/ATTRIBUTIONS.md +87 -0
  2. package/LICENSE +21 -0
  3. package/README.md +231 -0
  4. package/android/build.gradle +50 -0
  5. package/android/src/main/AndroidManifest.xml +3 -0
  6. package/android/src/main/java/expo/modules/ttskit/RNTTSKitModule.kt +158 -0
  7. package/android/src/main/java/expo/modules/ttskit/supertonic/AudioEngine.kt +158 -0
  8. package/android/src/main/java/expo/modules/ttskit/supertonic/ModelLocator.kt +372 -0
  9. package/android/src/main/java/expo/modules/ttskit/supertonic/SupertonicSession.kt +373 -0
  10. package/android/src/main/java/expo/modules/ttskit/supertonic/TextFrontend.kt +154 -0
  11. package/android/src/main/java/expo/modules/ttskit/supertonic/VoicePack.kt +47 -0
  12. package/build/engines/BufferedStreamEmitter.d.ts +26 -0
  13. package/build/engines/BufferedStreamEmitter.d.ts.map +1 -0
  14. package/build/engines/BufferedStreamEmitter.js +68 -0
  15. package/build/engines/BufferedStreamEmitter.js.map +1 -0
  16. package/build/engines/Engine.d.ts +15 -0
  17. package/build/engines/Engine.d.ts.map +1 -0
  18. package/build/engines/Engine.js +2 -0
  19. package/build/engines/Engine.js.map +1 -0
  20. package/build/engines/SupertonicEngine.d.ts +14 -0
  21. package/build/engines/SupertonicEngine.d.ts.map +1 -0
  22. package/build/engines/SupertonicEngine.js +183 -0
  23. package/build/engines/SupertonicEngine.js.map +1 -0
  24. package/build/engines/SystemEngine.d.ts +13 -0
  25. package/build/engines/SystemEngine.d.ts.map +1 -0
  26. package/build/engines/SystemEngine.js +78 -0
  27. package/build/engines/SystemEngine.js.map +1 -0
  28. package/build/index.d.ts +46 -0
  29. package/build/index.d.ts.map +1 -0
  30. package/build/index.js +118 -0
  31. package/build/index.js.map +1 -0
  32. package/build/types.d.ts +77 -0
  33. package/build/types.d.ts.map +1 -0
  34. package/build/types.js +2 -0
  35. package/build/types.js.map +1 -0
  36. package/build/voices/catalog.d.ts +12 -0
  37. package/build/voices/catalog.d.ts.map +1 -0
  38. package/build/voices/catalog.js +28 -0
  39. package/build/voices/catalog.js.map +1 -0
  40. package/build/voices/prosody.d.ts +8 -0
  41. package/build/voices/prosody.d.ts.map +1 -0
  42. package/build/voices/prosody.js +28 -0
  43. package/build/voices/prosody.js.map +1 -0
  44. package/expo-module.config.json +9 -0
  45. package/ios/RNTTSKit.podspec +28 -0
  46. package/ios/RNTTSKitModule.swift +133 -0
  47. package/ios/Supertonic/AudioEngine.swift +110 -0
  48. package/ios/Supertonic/ModelLocator.swift +416 -0
  49. package/ios/Supertonic/SupertonicSession.swift +405 -0
  50. package/ios/Supertonic/TextFrontend.swift +216 -0
  51. package/ios/Supertonic/VoicePack.swift +51 -0
  52. package/licenses/OpenRAIL-M.txt +209 -0
  53. package/package.json +77 -0
  54. package/src/engines/BufferedStreamEmitter.ts +50 -0
  55. package/src/engines/Engine.ts +28 -0
  56. package/src/engines/SupertonicEngine.ts +250 -0
  57. package/src/engines/SystemEngine.ts +96 -0
  58. package/src/engines/__tests__/BufferedStreamEmitter.test.ts +65 -0
  59. package/src/index.ts +156 -0
  60. package/src/types.ts +95 -0
  61. package/src/voices/__tests__/catalog.test.ts +46 -0
  62. package/src/voices/__tests__/prosody.test.ts +63 -0
  63. package/src/voices/catalog.ts +32 -0
  64. package/src/voices/prosody.ts +39 -0
@@ -0,0 +1,96 @@
1
+ import type {
2
+ EngineCapabilities,
3
+ EngineId,
4
+ PrefetchProgress,
5
+ SpeakOptions,
6
+ StreamHandle,
7
+ Voice,
8
+ } from '../types';
9
+ import type { Engine } from './Engine';
10
+
11
/**
 * Options this file forwards to `expo-speech`'s `speak()`. Only the fields
 * actually passed by `SystemEngine.speak` are listed.
 */
type ExpoSpeechOptions = {
  voice?: string;
  language?: string;
  rate?: number;
  pitch?: number;
  volume?: number;
  onStart?: () => void;
  onDone?: () => void;
  onError?: (err: Error) => void;
  onStopped?: () => void;
};

/** Minimal structural view of the `expo-speech` module surface used here. */
type ExpoSpeechModule = {
  speak(text: string, options?: ExpoSpeechOptions): void;
  stop(): Promise<void>;
  getAvailableVoicesAsync(): Promise<Array<{ identifier: string; name: string; language: string }>>;
};

// Memoized module reference; stays null until a require() succeeds.
let cached: ExpoSpeechModule | null = null;

/**
 * Lazily loads `expo-speech` so the package works without it installed.
 *
 * @returns the module, or `null` when `require('expo-speech')` throws.
 * A failed load is not memoized, so the next call tries again.
 */
function loadExpoSpeech(): ExpoSpeechModule | null {
  if (cached) return cached;
  try {
    cached = require('expo-speech') as ExpoSpeechModule;
    return cached;
  } catch {
    // Any load failure is treated as "module not available".
    return null;
  }
}
27
+
28
/**
 * Engine that delegates synthesis to the optional `expo-speech` module.
 *
 * The module is looked up lazily through `loadExpoSpeech()` on every call, so
 * the engine can be constructed even when `expo-speech` is not installed.
 */
export class SystemEngine implements Engine {
  readonly id: EngineId = 'system';
  readonly capabilities: EngineCapabilities = {
    streaming: false,
    cloning: false,
    emotionTags: false,
    offline: true,
    languages: ['*'],
  };

  /** Resolves to `true` when `expo-speech` could be loaded. */
  async isAvailable(): Promise<boolean> {
    return loadExpoSpeech() !== null;
  }

  /** Nothing to download for this engine; resolves immediately. */
  async prefetch(_onProgress?: (p: PrefetchProgress) => void): Promise<void> {
    // No-op: system voices are bundled with the OS.
  }

  /**
   * Lists the voices reported by `expo-speech`, mapped to the `Voice` shape.
   *
   * @returns an empty array when `expo-speech` is unavailable.
   */
  async getVoices(): Promise<Voice[]> {
    const speech = loadExpoSpeech();
    if (!speech) return [];
    const voices = await speech.getAvailableVoicesAsync();
    return voices.map((v) => ({
      id: v.identifier,
      name: v.name,
      language: v.language,
      engine: 'system' as EngineId,
    }));
  }

  /**
   * Speaks `text` through `expo-speech`.
   *
   * The returned promise resolves when the utterance finishes (`onDone`) or is
   * stopped (`onStopped`), and rejects on `onError` or if `speak()` throws.
   * `options.onDone` is not invoked when the utterance is stopped.
   *
   * The method is `async` so that a missing `expo-speech` surfaces as a
   * rejected promise rather than a synchronous throw, which callers using
   * `.catch()` would otherwise miss.
   *
   * @throws (as a rejection) Error when `expo-speech` is not installed.
   */
  async speak(text: string, options: SpeakOptions = {}): Promise<void> {
    const speech = loadExpoSpeech();
    if (!speech) {
      throw new Error('[ttskit] expo-speech is not installed');
    }
    // speech.speak() is still invoked synchronously: the promise executor runs
    // before the first await yields.
    await new Promise<void>((resolve, reject) => {
      try {
        speech.speak(text, {
          voice: options.voice,
          language: options.language,
          rate: options.rate,
          pitch: options.pitch,
          volume: options.volume,
          onStart: options.onStart,
          onDone: () => {
            options.onDone?.();
            resolve();
          },
          onError: (err: Error) => {
            options.onError?.(err);
            reject(err);
          },
          onStopped: () => resolve(),
        });
      } catch (err) {
        reject(err);
      }
    });
  }

  /** Always throws: this engine declares `capabilities.streaming` as false. */
  stream(_text: string, _options: SpeakOptions = {}): StreamHandle {
    throw new Error('[ttskit] System engine does not support streaming. Use engine: "supertonic".');
  }

  /** Stops speech via `expo-speech`; does nothing when it is unavailable. */
  async stop(): Promise<void> {
    const speech = loadExpoSpeech();
    await speech?.stop();
  }
}
@@ -0,0 +1,65 @@
1
+ import { BufferedStreamEmitter } from '../BufferedStreamEmitter';
2
+
3
// Behavioural spec for BufferedStreamEmitter: events emitted before a listener
// attaches are buffered and replayed once; later events are forwarded live.
describe('BufferedStreamEmitter', () => {
  it('delivers chunks emitted before any listener attaches', () => {
    const stream = new BufferedStreamEmitter();
    const first = new Uint8Array([1, 2, 3]);
    const second = new Uint8Array([4, 5]);
    stream.emitChunk(first);
    stream.emitChunk(second);

    const seen: Uint8Array[] = [];
    stream.on('chunk', (pcm) => {
      seen.push(pcm);
    });

    expect(seen).toEqual([first, second]);
  });

  it('forwards live chunks once a listener is attached', () => {
    const stream = new BufferedStreamEmitter();
    const seen: Uint8Array[] = [];
    stream.on('chunk', (pcm) => {
      seen.push(pcm);
    });

    const live = new Uint8Array([7]);
    stream.emitChunk(live);

    expect(seen).toEqual([live]);
  });

  it('replays a buffered end event to a late-attaching listener', () => {
    const stream = new BufferedStreamEmitter();
    const onEnd = jest.fn();

    stream.emitEnd();
    stream.on('end', onEnd);

    expect(onEnd).toHaveBeenCalledTimes(1);
  });

  it('does not double-fire end when a listener is already attached', () => {
    const stream = new BufferedStreamEmitter();
    const onEnd = jest.fn();

    stream.on('end', onEnd);
    stream.emitEnd();

    expect(onEnd).toHaveBeenCalledTimes(1);
  });

  it('replays a buffered error to a late-attaching listener', () => {
    const stream = new BufferedStreamEmitter();
    const failure = new Error('boom');
    const onError = jest.fn();

    stream.emitError(failure);
    stream.on('error', onError);

    expect(onError).toHaveBeenCalledWith(failure);
  });

  it('drains pending chunks only once', () => {
    const stream = new BufferedStreamEmitter();
    stream.emitChunk(new Uint8Array([1]));

    const firstListener = jest.fn();
    stream.on('chunk', firstListener);
    expect(firstListener).toHaveBeenCalledTimes(1);

    // A listener added after the drain must not receive the buffered chunk.
    const secondListener = jest.fn();
    stream.on('chunk', secondListener);
    expect(secondListener).not.toHaveBeenCalled();
  });
});
package/src/index.ts ADDED
@@ -0,0 +1,156 @@
1
+ import { Platform } from 'react-native';
2
+
3
+ import { SupertonicEngine } from './engines/SupertonicEngine';
4
+ import { SystemEngine } from './engines/SystemEngine';
5
+ import type { Engine } from './engines/Engine';
6
+ import type {
7
+ ClonedVoice,
8
+ CloneOptions,
9
+ EngineId,
10
+ PrefetchProgress,
11
+ SpeakOptions,
12
+ StreamHandle,
13
+ Voice,
14
+ } from './types';
15
+
16
+ export type {
17
+ ClonedVoice,
18
+ CloneOptions,
19
+ EngineId,
20
+ PrefetchProgress,
21
+ SpeakOptions,
22
+ StreamHandle,
23
+ Voice,
24
+ } from './types';
25
+ export type { Engine } from './engines/Engine';
26
+ export { parseProsody, stripProsody } from './voices/prosody';
27
+ export { SUPERTONIC_VOICES, SUPERTONIC_LANGUAGES } from './voices/catalog';
28
+
29
// Registry of engine instances keyed by id, seeded with the two built-ins.
const engines = new Map<EngineId, Engine>([
  ['supertonic', new SupertonicEngine()],
  ['system', new SystemEngine()],
]);

// Engine used by calls that do not name one explicitly.
let activeEngineId: EngineId = 'supertonic';
34
+
35
/**
 * Looks up a registered engine instance.
 *
 * @param id engine to fetch; defaults to the currently active engine.
 * @throws Error when no engine is registered under `id`.
 */
function getEngine(id: EngineId = activeEngineId): Engine {
  const found = engines.get(id);
  if (found) return found;
  throw new Error(`[ttskit] Engine "${id}" is not registered.`);
}
42
+
43
/**
 * Public facade of the library. Every method resolves an engine from the
 * module-level registry (an explicit id, or the active engine) and forwards
 * the call to it.
 */
export const TTSKit = {
  /**
   * Makes `id` the engine used by calls that do not name one.
   *
   * @throws Error when `id` is not registered.
   */
  setEngine(id: EngineId): void {
    if (!engines.has(id)) {
      throw new Error(`[ttskit] Engine "${id}" is not registered.`);
    }
    activeEngineId = id;
  },

  /** Returns the id of the currently active engine. */
  getEngine(): EngineId {
    return activeEngineId;
  },

  /** Registers `engine` under `engine.id`, replacing any engine with that id. */
  registerEngine(engine: Engine): void {
    engines.set(engine.id, engine);
  },

  /**
   * Suggest a sensible engine for the current device.
   *
   * - On iOS, always returns `'supertonic'` — every iPhone with iOS 13+ has the
   *   Neural Engine and runs neural TTS well (~1-2s TTFA).
   * - On Android, returns `'supertonic'` only for devices on a small allow-list
   *   of recent flagships running Android 10+, else `'system'`. The heuristic
   *   is conservative: it errs toward `system` for any mid-range or older
   *   device because Supertonic on a mid-range Snapdragon runs at ~0.5×
   *   realtime (10s+ TTFA), which is worse UX than a robotic but instant
   *   system voice.
   * - Defaults to `'supertonic'` on web / unknown platforms.
   *
   * This is opt-in. The library default is still Supertonic everywhere — apps
   * that want graceful fallback should call this once at startup:
   *
   *     TTSKit.setEngine(TTSKit.recommendEngine());
   *
   * The detection is heuristic. For a hard guarantee, run a one-time benchmark
   * (synthesize a known short input, measure TTFA, persist the result) and
   * decide based on actual numbers — that's more accurate than any static
   * device-tier list.
   */
  recommendEngine(): EngineId {
    // iOS, web and unknown platforms all keep the library default.
    if (Platform.OS !== 'android') return 'supertonic';

    // Android tier detection. We can't read SoC directly from JS, so we rely
    // on what `Platform.constants` exposes: Brand, Manufacturer, Model.
    // The check is "is this likely a flagship / recent device?" — keep it
    // narrow and additive. Anything not on the allow-list falls back to system.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the Android-only fields are not on the cross-platform constants type
    const c: any = Platform.constants ?? {};
    const brand = String(c.Brand ?? '').toLowerCase();
    const manufacturer = String(c.Manufacturer ?? '').toLowerCase();
    const model = String(c.Model ?? '').toLowerCase();
    // `Release` is the Android version string (e.g. "13"), not the API level.
    // parseInt yields NaN for unparsable values, which skips the floor check.
    const androidRelease = typeof c.Release === 'string' ? parseInt(c.Release, 10) : 0;

    // Android 10 = API 29 = NNAPI 1.2 floor. Below this, NNAPI partitioning
    // is poor enough that ORT often falls back to XNNPACK silently.
    if (androidRelease && androidRelease < 10) return 'system';

    // Pixel 6 and newer have Tensor G1/G2/G3/G4 with a real NPU.
    if (brand === 'google' && /pixel\s*([6-9]|1\d)/.test(model)) return 'supertonic';
    // Samsung S22+ and Tab S8+ are Snapdragon 8 Gen 1 / Exynos 2200 floor.
    if (manufacturer === 'samsung' && /sm-s9\d\d|sm-x[78]\d\d/i.test(model)) return 'supertonic';
    // OnePlus NE2xxx / LE2xxx model codes. The alternation is grouped so that
    // "ne" must be followed by the numeric code; ungrouped, the bare "ne"
    // alternative matched any model containing those letters (e.g. "oneplus a6003").
    if (brand === 'oneplus' && /\b(?:ne|le)2\d{3}/.test(model)) return 'supertonic';

    // Default for everything else (e.g. a Galaxy A52 with SD720G, which gets
    // ~10s TTFA): use the system engine.
    return 'system';
  },

  /** Asks the selected engine (default: active) whether it can be used. */
  async isAvailable(engineId?: EngineId): Promise<boolean> {
    return getEngine(engineId).isAvailable();
  },

  /** Forwards to the selected engine's `prefetch`, passing `onProgress` through. */
  async prefetchModel(
    onProgress?: (p: PrefetchProgress) => void,
    engineId?: EngineId
  ): Promise<void> {
    return getEngine(engineId).prefetch(onProgress);
  },

  /** Lists the voices of the selected engine (default: active). */
  async getVoices(engineId?: EngineId): Promise<Voice[]> {
    return getEngine(engineId).getVoices();
  },

  /** Speaks `text` with `options.engine` if given, else the active engine. */
  async speak(text: string, options: SpeakOptions = {}): Promise<void> {
    return getEngine(options.engine).speak(text, options);
  },

  /**
   * Starts a stream on `options.engine` if given, else the active engine.
   * Not `async`: an unregistered engine id, or an engine that throws from
   * `stream()`, throws synchronously here.
   */
  stream(text: string, options: SpeakOptions = {}): StreamHandle {
    return getEngine(options.engine).stream(text, options);
  },

  /** Calls `stop()` on the selected engine (default: active). */
  async stop(engineId?: EngineId): Promise<void> {
    return getEngine(engineId).stop();
  },

  /** Delete locally cached model files so the next `prefetchModel()` re-downloads.
   *  No-op for engines that don't have a cache (e.g. the system engine). */
  async clearCache(engineId?: EngineId): Promise<void> {
    const engine = getEngine(engineId);
    if (engine.clearCache) {
      await engine.clearCache();
    }
  },

  /**
   * Clones a voice using the selected engine.
   *
   * @throws Error when the engine does not implement `cloneVoice`.
   */
  async cloneVoice(options: CloneOptions, engineId?: EngineId): Promise<ClonedVoice> {
    const engine = getEngine(engineId);
    if (!engine.cloneVoice) {
      throw new Error(`[ttskit] Engine "${engine.id}" does not support voice cloning.`);
    }
    return engine.cloneVoice(options);
  },
};
155
+
156
+ export default TTSKit;
package/src/types.ts ADDED
@@ -0,0 +1,95 @@
1
/**
 * Identifier of a TTS engine. Used as the key of the engine registry in
 * index.ts and as the `engine` field on voices and speak options. Only
 * 'supertonic' and 'system' are registered by index.ts itself; the other ids
 * are presumably reserved for engines added via `TTSKit.registerEngine` —
 * TODO confirm.
 */
+ export type EngineId = 'supertonic' | 'system' | 'neutts' | 'cloud:eleven' | 'cloud:openai' | 'cloud:cartesia';
2
+
3
/**
 * Two-letter language codes for the Supertonic engine: 31 members, the same
 * codes listed in `SUPERTONIC_LANGUAGES` (voices/catalog.ts).
 */
+ export type SupertonicLang =
4
+ | 'en' | 'ko' | 'ja' | 'ar' | 'bg' | 'cs' | 'da' | 'de' | 'el' | 'es'
5
+ | 'et' | 'fi' | 'fr' | 'hi' | 'hr' | 'hu' | 'id' | 'it' | 'lt' | 'lv'
6
+ | 'nl' | 'pl' | 'pt' | 'ro' | 'ru' | 'sk' | 'sl' | 'sv' | 'tr' | 'uk' | 'vi';
7
+
8
/** Description of one selectable voice, as returned by an engine's `getVoices()`. */
+ export interface Voice {
9
+ id: string; // engine-specific identifier (e.g. 'F1' in the Supertonic catalog; the expo-speech voice identifier for the system engine)
10
+ name: string; // display name
11
+ gender?: 'male' | 'female' | 'neutral'; // set by the Supertonic catalog; the system engine does not populate it
12
+ engine: EngineId; // engine this voice belongs to
13
+ language?: string; // populated by the system engine from expo-speech; absent on Supertonic catalog entries
14
+ sampleUrl?: string; // NOTE(review): not set by any code visible here — presumably a preview-audio URL; confirm
15
+ }
16
+
17
+ /**
18
+ * Options for synthesis calls.
19
+ *
20
+ * **Privacy:** the text you pass to `speak()` / `stream()` is processed
21
+ * entirely on-device. It is never sent to a remote server when using the
22
+ * `supertonic` engine. The `system` engine forwards text to the OS-level
23
+ * TTS service (`expo-speech`), which on some platforms (notably some
24
+ * Android OEMs) may route through a cloud service — verify with the
25
+ * device vendor's privacy policy if that matters for your app.
26
+ */
27
+ export interface SpeakOptions {
28
+ voice?: string; // voice id; the system engine forwards it unchanged as expo-speech's `voice` option
29
+ engine?: EngineId; // per-call engine override; TTSKit.speak/stream fall back to the active engine when omitted
30
+ /**
31
+ * BCP-47 language code passed to the model.
32
+ * Supertonic-3 supports 31 languages (see SupertonicLang); other engines may
33
+ * use this differently (system engine forwards it as-is to expo-speech).
34
+ */
35
+ language?: string;
36
+ /**
37
+ * Speech speed multiplier (default 1.05 — matches Supertonic upstream).
38
+ * Higher = faster.
39
+ */
40
+ rate?: number;
41
+ pitch?: number; // forwarded to expo-speech by the system engine; NOTE(review): Supertonic handling not visible here
42
+ volume?: number; // forwarded to expo-speech by the system engine; NOTE(review): Supertonic handling not visible here
43
+ /**
44
+ * Number of denoising steps for diffusion-based engines (Supertonic).
45
+ * Default 8. Lower = faster but lower quality.
46
+ */
47
+ totalStep?: number;
48
+ onStart?: () => void; // system engine: passed straight through as expo-speech's onStart
49
+ onDone?: () => void; // system engine: called on expo-speech onDone, just before the speak() promise resolves; not called on stop
50
+ onError?: (err: Error) => void; // system engine: called on expo-speech onError, just before the speak() promise rejects
51
+ }
52
+
53
/**
 * Handle returned by `stream()`. Listeners are attached with the chainable
 * `on()` overloads (each returns `this`).
 */
+ export interface StreamHandle {
54
+ id: string; // NOTE(review): presumably a unique id for this stream — how it is generated is not visible here
55
+ on(event: 'chunk', listener: (pcm: Uint8Array) => void): this; // audio data as raw bytes; sample format/rate not specified in this file
56
+ on(event: 'end', listener: () => void): this;
57
+ on(event: 'error', listener: (err: Error) => void): this;
58
+ cancel(): Promise<void>; // NOTE(review): cancellation semantics are engine-defined and not visible here
59
+ }
60
+
61
/** Input to `TTSKit.cloneVoice`, passed unchanged to the engine's `cloneVoice`. */
+ export interface CloneOptions {
62
+ sampleUri: string; // NOTE(review): presumably a URI of a reference audio sample — accepted schemes/formats not visible here
63
+ name?: string; // optional name for the cloned voice
64
+ }
65
+
66
/** Result of `TTSKit.cloneVoice`. */
+ export interface ClonedVoice {
67
+ id: string; // NOTE(review): presumably usable as `SpeakOptions.voice` — confirm against the engine implementation
68
+ name: string;
69
+ engine: EngineId; // engine that produced the voice
70
+ }
71
+
72
/** Progress payload delivered to the `onProgress` callback of `prefetch` / `prefetchModel`. */
+ export interface PrefetchProgress {
73
+ bytesDownloaded: number;
74
+ totalBytes: number;
75
+ percent: number; // NOTE(review): scale (0–1 vs 0–100) is not established anywhere in this file — confirm with the emitting engine
76
+ }
77
+
78
/** Static feature flags an engine advertises through its `capabilities` property. */
+ export interface EngineCapabilities {
79
+ streaming: boolean; // false for the system engine, whose stream() always throws
80
+ cloning: boolean;
81
+ emotionTags: boolean;
82
+ offline: boolean;
83
+ languages: string[]; // the system engine reports ['*'] — presumably a wildcard for "whatever the OS supports"; confirm
84
+ }
85
+
86
/**
 * Error shape carrying a machine-readable `code`.
 * NOTE(review): the SystemEngine and index.ts code visible in this file throw
 * plain `Error` objects without a `code`; where this type is actually produced
 * is not visible here.
 */
+ export interface TTSKitError extends Error {
87
+ code:
88
+ | 'ENGINE_NOT_AVAILABLE'
89
+ | 'VOICE_NOT_FOUND'
90
+ | 'MODEL_NOT_LOADED'
91
+ | 'SYNTHESIS_FAILED'
92
+ | 'PERMISSION_DENIED'
93
+ | 'NETWORK_ERROR'
94
+ | 'CANCELLED';
95
+ }
@@ -0,0 +1,46 @@
1
+ import {
2
+ DEFAULT_LANGUAGE,
3
+ DEFAULT_VOICE_ID,
4
+ SUPERTONIC_LANGUAGES,
5
+ SUPERTONIC_VOICES,
6
+ findVoice,
7
+ } from '../catalog';
8
+
9
// Guards the shape of the static Supertonic catalog: voice roster, language
// list, defaults, and the findVoice lookup.
describe('voice catalog', () => {
  it('ships exactly 10 voices (5 male, 5 female)', () => {
    const sortedIds = SUPERTONIC_VOICES.map(({ id }) => id).sort();
    const males = SUPERTONIC_VOICES.filter(({ gender }) => gender === 'male');
    const females = SUPERTONIC_VOICES.filter(({ gender }) => gender === 'female');

    expect(SUPERTONIC_VOICES).toHaveLength(10);
    expect(sortedIds).toEqual(['F1', 'F2', 'F3', 'F4', 'F5', 'M1', 'M2', 'M3', 'M4', 'M5']);
    expect(males).toHaveLength(5);
    expect(females).toHaveLength(5);
  });

  it('all voices use the supertonic engine', () => {
    SUPERTONIC_VOICES.forEach((voice) => {
      expect(voice.engine).toBe('supertonic');
    });
  });

  it('exports the 31 supported languages, no Mandarin', () => {
    expect(SUPERTONIC_LANGUAGES).toHaveLength(31);
    for (const code of ['en', 'ja', 'ko']) {
      expect(SUPERTONIC_LANGUAGES).toContain(code);
    }
    // Mandarin is *not* in the open-source release — guard against accidental re-add.
    expect(SUPERTONIC_LANGUAGES).not.toContain('zh');
  });

  it('default voice and language are valid catalog entries', () => {
    const defaultVoice = SUPERTONIC_VOICES.find(({ id }) => id === DEFAULT_VOICE_ID);
    expect(defaultVoice).toBeDefined();
    expect(SUPERTONIC_LANGUAGES).toContain(DEFAULT_LANGUAGE);
  });

  describe('findVoice', () => {
    it('finds a known voice', () => {
      const hit = findVoice('F1');
      expect(hit?.id).toBe('F1');
    });

    it('returns undefined for unknown voice', () => {
      const miss = findVoice('zz9-plural-z-alpha');
      expect(miss).toBeUndefined();
    });
  });
});
@@ -0,0 +1,63 @@
1
+ import { parseProsody, stripProsody } from '../prosody';
2
+
3
// Spec for parseProsody: bracketed tags apply to the text that follows them,
// until the next bracket group replaces the active tag set.
describe('parseProsody', () => {
  it('returns a single tagless segment for plain text', () => {
    const segments = parseProsody('hello world');
    expect(segments).toEqual([{ text: 'hello world', tags: [] }]);
  });

  it('returns one empty segment shape for empty input', () => {
    // With no tag and no text, the input comes back as a single segment.
    const segments = parseProsody('');
    expect(segments).toEqual([{ text: '', tags: [] }]);
  });

  it('parses a single tag', () => {
    const segments = parseProsody('[excited] hello');
    expect(segments).toEqual([{ text: 'hello', tags: ['excited'] }]);
  });

  it('switches tags mid-string', () => {
    const expected = [
      { text: 'hello', tags: [] },
      { text: 'there', tags: ['whisper'] },
      { text: 'world', tags: ['excited'] },
    ];
    expect(parseProsody('hello [whisper] there [excited] world')).toEqual(expected);
  });

  it('combines two tags inside one bracket group', () => {
    const expected = [{ text: 'go', tags: ['fast', 'excited'] }];
    expect(parseProsody('[fast excited] go')).toEqual(expected);
  });

  it('drops unknown tags silently rather than throwing', () => {
    const expected = [{ text: 'hi', tags: [] }];
    expect(parseProsody('[totallybogustag] hi')).toEqual(expected);
  });
});
38
+
39
// Spec for stripProsody: bracket tags are deleted, whitespace runs collapse to
// a single space, and the result is trimmed.
describe('stripProsody', () => {
  it('passes plain text through unchanged', () => {
    const plain = 'hello world';
    expect(stripProsody(plain)).toBe(plain);
  });

  it('removes a single tag', () => {
    const stripped = stripProsody('[excited] go');
    expect(stripped).toBe('go');
  });

  it('removes multiple tags and collapses whitespace', () => {
    const stripped = stripProsody('[whisper] hello [excited] world');
    expect(stripped).toBe('hello world');
  });

  it('handles tag-only input', () => {
    const stripped = stripProsody('[whisper]');
    expect(stripped).toBe('');
  });

  it('joins text directly adjacent to a tag (no whitespace inserted)', () => {
    // Removing the tag inserts no separator, so text on either side of it
    // fuses. This is the documented behavior: put a space before the tag to
    // keep a word boundary.
    const stripped = stripProsody('a[whisper]b');
    expect(stripped).toBe('ab');
  });
});
@@ -0,0 +1,32 @@
1
+ import type { Voice } from '../types';
2
+
3
// Voice ids per gender, in catalog order.
const MALE_VOICE_IDS = ['M1', 'M2', 'M3', 'M4', 'M5'];
const FEMALE_VOICE_IDS = ['F1', 'F2', 'F3', 'F4', 'F5'];

// Builds catalog entries for one gender; the display name equals the id.
const toVoices = (ids: string[], gender: 'male' | 'female'): Voice[] =>
  ids.map((id): Voice => ({ id, name: id, gender, engine: 'supertonic' }));

/**
 * The 10 Supertonic-3 voices: five male followed by five female. Voices carry
 * no language — the language is supplied separately at call time, so any voice
 * can be combined with any of the 31 supported languages.
 */
export const SUPERTONIC_VOICES: Voice[] = [
  ...toVoices(MALE_VOICE_IDS, 'male'),
  ...toVoices(FEMALE_VOICE_IDS, 'female'),
];

/** Language codes supported by Supertonic (31 entries). */
export const SUPERTONIC_LANGUAGES = [
  'en', 'ko', 'ja', 'ar', 'bg', 'cs', 'da', 'de', 'el', 'es',
  'et', 'fi', 'fr', 'hi', 'hr', 'hu', 'id', 'it', 'lt', 'lv',
  'nl', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sv', 'tr', 'uk', 'vi',
];

/** Voice id used when the caller does not pick one. */
export const DEFAULT_VOICE_ID = 'F1';
/** Language code used when the caller does not pick one. */
export const DEFAULT_LANGUAGE = 'en';

/**
 * Looks up a Supertonic voice by exact id.
 *
 * @returns the catalog entry, or `undefined` when no voice has that id.
 */
export function findVoice(id: string): Voice | undefined {
  for (const voice of SUPERTONIC_VOICES) {
    if (voice.id === id) return voice;
  }
  return undefined;
}
@@ -0,0 +1,39 @@
1
// Single source of truth for the recognised tags; ProsodyTag is derived from it.
const KNOWN_PROSODY_TAGS = ['excited', 'whisper', 'calm', 'sad', 'angry', 'fast', 'slow'] as const;

/** A prosody tag the parser recognises inside `[...]` groups. */
export type ProsodyTag = (typeof KNOWN_PROSODY_TAGS)[number];

/** A run of text together with the tags in effect for it. */
export interface ProsodySegment {
  text: string;
  tags: ProsodyTag[];
}

// Matches one bracket group of letters, underscores and spaces, e.g. "[fast excited]".
const TAG_RE = /\[([a-z_ ]+)\]/gi;

// Type guard: true when `word` is one of the recognised tags.
function isProsodyTag(word: string): word is ProsodyTag {
  return (KNOWN_PROSODY_TAGS as readonly string[]).includes(word);
}

/**
 * Splits `input` into text segments annotated with prosody tags.
 *
 * Each `[...]` group replaces the active tag set for the text that follows it,
 * up to the next group. Tag names are matched case-insensitively; unknown
 * names are dropped. Segment text is trimmed and empty segments are omitted.
 *
 * @returns at least one segment. When there is no speakable text the result is
 * a single tagless segment: its text is `''` if any bracket group was seen (so
 * raw markup such as `"[whisper]"` never leaks out as text), otherwise the
 * input unchanged.
 */
export function parseProsody(input: string): ProsodySegment[] {
  // Per-call clone: a shared /g regex carries `lastIndex` between calls.
  const tagRe = new RegExp(TAG_RE.source, TAG_RE.flags);
  const segments: ProsodySegment[] = [];
  let activeTags: ProsodyTag[] = [];
  let cursor = 0;
  let sawTag = false;
  let match: RegExpExecArray | null;

  while ((match = tagRe.exec(input)) !== null) {
    sawTag = true;
    // Text between the previous group and this one belongs to the previous tags.
    const before = input.slice(cursor, match.index).trim();
    if (before) segments.push({ text: before, tags: [...activeTags] });

    activeTags = (match[1] ?? '').toLowerCase().split(/\s+/).filter(isProsodyTag);
    cursor = tagRe.lastIndex;
  }

  const tail = input.slice(cursor).trim();
  if (tail) segments.push({ text: tail, tags: [...activeTags] });

  if (segments.length > 0) return segments;
  return [{ text: sawTag ? '' : input, tags: [] }];
}

/**
 * Removes every bracket group from `input`, collapses whitespace runs to a
 * single space and trims the result. No separator is inserted where a group
 * was removed, so `"a[whisper]b"` becomes `"ab"`.
 */
export function stripProsody(input: string): string {
  return input.replace(TAG_RE, '').replace(/\s+/g, ' ').trim();
}