react-native-tts-kit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/ATTRIBUTIONS.md +87 -0
  2. package/LICENSE +21 -0
  3. package/README.md +231 -0
  4. package/android/build.gradle +50 -0
  5. package/android/src/main/AndroidManifest.xml +3 -0
  6. package/android/src/main/java/expo/modules/ttskit/RNTTSKitModule.kt +158 -0
  7. package/android/src/main/java/expo/modules/ttskit/supertonic/AudioEngine.kt +158 -0
  8. package/android/src/main/java/expo/modules/ttskit/supertonic/ModelLocator.kt +372 -0
  9. package/android/src/main/java/expo/modules/ttskit/supertonic/SupertonicSession.kt +373 -0
  10. package/android/src/main/java/expo/modules/ttskit/supertonic/TextFrontend.kt +154 -0
  11. package/android/src/main/java/expo/modules/ttskit/supertonic/VoicePack.kt +47 -0
  12. package/build/engines/BufferedStreamEmitter.d.ts +26 -0
  13. package/build/engines/BufferedStreamEmitter.d.ts.map +1 -0
  14. package/build/engines/BufferedStreamEmitter.js +68 -0
  15. package/build/engines/BufferedStreamEmitter.js.map +1 -0
  16. package/build/engines/Engine.d.ts +15 -0
  17. package/build/engines/Engine.d.ts.map +1 -0
  18. package/build/engines/Engine.js +2 -0
  19. package/build/engines/Engine.js.map +1 -0
  20. package/build/engines/SupertonicEngine.d.ts +14 -0
  21. package/build/engines/SupertonicEngine.d.ts.map +1 -0
  22. package/build/engines/SupertonicEngine.js +183 -0
  23. package/build/engines/SupertonicEngine.js.map +1 -0
  24. package/build/engines/SystemEngine.d.ts +13 -0
  25. package/build/engines/SystemEngine.d.ts.map +1 -0
  26. package/build/engines/SystemEngine.js +78 -0
  27. package/build/engines/SystemEngine.js.map +1 -0
  28. package/build/index.d.ts +46 -0
  29. package/build/index.d.ts.map +1 -0
  30. package/build/index.js +118 -0
  31. package/build/index.js.map +1 -0
  32. package/build/types.d.ts +77 -0
  33. package/build/types.d.ts.map +1 -0
  34. package/build/types.js +2 -0
  35. package/build/types.js.map +1 -0
  36. package/build/voices/catalog.d.ts +12 -0
  37. package/build/voices/catalog.d.ts.map +1 -0
  38. package/build/voices/catalog.js +28 -0
  39. package/build/voices/catalog.js.map +1 -0
  40. package/build/voices/prosody.d.ts +8 -0
  41. package/build/voices/prosody.d.ts.map +1 -0
  42. package/build/voices/prosody.js +28 -0
  43. package/build/voices/prosody.js.map +1 -0
  44. package/expo-module.config.json +9 -0
  45. package/ios/RNTTSKit.podspec +28 -0
  46. package/ios/RNTTSKitModule.swift +133 -0
  47. package/ios/Supertonic/AudioEngine.swift +110 -0
  48. package/ios/Supertonic/ModelLocator.swift +416 -0
  49. package/ios/Supertonic/SupertonicSession.swift +405 -0
  50. package/ios/Supertonic/TextFrontend.swift +216 -0
  51. package/ios/Supertonic/VoicePack.swift +51 -0
  52. package/licenses/OpenRAIL-M.txt +209 -0
  53. package/package.json +77 -0
  54. package/src/engines/BufferedStreamEmitter.ts +50 -0
  55. package/src/engines/Engine.ts +28 -0
  56. package/src/engines/SupertonicEngine.ts +250 -0
  57. package/src/engines/SystemEngine.ts +96 -0
  58. package/src/engines/__tests__/BufferedStreamEmitter.test.ts +65 -0
  59. package/src/index.ts +156 -0
  60. package/src/types.ts +95 -0
  61. package/src/voices/__tests__/catalog.test.ts +46 -0
  62. package/src/voices/__tests__/prosody.test.ts +63 -0
  63. package/src/voices/catalog.ts +32 -0
  64. package/src/voices/prosody.ts +39 -0
package/build/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"","sourcesContent":["export type EngineId = 'supertonic' | 'system' | 'neutts' | 'cloud:eleven' | 'cloud:openai' | 'cloud:cartesia';\n\nexport type SupertonicLang =\n | 'en' | 'ko' | 'ja' | 'ar' | 'bg' | 'cs' | 'da' | 'de' | 'el' | 'es'\n | 'et' | 'fi' | 'fr' | 'hi' | 'hr' | 'hu' | 'id' | 'it' | 'lt' | 'lv'\n | 'nl' | 'pl' | 'pt' | 'ro' | 'ru' | 'sk' | 'sl' | 'sv' | 'tr' | 'uk' | 'vi';\n\nexport interface Voice {\n id: string;\n name: string;\n gender?: 'male' | 'female' | 'neutral';\n engine: EngineId;\n language?: string;\n sampleUrl?: string;\n}\n\n/**\n * Options for synthesis calls.\n *\n * **Privacy:** the text you pass to `speak()` / `stream()` is processed\n * entirely on-device. It is never sent to a remote server when using the\n * `supertonic` engine. The `system` engine forwards text to the OS-level\n * TTS service (`expo-speech`), which on some platforms (notably some\n * Android OEMs) may route through a cloud service — verify with the\n * device vendor's privacy policy if that matters for your app.\n */\nexport interface SpeakOptions {\n voice?: string;\n engine?: EngineId;\n /**\n * BCP-47 language code passed to the model.\n * Supertonic-3 supports 31 languages (see SupertonicLang); other engines may\n * use this differently (system engine forwards it as-is to expo-speech).\n */\n language?: string;\n /**\n * Speech speed multiplier (default 1.05 — matches Supertonic upstream).\n * Higher = faster.\n */\n rate?: number;\n pitch?: number;\n volume?: number;\n /**\n * Number of denoising steps for diffusion-based engines (Supertonic).\n * Default 8. 
Lower = faster but lower quality.\n */\n totalStep?: number;\n onStart?: () => void;\n onDone?: () => void;\n onError?: (err: Error) => void;\n}\n\nexport interface StreamHandle {\n id: string;\n on(event: 'chunk', listener: (pcm: Uint8Array) => void): this;\n on(event: 'end', listener: () => void): this;\n on(event: 'error', listener: (err: Error) => void): this;\n cancel(): Promise<void>;\n}\n\nexport interface CloneOptions {\n sampleUri: string;\n name?: string;\n}\n\nexport interface ClonedVoice {\n id: string;\n name: string;\n engine: EngineId;\n}\n\nexport interface PrefetchProgress {\n bytesDownloaded: number;\n totalBytes: number;\n percent: number;\n}\n\nexport interface EngineCapabilities {\n streaming: boolean;\n cloning: boolean;\n emotionTags: boolean;\n offline: boolean;\n languages: string[];\n}\n\nexport interface TTSKitError extends Error {\n code:\n | 'ENGINE_NOT_AVAILABLE'\n | 'VOICE_NOT_FOUND'\n | 'MODEL_NOT_LOADED'\n | 'SYNTHESIS_FAILED'\n | 'PERMISSION_DENIED'\n | 'NETWORK_ERROR'\n | 'CANCELLED';\n}\n"]}
import type { Voice } from '../types';
/**
 * Supertonic-3 ships 10 voices (5 M, 5 F). Each voice is language-agnostic —
 * the model takes a `language` argument at call time, separate from the voice.
 * Pair any voice with any of the 31 supported languages.
 */
export declare const SUPERTONIC_VOICES: Voice[];
/** ISO 639-1 codes for the 31 languages Supertonic-3 accepts. */
export declare const SUPERTONIC_LANGUAGES: string[];
/** Voice used when the caller does not specify one. */
export declare const DEFAULT_VOICE_ID = "F1";
/** Language used when the caller does not specify one. */
export declare const DEFAULT_LANGUAGE = "en";
/** Looks up a voice by id in SUPERTONIC_VOICES; returns undefined on a miss. */
export declare function findVoice(id: string): Voice | undefined;
//# sourceMappingURL=catalog.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"catalog.d.ts","sourceRoot":"","sources":["../../src/voices/catalog.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AAEtC;;;;GAIG;AACH,eAAO,MAAM,iBAAiB,EAAE,KAAK,EAWpC,CAAC;AAEF,eAAO,MAAM,oBAAoB,UAIhC,CAAC;AAEF,eAAO,MAAM,gBAAgB,OAAO,CAAC;AACrC,eAAO,MAAM,gBAAgB,OAAO,CAAC;AAErC,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,KAAK,GAAG,SAAS,CAEvD"}
/**
 * Supertonic-3 ships 10 voices (5 M, 5 F). Each voice is language-agnostic —
 * the model takes a `language` argument at call time, separate from the voice.
 * Pair any voice with any of the 31 supported languages.
 */
const voiceEntry = (id, gender) => ({ id, name: id, gender, engine: 'supertonic' });
export const SUPERTONIC_VOICES = [
    ...['M1', 'M2', 'M3', 'M4', 'M5'].map((id) => voiceEntry(id, 'male')),
    ...['F1', 'F2', 'F3', 'F4', 'F5'].map((id) => voiceEntry(id, 'female')),
];
// ISO 639-1 codes for the 31 languages the model accepts.
export const SUPERTONIC_LANGUAGES = [
    'en', 'ko', 'ja', 'ar', 'bg', 'cs', 'da', 'de', 'el', 'es',
    'et', 'fi', 'fr', 'hi', 'hr', 'hu', 'id', 'it', 'lt', 'lv',
    'nl', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sv', 'tr', 'uk', 'vi',
];
export const DEFAULT_VOICE_ID = 'F1';
export const DEFAULT_LANGUAGE = 'en';
/** Returns the catalog entry whose id matches, or undefined when absent. */
export function findVoice(id) {
    for (const voice of SUPERTONIC_VOICES) {
        if (voice.id === id) {
            return voice;
        }
    }
    return undefined;
}
//# sourceMappingURL=catalog.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"catalog.js","sourceRoot":"","sources":["../../src/voices/catalog.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAY;IACxC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAI,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAI,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAI,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAI,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAI,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE;IAChE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE;CACjE,CAAC;AAEF,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAC1D,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAC1D,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;CACjE,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,IAAI,CAAC;AACrC,MAAM,CAAC,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAErC,MAAM,UAAU,SAAS,CAAC,EAAU;IAClC,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AACpD,CAAC","sourcesContent":["import type { Voice } from '../types';\n\n/**\n * Supertonic-3 ships 10 voices (5 M, 5 F). 
Each voice is language-agnostic —\n * the model takes a `language` argument at call time, separate from the voice.\n * Pair any voice with any of the 31 supported languages.\n */\nexport const SUPERTONIC_VOICES: Voice[] = [\n { id: 'M1', name: 'M1', gender: 'male', engine: 'supertonic' },\n { id: 'M2', name: 'M2', gender: 'male', engine: 'supertonic' },\n { id: 'M3', name: 'M3', gender: 'male', engine: 'supertonic' },\n { id: 'M4', name: 'M4', gender: 'male', engine: 'supertonic' },\n { id: 'M5', name: 'M5', gender: 'male', engine: 'supertonic' },\n { id: 'F1', name: 'F1', gender: 'female', engine: 'supertonic' },\n { id: 'F2', name: 'F2', gender: 'female', engine: 'supertonic' },\n { id: 'F3', name: 'F3', gender: 'female', engine: 'supertonic' },\n { id: 'F4', name: 'F4', gender: 'female', engine: 'supertonic' },\n { id: 'F5', name: 'F5', gender: 'female', engine: 'supertonic' },\n];\n\nexport const SUPERTONIC_LANGUAGES = [\n 'en', 'ko', 'ja', 'ar', 'bg', 'cs', 'da', 'de', 'el', 'es',\n 'et', 'fi', 'fr', 'hi', 'hr', 'hu', 'id', 'it', 'lt', 'lv',\n 'nl', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sv', 'tr', 'uk', 'vi',\n];\n\nexport const DEFAULT_VOICE_ID = 'F1';\nexport const DEFAULT_LANGUAGE = 'en';\n\nexport function findVoice(id: string): Voice | undefined {\n return SUPERTONIC_VOICES.find((v) => v.id === id);\n}\n"]}
/** Tags accepted inside bracket markup such as "[excited]" or "[calm slow]". */
export type ProsodyTag = 'excited' | 'whisper' | 'calm' | 'sad' | 'angry' | 'fast' | 'slow';
/** A run of plain text plus the tags active while it is spoken. */
export interface ProsodySegment {
    /** Text with the bracket markup removed and edges trimmed. */
    text: string;
    /** Tags applied to this segment (possibly empty). */
    tags: ProsodyTag[];
}
/** Splits bracket-tagged input into segments; words not in ProsodyTag are dropped. */
export declare function parseProsody(input: string): ProsodySegment[];
/** Removes all bracket tags and collapses whitespace runs to single spaces. */
export declare function stripProsody(input: string): string;
//# sourceMappingURL=prosody.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prosody.d.ts","sourceRoot":"","sources":["../../src/voices/prosody.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,UAAU,GAAG,SAAS,GAAG,SAAS,GAAG,MAAM,GAAG,KAAK,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;AAE5F,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,EAAE,CAAC;CACpB;AAID,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE,CAyB5D;AAED,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAElD"}
// Matches one bracket group, e.g. "[excited]" or "[calm slow]".
const TAG_RE = /\[([a-z_ ]+)\]/gi;
// Whitelist of tag words parseProsody recognizes; anything else is ignored.
const KNOWN_TAGS = ['excited', 'whisper', 'calm', 'sad', 'angry', 'fast', 'slow'];
/**
 * Splits bracket-tagged input into segments. Each bracket group replaces the
 * active tag set for all text that follows it, until the next group.
 * Falls back to one untagged segment containing the raw input when nothing
 * else was produced.
 */
export function parseProsody(input) {
    const result = [];
    let cursor = 0;
    let currentTags = [];
    for (let hit = TAG_RE.exec(input); hit !== null; hit = TAG_RE.exec(input)) {
        if (hit.index > cursor) {
            const chunk = input.slice(cursor, hit.index).trim();
            if (chunk) {
                result.push({ text: chunk, tags: currentTags.slice() });
            }
        }
        currentTags = hit[1]
            .toLowerCase()
            .split(/\s+/)
            .filter((word) => KNOWN_TAGS.includes(word));
        cursor = TAG_RE.lastIndex;
    }
    const remainder = input.slice(cursor).trim();
    if (remainder) {
        result.push({ text: remainder, tags: currentTags.slice() });
    }
    return result.length ? result : [{ text: input, tags: [] }];
}
/** Strips every bracket tag, then normalizes whitespace. */
export function stripProsody(input) {
    return input.replace(TAG_RE, '').replace(/\s+/g, ' ').trim();
}
//# sourceMappingURL=prosody.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prosody.js","sourceRoot":"","sources":["../../src/voices/prosody.ts"],"names":[],"mappings":"AAOA,MAAM,MAAM,GAAG,kBAAkB,CAAC;AAElC,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,MAAM,QAAQ,GAAqB,EAAE,CAAC;IACtC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,UAAU,GAAiB,EAAE,CAAC;IAClC,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,IAAI,KAAK,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;YAC5B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;YACxD,IAAI,IAAI;gBAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,UAAU,CAAC,EAAE,CAAC,CAAC;QAC3D,CAAC;QACD,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC;aAClB,WAAW,EAAE;aACb,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,CAAC,EAAmB,EAAE,CAC7B,CAAC,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAC3E,CAAC;QACJ,UAAU,GAAG,IAAI,CAAC;QAClB,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;IAC/B,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,IAAI;QAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,UAAU,CAAC,EAAE,CAAC,CAAC;IAE/D,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;AAClE,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC/D,CAAC","sourcesContent":["export type ProsodyTag = 'excited' | 'whisper' | 'calm' | 'sad' | 'angry' | 'fast' | 'slow';\n\nexport interface ProsodySegment {\n text: string;\n tags: ProsodyTag[];\n}\n\nconst TAG_RE = /\\[([a-z_ ]+)\\]/gi;\n\nexport function parseProsody(input: string): ProsodySegment[] {\n const segments: ProsodySegment[] = [];\n let lastIndex = 0;\n let activeTags: ProsodyTag[] = [];\n let match: RegExpExecArray | null;\n\n while ((match = TAG_RE.exec(input)) !== null) {\n if (match.index > lastIndex) {\n const text = input.slice(lastIndex, match.index).trim();\n if (text) segments.push({ 
text, tags: [...activeTags] });\n }\n const tags = match[1]\n .toLowerCase()\n .split(/\\s+/)\n .filter((t): t is ProsodyTag =>\n ['excited', 'whisper', 'calm', 'sad', 'angry', 'fast', 'slow'].includes(t)\n );\n activeTags = tags;\n lastIndex = TAG_RE.lastIndex;\n }\n\n const tail = input.slice(lastIndex).trim();\n if (tail) segments.push({ text: tail, tags: [...activeTags] });\n\n return segments.length ? segments : [{ text: input, tags: [] }];\n}\n\nexport function stripProsody(input: string): string {\n return input.replace(TAG_RE, '').replace(/\\s+/g, ' ').trim();\n}\n"]}
@@ -0,0 +1,9 @@
1
+ {
2
+ "platforms": ["ios", "android"],
3
+ "ios": {
4
+ "modules": ["RNTTSKitModule"]
5
+ },
6
+ "android": {
7
+ "modules": ["expo.modules.ttskit.RNTTSKitModule"]
8
+ }
9
+ }
require 'json'

# Pull version/description/license/author metadata from the npm package.json
# one directory up, so the podspec never drifts from the published package.
package = JSON.parse(File.read(File.join(__dir__, '..', 'package.json')))

Pod::Spec.new do |s|
  s.name = 'RNTTSKit'
  s.version = package['version']
  s.summary = package['description']
  s.description = package['description']
  s.license = package['license']
  s.author = package['author']
  s.homepage = package['homepage']
  s.platforms = { :ios => '14.0', :tvos => '14.0' }
  s.swift_version = '5.9'
  # NOTE(review): no :tag or :commit is pinned here — fine for a pod consumed
  # locally via autolinking, but `pod spec lint` will warn; confirm intent.
  s.source = { git: 'https://github.com/ahk-d/react-native-tts-kit' }
  # Build as a static framework — presumably required by the prebuilt
  # onnxruntime-objc binary pod; verify against its integration docs.
  s.static_framework = true

  s.dependency 'ExpoModulesCore'
  # On-device ONNX inference runtime used by the Supertonic session.
  s.dependency 'onnxruntime-objc', '~> 1.18.0'

  s.pod_target_xcconfig = {
    'DEFINES_MODULE' => 'YES',
    'SWIFT_COMPILATION_MODE' => 'wholemodule'
  }

  # Source globs are relative to this podspec (package/ios/).
  s.source_files = '**/*.{h,m,mm,swift,hpp,cpp}'
  s.resources = ['Resources/**/*']
end
import ExpoModulesCore

/// Expo native module exposing the on-device Supertonic TTS pipeline to JS.
///
/// Owns one `SupertonicSession` (model + voice state, recreated by
/// `clearCache`) and one `AudioEngine` (playback). Synthesis entry points run
/// inside `Task`s and report progress / audio back to JS via the events
/// declared in `Events(...)`.
public class RNTTSKitModule: Module {
    private var session: SupertonicSession?
    private let audioEngine = AudioEngine()
    /// In-flight model download, if any; cancelled by a re-prefetch or clearCache.
    private var prefetchTask: Task<Void, Error>?

    public func definition() -> ModuleDefinition {
        Name("RNTTSKit")

        Events(
            "onPrefetchProgress",
            "onStreamChunk",
            "onStreamEnd",
            "onStreamError",
            "onSpeakStart",
            "onSpeakDone"
        )

        OnCreate {
            self.session = SupertonicSession()
        }

        OnDestroy {
            self.session?.tearDown()
            self.session = nil
            self.audioEngine.tearDown()
        }

        /// True when the model files already exist on disk (no download needed).
        AsyncFunction("isAvailable") { () -> Bool in
            return ModelLocator.modelExists()
        }

        /// Downloads the model if missing (emitting `onPrefetchProgress`),
        /// then loads the session and pre-warms the default voice.
        AsyncFunction("prefetch") { (promise: Promise) in
            self.prefetchTask?.cancel()
            self.prefetchTask = Task { [weak self] in
                // FIX: previously `guard let self else { return }` abandoned the
                // captured Promise when the module deallocated mid-download,
                // leaving the JS await hanging forever. Always settle it.
                guard let self else {
                    promise.reject("PREFETCH_FAILED", "Module was deallocated")
                    return
                }
                do {
                    try await ModelLocator.ensureModel { progress in
                        self.sendEvent("onPrefetchProgress", [
                            "bytesDownloaded": progress.bytesDownloaded,
                            "totalBytes": progress.totalBytes,
                            "percent": progress.percent
                        ])
                    }
                    try self.session?.loadIfNeeded()
                    // Pre-warm the default voice so first speak() after
                    // prefetch doesn't pay JSON-decode + tensor-alloc cost.
                    self.session?.prewarmDefaultVoice()
                    promise.resolve()
                } catch {
                    promise.reject("PREFETCH_FAILED", error.localizedDescription)
                }
            }
        }

        /// Synthesizes `text` in one shot and resolves after playback finishes.
        /// Emits `onSpeakStart` before synthesis and `onSpeakDone` after playback.
        AsyncFunction("speak") { (id: String, text: String, voiceId: String, lang: String, totalStep: Int, speed: Double, volume: Double, promise: Promise) in
            Task { [weak self] in
                guard let self, let session = self.session else {
                    promise.reject("MODEL_NOT_LOADED", "Supertonic session unavailable")
                    return
                }
                do {
                    try session.loadIfNeeded()
                    self.sendEvent("onSpeakStart", ["id": id])
                    let samples = try session.synthesize(
                        text: text,
                        lang: lang,
                        voiceId: voiceId,
                        totalStep: totalStep,
                        speed: speed
                    )
                    try await self.audioEngine.play(pcm: samples, sampleRate: session.sampleRate, volume: Float(volume))
                    self.sendEvent("onSpeakDone", ["id": id])
                    promise.resolve()
                } catch {
                    promise.reject("SYNTHESIS_FAILED", error.localizedDescription)
                }
            }
        }

        /// Streaming synthesis: each chunk is both played locally and forwarded
        /// to JS as base64 PCM16 via `onStreamChunk`. Ends with `onStreamEnd`
        /// or `onStreamError`.
        AsyncFunction("stream") { (id: String, text: String, voiceId: String, lang: String, totalStep: Int, speed: Double, volume: Double, promise: Promise) in
            Task { [weak self] in
                guard let self, let session = self.session else {
                    promise.reject("MODEL_NOT_LOADED", "Supertonic session unavailable")
                    return
                }
                do {
                    try session.loadIfNeeded()
                    try self.audioEngine.beginStream(sampleRate: session.sampleRate, volume: Float(volume))
                    try session.synthesizeStreaming(
                        text: text,
                        lang: lang,
                        voiceId: voiceId,
                        totalStep: totalStep,
                        speed: speed
                    ) { samples in
                        // Int16 conversion happens only at the JS bridge boundary;
                        // the local player keeps consuming float32.
                        let pcm16 = SupertonicSession.toPCM16(samples: samples)
                        self.sendEvent("onStreamChunk", ["id": id, "pcm": pcm16.base64EncodedString()])
                        self.audioEngine.feedStream(chunk: samples)
                    }
                    self.audioEngine.endStream()
                    self.sendEvent("onStreamEnd", ["id": id])
                    promise.resolve()
                } catch {
                    self.audioEngine.endStream()
                    self.sendEvent("onStreamError", ["id": id, "message": error.localizedDescription])
                    promise.reject("SYNTHESIS_FAILED", error.localizedDescription)
                }
            }
        }

        /// Interrupts playback and cancels any in-flight synthesis.
        AsyncFunction("stop") { (promise: Promise) in
            self.audioEngine.stop()
            self.session?.cancel()
            promise.resolve()
        }

        AsyncFunction("clearCache") { (promise: Promise) in
            // Tear down loaded ORTSessions first — they hold references to the
            // files we're about to delete. Otherwise the next loadIfNeeded()
            // would short-circuit (isReady == true) and skip re-loading from
            // disk, masking whether the re-download actually worked.
            self.prefetchTask?.cancel()
            self.audioEngine.stop()
            self.session?.cancel()
            self.session?.tearDown()
            ModelLocator.clearCache()
            self.session = SupertonicSession()
            promise.resolve()
        }
    }
}
import AVFoundation
import Foundation

/// Streams float32 PCM through AVAudioEngine. Used for both blocking `speak()`
/// (one-shot enqueue + wait) and `stream()` (chunk-by-chunk feeding).
///
/// We keep PCM as float32 internally — that's what Supertonic emits and what
/// `AVAudioPlayerNode` natively consumes. The Int16 conversion only happens
/// at the JS bridge boundary (when emitting `onStreamChunk`).
final class AudioEngine {
    private let engine = AVAudioEngine()
    private let player = AVAudioPlayerNode()
    // Sample rate the player is currently connected at; 0 = never configured.
    private var configuredSampleRate: Double = 0
    // True once `player` has been attached to `engine`.
    private var attached = false
    // Set by beginStream(), cleared only by stop() — see endStream() below.
    private var streaming = false
    // Serial queue so buffer creation + scheduling never blocks the caller.
    private let queue = DispatchQueue(label: "ttskit.audioengine", qos: .userInitiated)

    /// Configure (or reconfigure) the player for the given sample rate.
    /// No-op when already attached at the same rate; on a rate change the
    /// player output is disconnected and re-connected with a new format.
    private func ensureAttached(sampleRate: Int) throws {
        let target = Double(sampleRate)
        if attached && configuredSampleRate == target { return }

        if attached {
            engine.disconnectNodeOutput(player)
        } else {
            engine.attach(player)
        }
        let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: target, channels: 1, interleaved: false)
        engine.connect(player, to: engine.mainMixerNode, format: format)

        // .spokenAudio + .duckOthers: lower other apps' audio while speaking.
        let session = AVAudioSession.sharedInstance()
        try session.setCategory(.playback, mode: .spokenAudio, options: [.duckOthers])
        try session.setActive(true, options: [])

        if !engine.isRunning {
            try engine.start()
        }
        configuredSampleRate = target
        attached = true
    }

    /// One-shot playback that resolves only when the buffer has finished playing.
    /// - Parameters:
    ///   - pcm: mono float32 samples; an empty array resolves immediately.
    ///   - sampleRate: sample rate of `pcm` in Hz.
    ///   - volume: player volume (0…1 per AVAudioPlayerNode semantics).
    func play(pcm: [Float], sampleRate: Int, volume: Float) async throws {
        try ensureAttached(sampleRate: sampleRate)
        player.volume = volume
        guard let buffer = makeBuffer(from: pcm, sampleRate: sampleRate) else { return }

        await withCheckedContinuation { (cont: CheckedContinuation<Void, Never>) in
            // The completion handler also fires on player.stop(), so the
            // continuation is resumed exactly once on every path.
            player.scheduleBuffer(buffer, at: nil, options: []) {
                cont.resume()
            }
            if !player.isPlaying { player.play() }
        }
    }

    /// Prepares the player for chunked playback; call before feedStream().
    func beginStream(sampleRate: Int, volume: Float) throws {
        try ensureAttached(sampleRate: sampleRate)
        player.volume = volume
        streaming = true
        if !player.isPlaying { player.play() }
    }

    /// Enqueues one float32 chunk for playback (fire-and-forget).
    func feedStream(chunk: [Float]) {
        // Only refuse work if we've been fully stopped — not on endStream(),
        // which only signals "no more chunks coming." Pending buffers that are
        // already in the async pipeline must still play out.
        guard streaming else { return }
        // NOTE(review): configuredSampleRate is read on the caller's thread
        // while ensureAttached mutates it — confirm both are driven from the
        // same thread/actor in practice.
        let sr = Int(configuredSampleRate)
        queue.async { [weak self] in
            guard let self, let buffer = self.makeBuffer(from: chunk, sampleRate: sr) else { return }
            self.player.scheduleBuffer(buffer, at: nil, options: [], completionHandler: nil)
        }
    }

    /// Signals that no more chunks will be fed. Does NOT cancel pending audio —
    /// `feedStream` may have enqueued buffers on the audio queue that haven't
    /// scheduled yet. Those must play out so the user hears the audio they
    /// synthesized. Use `stop()` to interrupt actual playback.
    func endStream() {
        // Intentionally leaves `streaming = true` so any feedStream() calls
        // still in flight from the synthesis callback complete normally.
        // The flag is reset by `stop()` or by the next `beginStream()`.
    }

    /// Immediately halts playback and discards scheduled buffers.
    func stop() {
        streaming = false
        if player.isPlaying { player.stop() }
    }

    /// Full shutdown: stop playback and the engine, forget the configuration
    /// so the next use runs through ensureAttached() again.
    func tearDown() {
        stop()
        if engine.isRunning { engine.stop() }
        attached = false
        configuredSampleRate = 0
    }

    /// Wraps a float32 sample array in an AVAudioPCMBuffer (mono,
    /// non-interleaved). Returns nil for empty input or format failure.
    private func makeBuffer(from pcm: [Float], sampleRate: Int) -> AVAudioPCMBuffer? {
        guard !pcm.isEmpty,
            let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: Double(sampleRate), channels: 1, interleaved: false),
            let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(pcm.count))
        else { return nil }
        buffer.frameLength = AVAudioFrameCount(pcm.count)
        // Copy samples into channel 0 of the buffer's backing store.
        if let dst = buffer.floatChannelData?.pointee {
            pcm.withUnsafeBufferPointer { src in
                dst.update(from: src.baseAddress!, count: pcm.count)
            }
        }
        return buffer
    }
}