react-native-tts-kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTIONS.md +87 -0
- package/LICENSE +21 -0
- package/README.md +231 -0
- package/android/build.gradle +50 -0
- package/android/src/main/AndroidManifest.xml +3 -0
- package/android/src/main/java/expo/modules/ttskit/RNTTSKitModule.kt +158 -0
- package/android/src/main/java/expo/modules/ttskit/supertonic/AudioEngine.kt +158 -0
- package/android/src/main/java/expo/modules/ttskit/supertonic/ModelLocator.kt +372 -0
- package/android/src/main/java/expo/modules/ttskit/supertonic/SupertonicSession.kt +373 -0
- package/android/src/main/java/expo/modules/ttskit/supertonic/TextFrontend.kt +154 -0
- package/android/src/main/java/expo/modules/ttskit/supertonic/VoicePack.kt +47 -0
- package/build/engines/BufferedStreamEmitter.d.ts +26 -0
- package/build/engines/BufferedStreamEmitter.d.ts.map +1 -0
- package/build/engines/BufferedStreamEmitter.js +68 -0
- package/build/engines/BufferedStreamEmitter.js.map +1 -0
- package/build/engines/Engine.d.ts +15 -0
- package/build/engines/Engine.d.ts.map +1 -0
- package/build/engines/Engine.js +2 -0
- package/build/engines/Engine.js.map +1 -0
- package/build/engines/SupertonicEngine.d.ts +14 -0
- package/build/engines/SupertonicEngine.d.ts.map +1 -0
- package/build/engines/SupertonicEngine.js +183 -0
- package/build/engines/SupertonicEngine.js.map +1 -0
- package/build/engines/SystemEngine.d.ts +13 -0
- package/build/engines/SystemEngine.d.ts.map +1 -0
- package/build/engines/SystemEngine.js +78 -0
- package/build/engines/SystemEngine.js.map +1 -0
- package/build/index.d.ts +46 -0
- package/build/index.d.ts.map +1 -0
- package/build/index.js +118 -0
- package/build/index.js.map +1 -0
- package/build/types.d.ts +77 -0
- package/build/types.d.ts.map +1 -0
- package/build/types.js +2 -0
- package/build/types.js.map +1 -0
- package/build/voices/catalog.d.ts +12 -0
- package/build/voices/catalog.d.ts.map +1 -0
- package/build/voices/catalog.js +28 -0
- package/build/voices/catalog.js.map +1 -0
- package/build/voices/prosody.d.ts +8 -0
- package/build/voices/prosody.d.ts.map +1 -0
- package/build/voices/prosody.js +28 -0
- package/build/voices/prosody.js.map +1 -0
- package/expo-module.config.json +9 -0
- package/ios/RNTTSKit.podspec +28 -0
- package/ios/RNTTSKitModule.swift +133 -0
- package/ios/Supertonic/AudioEngine.swift +110 -0
- package/ios/Supertonic/ModelLocator.swift +416 -0
- package/ios/Supertonic/SupertonicSession.swift +405 -0
- package/ios/Supertonic/TextFrontend.swift +216 -0
- package/ios/Supertonic/VoicePack.swift +51 -0
- package/licenses/OpenRAIL-M.txt +209 -0
- package/package.json +77 -0
- package/src/engines/BufferedStreamEmitter.ts +50 -0
- package/src/engines/Engine.ts +28 -0
- package/src/engines/SupertonicEngine.ts +250 -0
- package/src/engines/SystemEngine.ts +96 -0
- package/src/engines/__tests__/BufferedStreamEmitter.test.ts +65 -0
- package/src/index.ts +156 -0
- package/src/types.ts +95 -0
- package/src/voices/__tests__/catalog.test.ts +46 -0
- package/src/voices/__tests__/prosody.test.ts +63 -0
- package/src/voices/catalog.ts +32 -0
- package/src/voices/prosody.ts +39 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
package expo.modules.ttskit.supertonic
|
|
2
|
+
|
|
3
|
+
import android.media.AudioAttributes
|
|
4
|
+
import android.media.AudioFormat
|
|
5
|
+
import android.media.AudioTrack
|
|
6
|
+
import java.nio.ByteBuffer
|
|
7
|
+
import java.nio.ByteOrder
|
|
8
|
+
import java.util.concurrent.atomic.AtomicBoolean
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Streams float32 PCM through AudioTrack. The model emits float32 samples in
|
|
12
|
+
* [-1, 1]; we feed those straight into AudioTrack's `ENCODING_PCM_FLOAT` so we
|
|
13
|
+
* skip a conversion on the hot path.
|
|
14
|
+
*
|
|
15
|
+
* `play()` blocks until the buffer has actually been rendered (not just
|
|
16
|
+
* enqueued) so the JS-side `onSpeakDone` event fires accurately.
|
|
17
|
+
*/
|
|
18
|
+
class AudioEngine {
|
|
19
|
+
private var track: AudioTrack? = null
|
|
20
|
+
private var configuredSampleRate: Int = 0
|
|
21
|
+
private val streaming = AtomicBoolean(false)
|
|
22
|
+
// Frames written via feedStream() since the last beginStream(). Used by
|
|
23
|
+
// endStream() to wait for the playback head to catch up so the caller's
|
|
24
|
+
// "done" callback fires after the user actually hears the last samples.
|
|
25
|
+
private var streamFramesWritten: Long = 0
|
|
26
|
+
// playbackHeadPosition value at beginStream(). Used as a baseline because
|
|
27
|
+
// the track is reused across calls (ensureTrack returns the existing track
|
|
28
|
+
// when the sample rate matches) and the head counter is monotonic across
|
|
29
|
+
// its lifetime, not per-stream.
|
|
30
|
+
private var streamHeadBaseline: Long = 0
|
|
31
|
+
|
|
32
|
+
/**
 * Returns an [AudioTrack] set up for mono float32 streaming at [sampleRate].
 *
 * The cached track is handed back when it was built for the same sample
 * rate. Otherwise the cached track (if any) is released, ignoring release
 * failures, and a new one is built and cached.
 */
private fun ensureTrack(sampleRate: Int): AudioTrack {
    track?.let { cached ->
        if (configuredSampleRate == sampleRate) return cached
        // Sample rate changed: drop the old track; a failing release() is ignored.
        cached.runCatching { release() }
    }
    track = null

    // Use the platform-reported minimum, but never less than 32 KiB.
    val bufferBytes = maxOf(
        AudioTrack.getMinBufferSize(
            sampleRate,
            AudioFormat.CHANNEL_OUT_MONO,
            AudioFormat.ENCODING_PCM_FLOAT
        ),
        32 * 1024
    )
    val attributes = AudioAttributes.Builder()
        .setUsage(AudioAttributes.USAGE_ASSISTANT)
        .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
        .build()
    val format = AudioFormat.Builder()
        .setEncoding(AudioFormat.ENCODING_PCM_FLOAT)
        .setSampleRate(sampleRate)
        .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
        .build()
    val created = AudioTrack.Builder()
        .setAudioAttributes(attributes)
        .setAudioFormat(format)
        .setBufferSizeInBytes(bufferBytes)
        .setTransferMode(AudioTrack.MODE_STREAM)
        .build()

    track = created
    configuredSampleRate = sampleRate
    return created
}
|
|
66
|
+
|
|
67
|
+
/**
 * Blocking playback. Returns once playback has actually drained, not just
 * been enqueued, then stops and flushes the track.
 *
 * @param samples float32 mono samples; an empty array is a no-op.
 * @param sampleRate sample rate used to obtain (or rebuild) the track.
 * @param volume playback gain, clamped to [0, 1].
 */
fun play(samples: FloatArray, sampleRate: Int, volume: Float) {
    if (samples.isEmpty()) return
    val t = ensureTrack(sampleRate)
    t.setVolume(volume.coerceIn(0f, 1f))
    if (t.playState != AudioTrack.PLAYSTATE_PLAYING) t.play()

    // The track is reused across calls and endStream() neither stops nor
    // flushes it, so the head counter may already be non-zero here. Measure
    // progress relative to this baseline instead of assuming it starts at 0;
    // otherwise the wait below ends immediately and stop() truncates audio.
    // The position is a signed Int used as an unsigned 32-bit counter.
    val headBaseline = t.playbackHeadPosition.toLong() and 0xFFFFFFFFL

    var written = 0
    while (written < samples.size) {
        val n = t.write(samples, written, samples.size - written, AudioTrack.WRITE_BLOCKING)
        if (n <= 0) break // error code or nothing accepted: give up writing
        written += n
    }

    // Wait for the playback head to reach the end: write() returns when the
    // data is buffered, not when it has been played.
    val pollIntervalMs = 20L
    var polls = 0
    while (polls < 5_000) { // same safety cap as before: 5000 polls of 20 ms
        val head = t.playbackHeadPosition.toLong() and 0xFFFFFFFFL
        // Unsigned 32-bit difference stays correct across counter wrap-around.
        val played = (head - headBaseline) and 0xFFFFFFFFL
        if (played >= written) break
        Thread.sleep(pollIntervalMs)
        polls++
    }
    t.stop()
    t.flush()
}
|
|
92
|
+
|
|
93
|
+
/**
 * Opens a streaming session: obtains the track for [sampleRate], applies
 * [volume] clamped to [0, 1], starts playback when the track is not already
 * playing, and resets the per-stream frame accounting that endStream() uses.
 */
fun beginStream(sampleRate: Int, volume: Float) {
    val audioTrack = ensureTrack(sampleRate)
    val clampedVolume = volume.coerceIn(0f, 1f)
    audioTrack.setVolume(clampedVolume)

    val alreadyPlaying = audioTrack.playState == AudioTrack.PLAYSTATE_PLAYING
    if (!alreadyPlaying) {
        audioTrack.play()
    }

    streamFramesWritten = 0
    // Widen the Int head position to Long without sign extension.
    streamHeadBaseline = audioTrack.playbackHeadPosition.toLong() and 0xFFFFFFFFL
    streaming.set(true)
}
|
|
101
|
+
|
|
102
|
+
/**
 * Writes [chunk] to the active stream with blocking writes.
 *
 * Does nothing when no stream is active or no track exists. Writing stops
 * early when the stream is ended from elsewhere or when the track reports
 * an error or accepts no samples. The number of samples actually written
 * is added to the per-stream frame counter.
 */
fun feedStream(chunk: FloatArray) {
    if (!streaming.get()) return
    val audioTrack = track ?: return

    var offset = 0
    while (true) {
        if (offset >= chunk.size || !streaming.get()) break
        val accepted = audioTrack.write(chunk, offset, chunk.size - offset, AudioTrack.WRITE_BLOCKING)
        if (accepted <= 0) break
        offset += accepted
    }
    streamFramesWritten += offset
}
|
|
113
|
+
|
|
114
|
+
/**
 * Waits for the AudioTrack to actually play out the frames written since
 * beginStream() before returning, so the caller's "done" callback fires
 * after the user hears the last samples, not just when they were enqueued.
 *
 * The wait is bounded to 10 seconds and is abandoned as soon as the stream
 * is cancelled via stop(). Stream state is always reset on return.
 */
fun endStream() {
    val t = track
    val framesToDrain = streamFramesWritten
    if (t != null && streaming.get() && framesToDrain > 0) {
        val deadline = System.currentTimeMillis() + 10_000
        // Re-check `streaming` on every poll: stop() clears it and flushes
        // the track, after which the written frames will never be played
        // and waiting for them would only burn the full timeout.
        while (streaming.get() && System.currentTimeMillis() < deadline) {
            val head = t.playbackHeadPosition.toLong() and 0xFFFFFFFFL
            // Unsigned 32-bit distance from the baseline; unlike comparing
            // against baseline + written, this survives counter wrap-around.
            val played = (head - streamHeadBaseline) and 0xFFFFFFFFL
            if (played >= framesToDrain) break
            Thread.sleep(20)
        }
    }
    streaming.set(false)
    streamFramesWritten = 0
    streamHeadBaseline = 0
}
|
|
134
|
+
|
|
135
|
+
/**
 * Cancels any active stream and, when a track exists, pauses and flushes
 * it. Failures from pause()/flush() are deliberately ignored.
 */
fun stop() {
    streaming.set(false)
    val active = track ?: return
    runCatching {
        active.pause()
        active.flush()
    }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Stops playback and releases the track (ignoring release failures), then
 * clears the cached track and its configured sample rate.
 */
fun tearDown() {
    stop()
    val doomed = track
    if (doomed != null) {
        runCatching { doomed.release() }
    }
    track = null
    configuredSampleRate = 0
}
|
|
146
|
+
|
|
147
|
+
/** Convert float32 samples to little-endian PCM16 for the JS bridge. */
|
|
148
|
+
companion object {
|
|
149
|
+
/**
 * Converts float samples to 16-bit little-endian PCM.
 *
 * Each sample is clamped to [-1, 1], multiplied by 32767 and truncated
 * toward zero.
 *
 * @return a byte array of `samples.size * 2` bytes.
 */
fun toPcm16(samples: FloatArray): ByteArray {
    val pcm = ByteBuffer.allocate(samples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
    samples.forEach { sample ->
        val bounded = when {
            sample > 1f -> 1f
            sample < -1f -> -1f
            else -> sample
        }
        pcm.putShort((bounded * 32767f).toInt().toShort())
    }
    return pcm.array()
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
package expo.modules.ttskit.supertonic
|
|
2
|
+
|
|
3
|
+
import android.content.Context
|
|
4
|
+
import java.io.File
|
|
5
|
+
import java.net.URL
|
|
6
|
+
import java.security.MessageDigest
|
|
7
|
+
|
|
8
|
+
object ModelLocator {
|
|
9
|
+
/**
|
|
10
|
+
* Weight precision tier. fp16 is a smaller download but only lives on
|
|
11
|
+
* the ahk-d mirror — the upstream Supertone repo ships fp32 only. See
|
|
12
|
+
* `tools/quantize.md` for how the fp16 files are produced and validated.
|
|
13
|
+
* ONNX graph I/O is float32 for both tiers (fp16 uses keep_io_types),
|
|
14
|
+
* so SupertonicSession.kt does not need to change between them.
|
|
15
|
+
*
|
|
16
|
+
* Int8 was evaluated and dropped: MatMul-only int8 (required to avoid
|
|
17
|
+
* ConvInteger ops the iOS CPU EP refuses) produced ~94%-of-fp32 sizes
|
|
18
|
+
* AND -1 dB SNR vs fp32 — unusable. Not worth a separate tier.
|
|
19
|
+
*/
|
|
20
|
+
enum class Precision(val onnxSubdir: String, val hasUpstreamFallback: Boolean) {
|
|
21
|
+
FP32("onnx", true),
|
|
22
|
+
FP16("onnx-fp16", false),
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Default tier shipped to users.
|
|
27
|
+
*
|
|
28
|
+
* Android: FP32. ONNX Runtime's XNNPACK EP and CPU EP do not have native
|
|
29
|
+
* fp16 kernels — loading an fp16 model triggers a Cast-storm at every
|
|
30
|
+
* fp16↔fp32 boundary (ORT issue #25824) that makes synthesis ~10× slower
|
|
31
|
+
* AND introduces numerical error that garbles diffusion-model audio.
|
|
32
|
+
* Instead we ship fp32 weights and use the NNAPI EP with USE_FP16 to get
|
|
33
|
+
* runtime fp16 math on the device. See SupertonicSession.kt for the EP
|
|
34
|
+
* config and tools/quantize.md for the full rationale.
|
|
35
|
+
*
|
|
36
|
+
* iOS uses FP16 because CoreML / iOS CPU EP has true end-to-end fp16
|
|
37
|
+
* kernels — set in ModelLocator.swift, independent of this Kotlin value.
|
|
38
|
+
*/
|
|
39
|
+
val PRECISION: Precision = Precision.FP32
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Mirror sources, tried in order. We host a pinned mirror of the
|
|
43
|
+
* Supertonic-3 multilingual weights so:
|
|
44
|
+
* - Upstream availability changes (deletes, renames, paywall) don't
|
|
45
|
+
* break installed copies of this package.
|
|
46
|
+
* - We control when consumers see new model versions; an unpinned
|
|
47
|
+
* `main` would let surprise upstream pushes change behavior.
|
|
48
|
+
*
|
|
49
|
+
* Both entries are pinned to commit SHAs. The fallback is the official
|
|
50
|
+
* Supertone repo at the *same* logical version — never v2 / v1.
|
|
51
|
+
*
|
|
52
|
+
* The two SHAs differ because each repo has its own commit history,
|
|
53
|
+
* but the file contents at these revisions are byte-identical at the
|
|
54
|
+
* fp32 tier.
|
|
55
|
+
*/
|
|
56
|
+
private const val MIRROR_REVISION = "4cb89eb91e92e9a92b60cac890b464f55a5d0064"
|
|
57
|
+
private const val UPSTREAM_REVISION = "724fb5abbf5502583fb520898d45929e62f02c0b"
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Per-tier URL list. fp32 falls back to upstream; quantized tiers are
|
|
61
|
+
* mirror-only because upstream does not host them.
|
|
62
|
+
*/
|
|
63
|
+
private val BASES: List<String>
|
|
64
|
+
get() = buildList {
|
|
65
|
+
add("https://huggingface.co/ahk-d/supertonic-3/resolve/$MIRROR_REVISION")
|
|
66
|
+
if (PRECISION.hasUpstreamFallback) {
|
|
67
|
+
add("https://huggingface.co/Supertone/supertonic-3/resolve/$UPSTREAM_REVISION")
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
val ONNX_FILES = listOf(
|
|
72
|
+
"duration_predictor.onnx",
|
|
73
|
+
"text_encoder.onnx",
|
|
74
|
+
"vector_estimator.onnx",
|
|
75
|
+
"vocoder.onnx",
|
|
76
|
+
"tts.json",
|
|
77
|
+
"unicode_indexer.json"
|
|
78
|
+
)
|
|
79
|
+
val VOICE_IDS = listOf("M1", "M2", "M3", "M4", "M5", "F1", "F2", "F3", "F4", "F5")
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* SHA-256 fingerprints of every shipped file at the pinned mirror commit.
|
|
83
|
+
*
|
|
84
|
+
* `downloadWithFallback()` verifies each file post-download and rejects the
|
|
85
|
+
* mirror+fallback pair if both serve corrupted or substituted bytes.
|
|
86
|
+
* To regenerate when bumping MIRROR_REVISION/UPSTREAM_REVISION: run
|
|
87
|
+
* `tools/fingerprint.sh` and paste output here. Cross-checked against
|
|
88
|
+
* upstream — values are byte-identical between the two repos.
|
|
89
|
+
*/
|
|
90
|
+
val EXPECTED_HASHES: Map<String, String> = mapOf(
|
|
91
|
+
"onnx/duration_predictor.onnx" to "c3eb91414d5ff8a7a239b7fe9e34e7e2bf8a8140d8375ffb14718b1c639325db",
|
|
92
|
+
"onnx/text_encoder.onnx" to "c7befd5ea8c3119769e8a6c1486c4edc6a3bc8365c67621c881bbb774b9902ff",
|
|
93
|
+
"onnx/vector_estimator.onnx" to "883ac868ea0275ef0e991524dc64f16b3c0376efd7c320af6b53f5b780d7c61c",
|
|
94
|
+
"onnx/vocoder.onnx" to "085de76dd8e8d5836d6ca66826601f615939218f90e519f70ee8a36ed2a4c4ba",
|
|
95
|
+
"onnx/tts.json" to "42078d3aef1cd43ab43021f3c54f47d2d75ceb4e75f627f118890128b06a0d09",
|
|
96
|
+
"onnx/unicode_indexer.json" to "9bf7346e43883a81f8645c81224f786d43c5b57f3641f6e7671a7d6c493cb24f",
|
|
97
|
+
"voice_styles/F1.json" to "bbdec6ee00231c2c742ad05483df5334cab3b52fda3ba38e6a07059c4563dbc2",
|
|
98
|
+
"voice_styles/F2.json" to "7c722c6a72707b1a77f035d67f0d1351ba187738e06f7683e8c72b1df3477fc6",
|
|
99
|
+
"voice_styles/F3.json" to "12f6ef2573baa2defa1128069cb59f203e3ab67c92af77b42df8a0e3a2f7c6ab",
|
|
100
|
+
"voice_styles/F4.json" to "c2fa764c1225a76dfc3e2c73e8aa4f70d9ee48793860eb34c295fff01c2e032b",
|
|
101
|
+
"voice_styles/F5.json" to "45966e73316415626cf41a7d1c6f3b4c70dbc1ba2bee5c1978ef0ce33244fc8d",
|
|
102
|
+
"voice_styles/M1.json" to "e35604687f5d23694b8e91593a93eec0e4eca6c0b02bb8ed69139ab2ea6b0a5b",
|
|
103
|
+
"voice_styles/M2.json" to "b76cbf62bac707c710cf0ae5aba5e31eea1a6339a9734bfae33ab98499534a50",
|
|
104
|
+
"voice_styles/M3.json" to "ea1ac35ccb91b0d7ecad533a2fbd0eec10c91513d8951e3b25fbba99954e159b",
|
|
105
|
+
"voice_styles/M4.json" to "ca8eefad4fcd989c9379032ff3e50738adc547eeb5e221b82593a6d7b3bac303",
|
|
106
|
+
"voice_styles/M5.json" to "dd22b92740314321f8ae11c5e87f8dd60d060f15dd3a632b5adf77f471f77af2",
|
|
107
|
+
|
|
108
|
+
// fp16 weights — produced by tools/quantize_colab.ipynb.
|
|
109
|
+
// Attention sub-graphs kept in fp32 to work around an onnxconverter_common
|
|
110
|
+
// bug; vector_estimator therefore ends up at ~54% of fp32 instead of 50%.
|
|
111
|
+
// Paste new hashes here when re-quantizing; placeholder values must be
|
|
112
|
+
// updated together with the MIRROR_REVISION SHA above.
|
|
113
|
+
"onnx-fp16/duration_predictor.onnx" to "95bf8c2dd3affd6e40bb57ad1c76018e47abc7b56a7978fe211ebe1359e478f1",
|
|
114
|
+
"onnx-fp16/text_encoder.onnx" to "fdfb21cb1596a6ac84699a6a0e236add97f95bfb492264209807777dd6c2e046",
|
|
115
|
+
"onnx-fp16/vector_estimator.onnx" to "7df9169002c8b8af4990bb1370cbb1c6600bcffef9749d9a83200e1b30a7a8b8",
|
|
116
|
+
"onnx-fp16/vocoder.onnx" to "f409960b6e74ef6e51c32b2cc77047ffbd426179f341214f42efb2a61aa91e57",
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
/**
 * Root directory for model files under the app's files dir. Creates the
 * root, the active precision subdirectory and `voice_styles` on each call.
 */
private fun supportDir(ctx: Context): File {
    val root = File(ctx.filesDir, "RNTTSKit/Supertonic")
    root.mkdirs()
    File(root, PRECISION.onnxSubdir).mkdirs()
    File(root, "voice_styles").mkdirs()
    return root
}
|
|
123
|
+
|
|
124
|
+
/** Directory holding the model files of the active [PRECISION] tier. */
fun onnxDir(ctx: Context): File {
    val root = supportDir(ctx)
    return File(root, PRECISION.onnxSubdir)
}
|
|
125
|
+
/** Directory holding the voice style JSON files. */
fun voicesDir(ctx: Context): File {
    val root = supportDir(ctx)
    return File(root, "voice_styles")
}
|
|
126
|
+
|
|
127
|
+
/**
 * Opens the bundled asset at `assets/models/<relPath>`.
 *
 * @return the asset stream, or `null` when opening it throws.
 */
private fun bundledStream(ctx: Context, relPath: String): java.io.InputStream? {
    val assetPath = "models/$relPath"
    return try {
        ctx.assets.open(assetPath)
    } catch (_: Exception) {
        null
    }
}
|
|
133
|
+
|
|
134
|
+
/**
 * Absolute path where the model/config file [name] lives in the active
 * precision directory.
 *
 * When the file is not on disk yet and a bundled asset exists, the asset is
 * copied into place first. The returned path may still point at a missing
 * file when there is no bundled copy.
 */
fun resolvedOnnxPath(ctx: Context, name: String): String {
    val target = File(onnxDir(ctx), name)
    if (!target.exists()) {
        // Config files always live under onnx/ in pre-bundled assets too,
        // since quantization doesn't touch them.
        val assetSubdir = if (name.endsWith(".json")) "onnx" else PRECISION.onnxSubdir
        val asset = bundledStream(ctx, "$assetSubdir/$name")
        if (asset != null) {
            asset.use { src ->
                target.parentFile?.mkdirs()
                target.outputStream().use { dst -> src.copyTo(dst) }
            }
        }
    }
    return target.absolutePath
}
|
|
148
|
+
|
|
149
|
+
/**
 * Absolute path of the style file for [voiceId] in the voices directory.
 *
 * When the file is not on disk yet and a bundled asset exists, the asset is
 * copied into place first. The returned path may still point at a missing
 * file when there is no bundled copy.
 */
fun resolvedVoicePath(ctx: Context, voiceId: String): String {
    val target = File(voicesDir(ctx), "$voiceId.json")
    if (!target.exists()) {
        val asset = bundledStream(ctx, "voice_styles/$voiceId.json")
        if (asset != null) {
            asset.use { src ->
                target.parentFile?.mkdirs()
                target.outputStream().use { dst -> src.copyTo(dst) }
            }
        }
    }
    return target.absolutePath
}
|
|
159
|
+
|
|
160
|
+
/**
 * Deletes everything under filesDir/RNTTSKit/Supertonic (every precision
 * subdirectory plus voice_styles) and logs the outcome. Files bundled in
 * app assets are not stored there and are therefore unaffected.
 */
fun clearCache(ctx: Context) {
    val cacheRoot = File(ctx.filesDir, "RNTTSKit/Supertonic")
    if (cacheRoot.deleteRecursively()) {
        android.util.Log.i("ST.locator", "cleared cache at ${cacheRoot.absolutePath}")
        return
    }
    android.util.Log.w("ST.locator", "clearCache failed at ${cacheRoot.absolutePath}")
}
|
|
175
|
+
|
|
176
|
+
/**
 * True when every file in [ONNX_FILES] resolves to an existing file and at
 * least one voice in [VOICE_IDS] does too. Resolving a path may copy a
 * bundled asset into place as a side effect.
 */
fun modelExists(ctx: Context): Boolean {
    val allModelFilesPresent = ONNX_FILES.all { name ->
        File(resolvedOnnxPath(ctx, name)).exists()
    }
    if (!allModelFilesPresent) return false
    return VOICE_IDS.any { voice -> File(resolvedVoicePath(ctx, voice)).exists() }
}
|
|
182
|
+
|
|
183
|
+
/** Candidate download URLs for [relativePath], one per base, in the order they are tried. */
private fun candidateUrls(relativePath: String): List<String> {
    val urls = ArrayList<String>()
    for (base in BASES) {
        urls.add("$base/$relativePath")
    }
    return urls
}
|
|
186
|
+
|
|
187
|
+
/**
 * Decides whether [file] has to be (re-)downloaded.
 *
 * Returns true when the file is missing, or when a hash is registered for
 * [relativePath] in [EXPECTED_HASHES] and the file's SHA-256 differs from
 * it. In the mismatch case the file is deleted and a warning is logged.
 * A file with no registered hash is trusted as long as it exists.
 */
private fun needsDownload(file: File, relativePath: String): Boolean {
    if (!file.exists()) return true

    val wanted = EXPECTED_HASHES[relativePath]
    if (wanted == null) return false

    val found = sha256(file)
    val matches = found.equals(wanted, ignoreCase = true)
    if (!matches) {
        android.util.Log.w(
            "ST.locator",
            "cached $relativePath hash mismatch (have ${found.take(12)}, want ${wanted.take(12)}) — re-downloading"
        )
        file.delete()
    }
    return !matches
}
|
|
207
|
+
|
|
208
|
+
/**
 * Makes sure every model file and voice style is on disk and passes its
 * hash check, downloading whatever is missing or stale.
 *
 * @param ctx context used to resolve the storage directories and assets.
 * @param onProgress invoked with (bytesDone, totalBytes). The total is never
 *   zero and never smaller than bytesDone, even when the size of some files
 *   could not be discovered up front. A cache hit reports (1, 1).
 * @throws Exception whatever the per-file download raises once all mirrors
 *   have failed for a file.
 */
// NOTE(review): this function has no suspension points and performs blocking
// network/file I/O on the caller's context — confirm callers dispatch it off
// the main thread.
suspend fun ensureModel(ctx: Context, onProgress: (Long, Long) -> Unit) {
    // (relative path, candidate URL list, destination). First-success-wins per file.
    data class Pending(val rel: String, val urls: List<String>, val dst: File)
    val pending = mutableListOf<Pending>()
    for (f in ONNX_FILES) {
        val dst = File(resolvedOnnxPath(ctx, f))
        // Config files (tts.json, unicode_indexer.json) only live under
        // upstream's onnx/ — quantization doesn't touch them. Pull from
        // the fp32 path regardless of the active precision tier.
        val isConfig = f.endsWith(".json")
        val rel = "${if (isConfig) "onnx" else PRECISION.onnxSubdir}/$f"
        if (needsDownload(dst, rel)) {
            pending.add(Pending(rel, candidateUrls(rel), dst))
        }
    }
    for (v in VOICE_IDS) {
        val dst = File(resolvedVoicePath(ctx, v))
        val rel = "voice_styles/$v.json"
        if (needsDownload(dst, rel)) {
            pending.add(Pending(rel, candidateUrls(rel), dst))
        }
    }
    if (pending.isEmpty()) {
        logCachedSize(ctx, "cache hit")
        onProgress(1, 1)
        return
    }
    android.util.Log.i("ST.locator", "downloading ${pending.size} file(s) (precision=${PRECISION.onnxSubdir})")
    // Discover sizes from whichever mirror responds first. Used for progress
    // accounting only — the actual download will surface failures if all
    // mirrors are unreachable. A size of 0 means "unknown".
    val totals = LongArray(pending.size) { i -> firstSuccessfulSize(pending[i].urls) }
    val grandTotal = totals.sum()
    var alreadyDownloaded = 0L
    for ((i, p) in pending.withIndex()) {
        downloadWithFallback(p.urls, p.dst, p.rel) { fileBytes ->
            val done = alreadyDownloaded + fileBytes
            // When some sizes are unknown the discovered total undercounts;
            // never report a total of 0 or one smaller than the bytes done.
            onProgress(done, maxOf(grandTotal, done))
        }
        // Log each file's on-disk size so a download summary shows up incrementally.
        val sz = if (p.dst.exists()) p.dst.length() else -1L
        android.util.Log.i("ST.locator", "downloaded ${p.rel} (${formatBytes(sz)})")
        alreadyDownloaded += totals[i]
    }
    // Report completion as a well-formed 100% even if no size was discovered,
    // matching the (1, 1) convention of the cache-hit path.
    val finalTotal = if (grandTotal > 0) grandTotal else 1L
    onProgress(finalTotal, finalTotal)
    logCachedSize(ctx, "downloaded")
}
|
|
253
|
+
|
|
254
|
+
/**
 * Logs one line with the combined size and count of all regular files under
 * the active onnx directory and the voices directory, prefixed by [prefix].
 */
private fun logCachedSize(ctx: Context, prefix: String) {
    val cachedFiles = listOf(onnxDir(ctx), voicesDir(ctx))
        .flatMap { dir -> dir.walkTopDown().filter { it.isFile }.toList() }
    val totalBytes = cachedFiles.sumOf { it.length() }
    val fileCount = cachedFiles.size
    android.util.Log.i(
        "ST.locator",
        "$prefix: ${formatBytes(totalBytes)} across $fileCount file(s) under ${supportDir(ctx).absolutePath}"
    )
}
|
|
273
|
+
|
|
274
|
+
/**
 * Human-readable size for logs: "138.1 MB" / "1.9 MB" / "8.3 KB" / "1.25 GB".
 *
 * Always formatted with a fixed US locale so log lines use a dot as the
 * decimal separator regardless of the device locale.
 *
 * @param bytes size in bytes; a negative value means "unknown".
 * @return the formatted size, or "—" for a negative input.
 */
private fun formatBytes(bytes: Long): String {
    if (bytes < 0) return "—"
    val locale = java.util.Locale.US
    val kb = bytes / 1024.0
    val mb = kb / 1024.0
    return when {
        kb < 1024.0 -> "%.1f KB".format(locale, kb)
        mb < 1024.0 -> "%.1f MB".format(locale, mb)
        else -> "%.2f GB".format(locale, mb / 1024.0)
    }
}
|
|
283
|
+
|
|
284
|
+
/**
 * Probes [urls] in order with an HTTP HEAD request and returns the first
 * positive Content-Length from a 2xx response.
 *
 * Any failure on a URL (bad URL, non-HTTP connection, network error,
 * non-2xx status, missing length) moves on to the next one.
 *
 * @return the discovered size in bytes, or 0 when no URL yields one.
 */
private fun firstSuccessfulSize(urls: List<String>): Long {
    for (u in urls) {
        var conn: java.net.HttpURLConnection? = null
        try {
            conn = (URL(u).openConnection() as java.net.HttpURLConnection).apply {
                requestMethod = "HEAD"
                connectTimeout = 15_000
                readTimeout = 15_000
            }
            conn.connect()
            if (conn.responseCode in 200..299) {
                val len = conn.contentLengthLong
                if (len > 0) return len
            }
        } catch (_: Exception) {
            // try next mirror
        } finally {
            // Release the connection on every path, including when
            // connect() or responseCode throws.
            conn?.disconnect()
        }
    }
    return 0
}
|
|
305
|
+
|
|
306
|
+
/**
 * Downloads one file, trying each URL in [candidates] in order until one
 * succeeds.
 *
 * When a hash is registered for [relativePath] in [EXPECTED_HASHES], the
 * downloaded file must match it; on mismatch the file is deleted and the
 * next candidate is tried. Without a registered hash, the first download
 * that completes wins.
 *
 * @param onProgress forwarded to the underlying download with the byte
 *   count of the current attempt.
 * @throws Exception the last failure seen, or a RuntimeException when
 *   [candidates] is empty.
 */
private fun downloadWithFallback(
    candidates: List<String>,
    destination: File,
    relativePath: String,
    onProgress: (Long) -> Unit
) {
    val wantedHash = EXPECTED_HASHES[relativePath]
    var failure: Exception? = null
    for (mirrorUrl in candidates) {
        try {
            download(mirrorUrl, destination, onProgress)
            if (wantedHash == null) return
            val gotHash = sha256(destination)
            if (gotHash.equals(wantedHash, ignoreCase = true)) return
            // Wrong bytes: discard them and fall through to the next mirror.
            destination.delete()
            failure = RuntimeException(
                "Downloaded $relativePath failed SHA-256 check (mirror may be compromised or stale)."
            )
        } catch (e: Exception) {
            failure = e
        }
    }
    throw failure ?: RuntimeException("All mirrors failed for ${destination.name}")
}
|
|
335
|
+
|
|
336
|
+
/**
 * Computes the SHA-256 of [file] by reading it in 64 KiB chunks, so the
 * file is never held in memory as a whole.
 *
 * @return the digest as 64 lowercase hex characters.
 */
private fun sha256(file: File): String {
    val digest = MessageDigest.getInstance("SHA-256")
    val chunk = ByteArray(64 * 1024)
    file.inputStream().use { stream ->
        var count = stream.read(chunk)
        while (count > 0) {
            digest.update(chunk, 0, count)
            count = stream.read(chunk)
        }
    }
    val hex = StringBuilder()
    for (b in digest.digest()) {
        hex.append("%02x".format(b))
    }
    return hex.toString()
}
|
|
348
|
+
|
|
349
|
+
/**
 * Downloads [urlStr] into [destination] via a sibling `.part` file that is
 * renamed into place once the transfer completes.
 *
 * @param urlStr URL to fetch.
 * @param destination final file location; an existing file is replaced.
 * @param onProgress called periodically, and once at the end, with the
 *   number of bytes downloaded so far for this file.
 * @throws java.io.IOException when connecting or reading fails or times out.
 * @throws RuntimeException when the finished file cannot be moved into place.
 */
private fun download(urlStr: String, destination: File, onProgress: (Long) -> Unit) {
    val tmp = File(destination.parentFile, destination.name + ".part")
    tmp.parentFile?.mkdirs()
    // Same timeouts as the HEAD probe: without them a stalled mirror would
    // block forever and the fallback mirror would never be tried.
    val conn = URL(urlStr).openConnection().apply {
        connectTimeout = 15_000
        readTimeout = 15_000
    }
    var downloaded = 0L
    var installed = false
    try {
        conn.connect()
        conn.getInputStream().use { input ->
            tmp.outputStream().use { output ->
                val buffer = ByteArray(64 * 1024)
                while (true) {
                    val n = input.read(buffer)
                    if (n <= 0) break
                    output.write(buffer, 0, n)
                    downloaded += n
                    // Throttle callbacks to roughly one per 256 KiB.
                    if (downloaded % (256 * 1024) < 64 * 1024) onProgress(downloaded)
                }
            }
        }
        if (destination.exists()) destination.delete()
        if (!tmp.renameTo(destination)) {
            throw RuntimeException("Failed to install ${destination.absolutePath}")
        }
        installed = true
    } finally {
        // Don't leave a partial file behind when the transfer or the rename failed.
        if (!installed) tmp.delete()
        (conn as? java.net.HttpURLConnection)?.disconnect()
    }
    onProgress(downloaded)
}
|
|
372
|
+
}
|