react-native-sherpa-onnx 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +3 -0
  2. package/android/src/main/assets/model_licenses/alignment-models-license-status.csv +5 -0
  3. package/android/src/main/cpp/CMakeLists.txt +3 -0
  4. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.cpp +66 -0
  5. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.h +17 -0
  6. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-alignment.cpp +108 -0
  7. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +30 -0
  8. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.cpp +66 -0
  9. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.h +30 -0
  10. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +21 -0
  11. package/android/src/main/java/com/sherpaonnx/SherpaOnnxAlignmentHelper.kt +555 -0
  12. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +76 -0
  13. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTextSegmenter.kt +330 -0
  14. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +114 -10
  15. package/ios/Resources/model_licenses/alignment-models-license-status.csv +5 -0
  16. package/ios/SherpaOnnx+Alignment.mm +704 -0
  17. package/ios/SherpaOnnx+STT.mm +6 -0
  18. package/ios/SherpaOnnx+TTS.mm +624 -50
  19. package/ios/model_detect/sherpa-onnx-model-detect-alignment.mm +108 -0
  20. package/ios/model_detect/sherpa-onnx-model-detect.h +31 -0
  21. package/ios/model_detect/sherpa-onnx-validate-alignment.h +30 -0
  22. package/ios/model_detect/sherpa-onnx-validate-alignment.mm +66 -0
  23. package/ios/stt/sherpa-onnx-stt-wrapper.h +3 -1
  24. package/ios/stt/sherpa-onnx-stt-wrapper.mm +6 -0
  25. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  26. package/lib/module/alignment/index.js +27 -0
  27. package/lib/module/alignment/index.js.map +1 -0
  28. package/lib/module/alignment/types.js +2 -0
  29. package/lib/module/alignment/types.js.map +1 -0
  30. package/lib/module/alignment/vocab.js +40 -0
  31. package/lib/module/alignment/vocab.js.map +1 -0
  32. package/lib/module/download/paths.js +9 -1
  33. package/lib/module/download/paths.js.map +1 -1
  34. package/lib/module/download/registry.js +17 -1
  35. package/lib/module/download/registry.js.map +1 -1
  36. package/lib/module/download/types.js +1 -0
  37. package/lib/module/download/types.js.map +1 -1
  38. package/lib/module/index.js +6 -4
  39. package/lib/module/index.js.map +1 -1
  40. package/lib/module/licenses.js +8 -2
  41. package/lib/module/licenses.js.map +1 -1
  42. package/lib/module/stt/types.js.map +1 -1
  43. package/lib/module/tts/index.js +68 -2
  44. package/lib/module/tts/index.js.map +1 -1
  45. package/lib/module/tts/subtitles.js +400 -0
  46. package/lib/module/tts/subtitles.js.map +1 -0
  47. package/lib/module/tts/tempAudio.js +17 -0
  48. package/lib/module/tts/tempAudio.js.map +1 -0
  49. package/lib/module/tts/types.js.map +1 -1
  50. package/lib/typescript/src/NativeSherpaOnnx.d.ts +34 -3
  51. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  52. package/lib/typescript/src/alignment/index.d.ts +8 -0
  53. package/lib/typescript/src/alignment/index.d.ts.map +1 -0
  54. package/lib/typescript/src/alignment/types.d.ts +23 -0
  55. package/lib/typescript/src/alignment/types.d.ts.map +1 -0
  56. package/lib/typescript/src/alignment/vocab.d.ts +5 -0
  57. package/lib/typescript/src/alignment/vocab.d.ts.map +1 -0
  58. package/lib/typescript/src/download/paths.d.ts +5 -2
  59. package/lib/typescript/src/download/paths.d.ts.map +1 -1
  60. package/lib/typescript/src/download/registry.d.ts.map +1 -1
  61. package/lib/typescript/src/download/types.d.ts +2 -1
  62. package/lib/typescript/src/download/types.d.ts.map +1 -1
  63. package/lib/typescript/src/index.d.ts +1 -0
  64. package/lib/typescript/src/index.d.ts.map +1 -1
  65. package/lib/typescript/src/licenses.d.ts.map +1 -1
  66. package/lib/typescript/src/stt/types.d.ts +5 -2
  67. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  68. package/lib/typescript/src/tts/index.d.ts +2 -1
  69. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  70. package/lib/typescript/src/tts/subtitles.d.ts +24 -0
  71. package/lib/typescript/src/tts/subtitles.d.ts.map +1 -0
  72. package/lib/typescript/src/tts/tempAudio.d.ts +3 -0
  73. package/lib/typescript/src/tts/tempAudio.d.ts.map +1 -0
  74. package/lib/typescript/src/tts/types.d.ts +68 -2
  75. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  76. package/package.json +6 -1
  77. package/scripts/alignment-models/README.md +90 -0
  78. package/scripts/alignment-models/build_and_upload.js +724 -0
  79. package/scripts/alignment-models/sources.csv +5 -0
  80. package/scripts/alignment-models/sync_alignment_license_status.js +123 -0
  81. package/src/NativeSherpaOnnx.ts +35 -3
  82. package/src/alignment/index.ts +41 -0
  83. package/src/alignment/types.ts +22 -0
  84. package/src/alignment/vocab.ts +38 -0
  85. package/src/download/paths.ts +18 -5
  86. package/src/download/registry.ts +23 -3
  87. package/src/download/types.ts +1 -0
  88. package/src/index.tsx +6 -4
  89. package/src/licenses.ts +12 -1
  90. package/src/stt/types.ts +5 -2
  91. package/src/tts/index.ts +110 -3
  92. package/src/tts/subtitles.ts +611 -0
  93. package/src/tts/tempAudio.ts +31 -0
  94. package/src/tts/types.ts +79 -2
  95. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
@@ -0,0 +1,330 @@
1
+ package com.sherpaonnx
2
+
3
+ import kotlin.math.floor
4
+ import kotlin.math.max
5
+
6
/**
 * A single subtitle cue: the displayed [text] plus its [start] and [end] offsets.
 *
 * The offsets are produced by dividing a running sample offset by the sample rate,
 * so they are expressed in seconds when the sample rate is in samples per second.
 */
internal data class SubtitleTimingItem(
    val text: String,
    val start: Double,
    val end: Double
)

/**
 * Splits text into sentences/words and estimates subtitle timings from the number of
 * audio samples generated per chunk.
 *
 * Timings are estimates: when chunk counts do not line up with the text segments, samples
 * are redistributed proportionally to segment length.
 */
internal object SherpaOnnxTextSegmenter {
    // Non-ASCII punctuation is written as \u escapes so that the full-width forms cannot be
    // silently folded into their ASCII look-alikes by editors or tooling.
    private val sentenceTerminators = setOf(
        '.', '!', '?', ';',
        '\u3002', // ideographic full stop
        '\uFF01', // fullwidth exclamation mark
        '\uFF1F', // fullwidth question mark
        '\uFF1B' // fullwidth semicolon
    )

    private val trailingClosers = setOf(
        '"', '\'', ')', ']', '}', '>',
        '\u201D', // right double quotation mark
        '\u2019', // right single quotation mark
        '\u300D', // right corner bracket
        '\u300F', // right white corner bracket
        '\u3011', // right black lenticular bracket
        '\uFF09' // fullwidth right parenthesis
    )

    private val wordDelimiters = setOf(
        '.', ',', '!', '?', ';', ':', '(', ')', '[', ']', '{', '}',
        '"', '\'', '`', '~', '<', '>', '/', '\\', '|', '@', '#', '$',
        '%', '^', '&', '*', '+', '=',
        '\u2026', // horizontal ellipsis
        '\uFF0C', // fullwidth comma
        '\u3002', // ideographic full stop
        '\uFF01', // fullwidth exclamation mark
        '\uFF1F', // fullwidth question mark
        '\uFF1B', // fullwidth semicolon
        '\uFF1A', // fullwidth colon
        '\u3001' // ideographic comma
    )

    // Lower-cased tokens that may be followed by a period without ending the sentence.
    private val commonAbbreviations = setOf(
        "mr",
        "mrs",
        "ms",
        "dr",
        "prof",
        "sr",
        "jr",
        "st",
        "vs",
        "etc",
        "e.g",
        "i.e"
    )

    /**
     * Splits [text] into trimmed sentences.
     *
     * A boundary is a run of sentence terminators, optionally followed by closing
     * quotes/brackets. ASCII terminators only end a sentence when followed by whitespace or
     * end of text (so "example.com" stays intact); full-width terminators end a sentence
     * unconditionally because CJK text is normally written without spaces. Periods between
     * digits, after a known abbreviation, or after a single upper-case initial never split.
     *
     * @return the sentences in order; an empty list for blank input; the whole trimmed text
     *   as a single element when no sentence could be extracted.
     */
    fun splitIntoSentences(text: String): List<String> {
        val normalized = text.trim()
        if (normalized.isEmpty()) return emptyList()

        val out = mutableListOf<String>()
        var start = 0
        var i = 0

        while (i < normalized.length) {
            val current = normalized[i]
            val isBoundaryCandidate = isSentenceTerminator(current) &&
                (current != '.' || shouldSplitOnPeriod(normalized, i))
            if (!isBoundaryCandidate) {
                i += 1
                continue
            }

            val end = sentenceBoundaryEnd(normalized, i)
            val next = normalized.getOrNull(end)
            val followedByBreak = next == null || next.isWhitespace()
            // Unspaced CJK text has no whitespace after its terminators; do not require it there.
            val hasWideTerminator = (i until end).any { isWideTerminator(normalized[it]) }
            if (!followedByBreak && !hasWideTerminator) {
                i += 1
                continue
            }

            val sentence = normalized.substring(start, end).trim()
            if (sentence.isNotEmpty()) {
                out += sentence
            }

            start = end
            while (start < normalized.length && normalized[start].isWhitespace()) {
                start += 1
            }
            i = start
        }

        val tail = if (start < normalized.length) normalized.substring(start).trim() else ""
        if (tail.isNotEmpty()) {
            out += tail
        }

        return if (out.isNotEmpty()) out else listOf(normalized)
    }

    /**
     * Splits [text] into word tokens.
     *
     * Whitespace and punctuation delimit tokens and are dropped; every CJK character
     * (see [isCjkChar]) becomes its own token.
     *
     * @return the tokens in order; an empty list for blank input; the whole trimmed text as
     *   a single element when it contains nothing but delimiters.
     */
    fun splitIntoWords(text: String): List<String> {
        val normalized = text.trim()
        if (normalized.isEmpty()) return emptyList()

        val out = mutableListOf<String>()
        val current = StringBuilder()

        fun flushCurrent() {
            val token = current.toString().trim()
            if (token.isNotEmpty()) {
                out += token
            }
            current.clear()
        }

        for (char in normalized) {
            when {
                char.isWhitespace() -> flushCurrent()
                isCjkChar(char) -> {
                    flushCurrent()
                    out += char.toString()
                }
                isWordDelimiter(char) -> flushCurrent()
                else -> current.append(char)
            }
        }

        flushCurrent()
        return if (out.isNotEmpty()) out else listOf(normalized)
    }

    /**
     * Builds one subtitle per non-blank segment, laying the segments end to end.
     *
     * @param segments subtitle texts; blank entries are dropped.
     * @param chunkSampleCounts samples generated per chunk; negative values count as 0. When
     *   the number of chunks differs from the number of segments the counts are re-aligned
     *   (surplus chunks are merged into the last segment, missing ones are estimated by
     *   text length).
     * @param sampleRate divisor used to turn sample offsets into times.
     * @return the timed subtitles; empty when [sampleRate] is not positive or no segment
     *   remains. Leading segments that received zero samples are skipped.
     */
    fun buildSubtitlesFromChunks(
        segments: List<String>,
        chunkSampleCounts: List<Int>,
        sampleRate: Int
    ): List<SubtitleTimingItem> {
        if (sampleRate <= 0) return emptyList()

        val cleanedSegments = sanitizeSegments(segments)
        if (cleanedSegments.isEmpty()) return emptyList()

        val alignedCounts = alignChunkCountsToSegments(cleanedSegments, chunkSampleCounts)
        val rate = sampleRate.toDouble()

        val subtitles = mutableListOf<SubtitleTimingItem>()
        var offsetSamples = 0

        for ((index, segment) in cleanedSegments.withIndex()) {
            val samples = alignedCounts.getOrElse(index) { 0 }.coerceAtLeast(0)
            // Nothing has been emitted yet and this segment has no audio: drop it.
            if (samples == 0 && offsetSamples == 0) continue

            val startSec = offsetSamples / rate
            offsetSamples += samples
            val endSec = offsetSamples / rate

            subtitles += SubtitleTimingItem(text = segment, start = startSec, end = endSec)
        }

        return subtitles
    }

    /**
     * Builds word-level subtitles from sentence-level chunk sizes.
     *
     * Each sentence's samples are distributed across its words proportionally to word
     * length, then the words are timed with [buildSubtitlesFromChunks].
     *
     * @param sentences sentence texts; blank entries are dropped.
     * @param sentenceChunkSampleCounts samples generated per sentence chunk.
     * @param sampleRate divisor used to turn sample offsets into times.
     */
    fun buildWordSubtitlesFromSentenceChunks(
        sentences: List<String>,
        sentenceChunkSampleCounts: List<Int>,
        sampleRate: Int
    ): List<SubtitleTimingItem> {
        val cleanedSentences = sanitizeSegments(sentences)
        if (cleanedSentences.isEmpty()) return emptyList()

        val alignedSentenceCounts = alignChunkCountsToSegments(
            cleanedSentences,
            sentenceChunkSampleCounts
        )

        val wordSegments = mutableListOf<String>()
        val wordChunkCounts = mutableListOf<Int>()

        for ((index, sentence) in cleanedSentences.withIndex()) {
            val sentenceSamples = alignedSentenceCounts.getOrElse(index) { 0 }.coerceAtLeast(0)
            val words = splitIntoWords(sentence)
            if (words.isEmpty()) continue

            val distributed = distributeSamplesByTextWeight(sentenceSamples, words)
            for ((wordIndex, word) in words.withIndex()) {
                wordSegments += word
                wordChunkCounts += distributed.getOrElse(wordIndex) { 0 }
            }
        }

        return buildSubtitlesFromChunks(wordSegments, wordChunkCounts, sampleRate)
    }

    /** Trims every segment and drops the ones that end up empty. */
    private fun sanitizeSegments(segments: List<String>): List<String> =
        segments.map { it.trim() }.filter { it.isNotEmpty() }

    /**
     * Returns exactly one sample count per segment.
     *
     * Equal sizes are passed through (negatives clamped to 0); surplus chunks are summed
     * into the last segment; with too few chunks the total is redistributed by text length.
     */
    private fun alignChunkCountsToSegments(
        segments: List<String>,
        chunkSampleCounts: List<Int>
    ): List<Int> {
        if (segments.isEmpty()) return emptyList()

        val counts = chunkSampleCounts.map { max(0, it) }
        if (counts.size == segments.size) {
            return counts
        }

        if (counts.size > segments.size) {
            val merged = counts.take(segments.size).toMutableList()
            val extra = counts.drop(segments.size).sum()
            if (merged.isNotEmpty()) {
                val lastIndex = merged.lastIndex
                merged[lastIndex] = merged[lastIndex] + extra
            }
            return merged
        }

        return distributeSamplesByTextWeight(counts.sum(), segments)
    }

    /**
     * Splits [totalSamples] across [segments] proportionally to each segment's length
     * (minimum weight 1), using largest-remainder rounding so the parts sum to the total.
     */
    private fun distributeSamplesByTextWeight(totalSamples: Int, segments: List<String>): List<Int> {
        if (segments.isEmpty()) return emptyList()

        val safeTotal = totalSamples.coerceAtLeast(0)
        if (safeTotal == 0) {
            return List(segments.size) { 0 }
        }

        val weights = segments.map { max(1, it.length) }
        val weightSum = weights.sum().coerceAtLeast(1)

        val base = MutableList(segments.size) { 0 }
        val fractions = mutableListOf<Pair<Int, Double>>()

        for (index in segments.indices) {
            val exact = (safeTotal.toDouble() * weights[index].toDouble()) / weightSum.toDouble()
            val floorValue = floor(exact).toInt()
            base[index] = floorValue
            fractions += index to (exact - floorValue.toDouble())
        }

        var remaining = safeTotal - base.sum()
        if (remaining > 0) {
            // Hand the leftover samples to the segments with the largest fractional parts.
            val order = fractions.sortedByDescending { it.second }
            var ptr = 0
            while (remaining > 0 && order.isNotEmpty()) {
                val target = order[ptr % order.size].first
                base[target] = base[target] + 1
                remaining -= 1
                ptr += 1
            }
        }

        return base
    }

    private fun isSentenceTerminator(char: Char): Boolean = char in sentenceTerminators

    /** True for the non-ASCII (full-width / ideographic) sentence terminators. */
    private fun isWideTerminator(char: Char): Boolean =
        char.code > 0x7F && char in sentenceTerminators

    /**
     * Decides whether the period at [periodIndex] may end a sentence. It may not when it is
     * a decimal point, follows a known abbreviation, or follows a single upper-case initial.
     */
    private fun shouldSplitOnPeriod(text: String, periodIndex: Int): Boolean {
        val prev = text.getOrNull(periodIndex - 1)
        val next = text.getOrNull(periodIndex + 1)

        if (prev != null && next != null && prev.isDigit() && next.isDigit()) {
            return false
        }

        val tokenRaw = extractTokenBeforePeriod(text, periodIndex)
        if (tokenRaw.lowercase() in commonAbbreviations) {
            return false
        }

        // Likely an initial, e.g. "A. Smith"; the original case must be inspected here.
        if (tokenRaw.length == 1 && tokenRaw[0].isUpperCase()) {
            return false
        }

        return true
    }

    /**
     * Returns the run of letters and inner periods that directly precedes [periodIndex]
     * (skipping whitespace in between), without trailing periods; "" when there is none.
     */
    private fun extractTokenBeforePeriod(text: String, periodIndex: Int): String {
        var i = periodIndex - 1
        while (i >= 0 && text[i].isWhitespace()) {
            i -= 1
        }

        val end = i
        while (i >= 0 && (text[i].isLetter() || text[i] == '.')) {
            i -= 1
        }

        if (end < i + 1) return ""
        return text.substring(i + 1, end + 1).trimEnd('.')
    }

    /**
     * Returns the index just past the sentence boundary starting at [startIndex]: the
     * terminator itself, any further terminators, then any trailing closers.
     */
    private fun sentenceBoundaryEnd(text: String, startIndex: Int): Int {
        var end = startIndex + 1
        while (end < text.length && isSentenceTerminator(text[end])) {
            end += 1
        }
        while (end < text.length && text[end] in trailingClosers) {
            end += 1
        }
        return end
    }

    /**
     * True when [char] is a CJK ideograph, kana or Hangul syllable.
     *
     * Only Basic Multilingual Plane blocks are listed: a [Char] is a single UTF-16 code
     * unit, so supplementary-plane ideograph extensions can never be observed here.
     * NOTE(review): explicit ranges also avoid relying on newer Character.UnicodeBlock
     * constants, which may be missing on older Android runtimes (confirm against minSdk).
     */
    private fun isCjkChar(char: Char): Boolean = when (char.code) {
        in 0x3040..0x309F, // Hiragana
        in 0x30A0..0x30FF, // Katakana
        in 0x3400..0x4DBF, // CJK Unified Ideographs Extension A
        in 0x4E00..0x9FFF, // CJK Unified Ideographs
        in 0xAC00..0xD7AF -> true // Hangul Syllables
        else -> false
    }

    private fun isWordDelimiter(char: Char): Boolean = char in wordDelimiters
}
@@ -491,10 +491,66 @@ internal class SherpaOnnxTtsHelper(
491
491
  promise.reject("TTS_GENERATE_ERROR", "TTS not initialized")
492
492
  return
493
493
  }
494
+
495
+ val subtitleMode = getSubtitleMode(options)
496
+ val subtitleGranularity = getSubtitleGranularity(options)
497
+ if (isCharacterGranularityRequested(options) && subtitleMode != "accurate") {
498
+ Log.e(
499
+ "SherpaOnnxTts",
500
+ "TTS_SUBTITLE_ERROR: Character granularity is only supported when subtitleMode is 'accurate'"
501
+ )
502
+ promise.reject(
503
+ "TTS_SUBTITLE_ERROR",
504
+ "Character granularity is only supported when subtitleMode is 'accurate'."
505
+ )
506
+ return
507
+ }
508
+
494
509
  val sid = getSid(options)
495
510
  val speed = getSpeed(options)
496
511
  val sentenceChunkSizes = mutableListOf<Int>()
497
512
  val audio = when {
513
+ subtitleMode == "off" -> {
514
+ when {
515
+ hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
516
+ if (inst.isZipvoice) {
517
+ val promptText = options!!.getString("referenceText")?.trim().orEmpty()
518
+ if (promptText.isEmpty()) {
519
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
520
+ promise.reject(
521
+ "TTS_GENERATE_ERROR",
522
+ "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
523
+ )
524
+ return
525
+ }
526
+ }
527
+ val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
528
+ inst.tts!!.generateWithConfig(text, config)
529
+ }
530
+ hasReferenceAudio(options) -> {
531
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
532
+ promise.reject(
533
+ "TTS_GENERATE_ERROR",
534
+ "Reference audio is only supported for Zipvoice and Pocket TTS."
535
+ )
536
+ return
537
+ }
538
+ inst.isPocket -> {
539
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
540
+ promise.reject(
541
+ "TTS_GENERATE_ERROR",
542
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
543
+ )
544
+ return
545
+ }
546
+ else -> dispatchGenerate(inst, text, sid, speed)
547
+ ?: run {
548
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
549
+ promise.reject("TTS_GENERATE_ERROR", "TTS not initialized")
550
+ return
551
+ }
552
+ }
553
+ }
498
554
  hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
499
555
  if (inst.isZipvoice) {
500
556
  val promptText = options!!.getString("referenceText")?.trim().orEmpty()
@@ -540,6 +596,11 @@ internal class SherpaOnnxTtsHelper(
540
596
  tts.generateWithCallback(text, sid, speed, ttsChunkCallbackForJni(sentenceChunkSizes))
541
597
  }
542
598
  }
599
+
600
+ if (subtitleMode != "off" && sentenceChunkSizes.isEmpty() && audio.samples.isNotEmpty()) {
601
+ sentenceChunkSizes.add(audio.samples.size)
602
+ }
603
+
543
604
  val map = Arguments.createMap()
544
605
  val samplesArray = Arguments.createArray()
545
606
  for (sample in audio.samples) {
@@ -547,17 +608,29 @@ internal class SherpaOnnxTtsHelper(
547
608
  }
548
609
  map.putArray("samples", samplesArray)
549
610
  map.putInt("sampleRate", audio.sampleRate)
550
- val subtitlesArray = Arguments.createArray()
551
- if (audio.samples.isNotEmpty() && audio.sampleRate > 0) {
552
- val durationSec = audio.samples.size.toDouble() / audio.sampleRate
553
- val subtitleMap = Arguments.createMap()
554
- subtitleMap.putString("text", text)
555
- subtitleMap.putDouble("start", 0.0)
556
- subtitleMap.putDouble("end", durationSec)
557
- subtitlesArray.pushMap(subtitleMap)
611
+
612
+ val subtitleItems = if (subtitleMode == "off") {
613
+ emptyList()
614
+ } else {
615
+ val sentenceSegments = SherpaOnnxTextSegmenter.splitIntoSentences(text)
616
+ if (subtitleGranularity == "word") {
617
+ SherpaOnnxTextSegmenter.buildWordSubtitlesFromSentenceChunks(
618
+ sentenceSegments,
619
+ sentenceChunkSizes,
620
+ audio.sampleRate
621
+ )
622
+ } else {
623
+ SherpaOnnxTextSegmenter.buildSubtitlesFromChunks(
624
+ sentenceSegments,
625
+ sentenceChunkSizes,
626
+ audio.sampleRate
627
+ )
628
+ }
558
629
  }
559
- map.putArray("subtitles", subtitlesArray)
560
- map.putBoolean("estimated", true)
630
+
631
+ map.putArray("subtitles", toSubtitleWritableArray(subtitleItems))
632
+ val timingMode = if (subtitleMode == "off") "off" else "estimated"
633
+ map.putString("timingMode", timingMode)
561
634
  promise.resolve(map)
562
635
  } catch (e: Exception) {
563
636
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: ${e.message ?: "Failed to generate speech"}", e)
@@ -968,6 +1041,37 @@ internal class SherpaOnnxTtsHelper(
968
1041
  private fun getSpeed(options: ReadableMap?): Float =
969
1042
  if (options != null && options.hasKey("speed")) options.getDouble("speed").toFloat() else 1.0f
970
1043
 
1044
/**
 * Reads the "subtitleMode" option, trimmed and lower-cased.
 * Returns it when it is "off", "fast" or "accurate"; any other value (or none) yields "fast".
 */
private fun getSubtitleMode(options: ReadableMap?): String {
    val requested = options?.getString("subtitleMode")?.trim()?.lowercase()
    return requested?.takeIf { it == "off" || it == "fast" || it == "accurate" } ?: "fast"
}
1051
+
1052
/**
 * Reads the "subtitleGranularity" option, trimmed and lower-cased.
 * Returns it when it is "word" or "sentence"; any other value (or none) yields "sentence".
 */
private fun getSubtitleGranularity(options: ReadableMap?): String {
    val requested = options?.getString("subtitleGranularity")?.trim()?.lowercase()
    return requested?.takeIf { it == "word" || it == "sentence" } ?: "sentence"
}
1059
+
1060
/** True when the trimmed, lower-cased "subtitleGranularity" option equals "character". */
private fun isCharacterGranularityRequested(options: ReadableMap?): Boolean =
    options?.getString("subtitleGranularity")?.trim()?.lowercase() == "character"
1064
+
1065
/**
 * Converts subtitle items into a bridge array of maps, one map per item, each carrying the
 * keys "text", "start" and "end".
 */
private fun toSubtitleWritableArray(items: List<SubtitleTimingItem>) =
    Arguments.createArray().also { array ->
        items.forEach { item ->
            val entry = Arguments.createMap().apply {
                putString("text", item.text)
                putDouble("start", item.start)
                putDouble("end", item.end)
            }
            array.pushMap(entry)
        }
    }
1074
+
971
1075
  /** Build Kotlin GenerationConfig from ReadableMap. Returns null only when options is null; otherwise returns a config with sid, speed, silenceScale, numSteps, and any reference/extra fields from options. */
972
1076
  private fun parseGenerationConfig(options: ReadableMap?): GenerationConfig? {
973
1077
  if (options == null) return null
@@ -0,0 +1,5 @@
1
+ asset_name,license_type,commercial_use,confidence,detection_source,license_file
2
+ wav2vec2-base-960h-fp16.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md
3
+ wav2vec2-base-960h-int8.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md
4
+ wav2vec2-base-960h-q4f16.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md
5
+ wav2vec2-base-960h.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md