npm - react-native-sherpa-onnx - Versions diffs - 0.4.0 → 0.4.2 - Mend

react-native-sherpa-onnx 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

package/README.md +3 -0
package/android/src/main/assets/model_licenses/alignment-models-license-status.csv +5 -0
package/android/src/main/cpp/CMakeLists.txt +3 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.cpp +66 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.h +17 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-alignment.cpp +108 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +30 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.cpp +66 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.h +30 -0
package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +21 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxAlignmentHelper.kt +555 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +76 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxTextSegmenter.kt +330 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +180 -23
package/ios/Resources/model_licenses/alignment-models-license-status.csv +5 -0
package/ios/SherpaOnnx+Alignment.mm +704 -0
package/ios/SherpaOnnx+STT.mm +6 -0
package/ios/SherpaOnnx+TTS.mm +624 -50
package/ios/model_detect/sherpa-onnx-model-detect-alignment.mm +108 -0
package/ios/model_detect/sherpa-onnx-model-detect.h +31 -0
package/ios/model_detect/sherpa-onnx-validate-alignment.h +30 -0
package/ios/model_detect/sherpa-onnx-validate-alignment.mm +66 -0
package/ios/stt/sherpa-onnx-stt-wrapper.h +3 -1
package/ios/stt/sherpa-onnx-stt-wrapper.mm +6 -0
package/lib/module/NativeSherpaOnnx.js.map +1 -1
package/lib/module/alignment/index.js +27 -0
package/lib/module/alignment/index.js.map +1 -0
package/lib/module/alignment/types.js +2 -0
package/lib/module/alignment/types.js.map +1 -0
package/lib/module/alignment/vocab.js +40 -0
package/lib/module/alignment/vocab.js.map +1 -0
package/lib/module/download/paths.js +9 -1
package/lib/module/download/paths.js.map +1 -1
package/lib/module/download/registry.js +17 -1
package/lib/module/download/registry.js.map +1 -1
package/lib/module/download/types.js +1 -0
package/lib/module/download/types.js.map +1 -1
package/lib/module/index.js +6 -4
package/lib/module/index.js.map +1 -1
package/lib/module/licenses.js +8 -2
package/lib/module/licenses.js.map +1 -1
package/lib/module/stt/types.js.map +1 -1
package/lib/module/tts/index.js +68 -2
package/lib/module/tts/index.js.map +1 -1
package/lib/module/tts/subtitles.js +400 -0
package/lib/module/tts/subtitles.js.map +1 -0
package/lib/module/tts/tempAudio.js +17 -0
package/lib/module/tts/tempAudio.js.map +1 -0
package/lib/module/tts/types.js.map +1 -1
package/lib/typescript/src/NativeSherpaOnnx.d.ts +34 -3
package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
package/lib/typescript/src/alignment/index.d.ts +8 -0
package/lib/typescript/src/alignment/index.d.ts.map +1 -0
package/lib/typescript/src/alignment/types.d.ts +23 -0
package/lib/typescript/src/alignment/types.d.ts.map +1 -0
package/lib/typescript/src/alignment/vocab.d.ts +5 -0
package/lib/typescript/src/alignment/vocab.d.ts.map +1 -0
package/lib/typescript/src/download/paths.d.ts +5 -2
package/lib/typescript/src/download/paths.d.ts.map +1 -1
package/lib/typescript/src/download/registry.d.ts.map +1 -1
package/lib/typescript/src/download/types.d.ts +2 -1
package/lib/typescript/src/download/types.d.ts.map +1 -1
package/lib/typescript/src/index.d.ts +1 -0
package/lib/typescript/src/index.d.ts.map +1 -1
package/lib/typescript/src/licenses.d.ts.map +1 -1
package/lib/typescript/src/stt/types.d.ts +5 -2
package/lib/typescript/src/stt/types.d.ts.map +1 -1
package/lib/typescript/src/tts/index.d.ts +2 -1
package/lib/typescript/src/tts/index.d.ts.map +1 -1
package/lib/typescript/src/tts/subtitles.d.ts +24 -0
package/lib/typescript/src/tts/subtitles.d.ts.map +1 -0
package/lib/typescript/src/tts/tempAudio.d.ts +3 -0
package/lib/typescript/src/tts/tempAudio.d.ts.map +1 -0
package/lib/typescript/src/tts/types.d.ts +68 -2
package/lib/typescript/src/tts/types.d.ts.map +1 -1
package/package.json +6 -1
package/scripts/alignment-models/README.md +90 -0
package/scripts/alignment-models/build_and_upload.js +724 -0
package/scripts/alignment-models/sources.csv +5 -0
package/scripts/alignment-models/sync_alignment_license_status.js +123 -0
package/src/NativeSherpaOnnx.ts +35 -3
package/src/alignment/index.ts +41 -0
package/src/alignment/types.ts +22 -0
package/src/alignment/vocab.ts +38 -0
package/src/download/paths.ts +18 -5
package/src/download/registry.ts +23 -3
package/src/download/types.ts +1 -0
package/src/index.tsx +6 -4
package/src/licenses.ts +12 -1
package/src/stt/types.ts +5 -2
package/src/tts/index.ts +110 -3
package/src/tts/subtitles.ts +611 -0
package/src/tts/tempAudio.ts +31 -0
package/src/tts/types.ts +79 -2
package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1

package/android/src/main/java/com/sherpaonnx/SherpaOnnxTextSegmenter.kt ADDED Viewed

@@ -0,0 +1,330 @@
+package com.sherpaonnx
+import kotlin.math.floor
+import kotlin.math.max
+/**
+ * A single subtitle cue: the text to show and the time span, in seconds, that it covers.
+ *
+ * @property text Cue text (one sentence or one word, depending on which builder produced it).
+ * @property start Cue start time in seconds.
+ * @property end Cue end time in seconds.
+ */
+internal data class SubtitleTimingItem(
+  val text: String,
+  val start: Double,
+  val end: Double
+)
+/**
+ * Splits text into sentences and words and converts per-chunk sample counts into subtitle timings.
+ *
+ * Timings are derived purely from sample counts and text lengths; no audio is inspected here.
+ */
+internal object SherpaOnnxTextSegmenter {
+  /** Highest ASCII code unit; sentence terminators above it are the full-width forms. */
+  private const val ASCII_MAX_CODE = 0x7F
+  private val sentenceTerminators = setOf('.', '!', '?', ';', '。', '！', '？', '；')
+  private val trailingClosers = setOf('"', '\'', ')', ']', '}', '>', '”', '’', '」', '』', '】', '）')
+  // Lower-case tokens (without the final period) after which a '.' does not end a sentence.
+  private val commonAbbreviations = setOf(
+    "mr",
+    "mrs",
+    "ms",
+    "dr",
+    "prof",
+    "sr",
+    "jr",
+    "st",
+    "vs",
+    "etc",
+    "e.g",
+    "i.e"
+  )
+  /**
+   * Splits [text] into trimmed, non-empty sentences.
+   *
+   * A sentence ends at a run of terminators plus any trailing closing quotes/brackets.
+   * ASCII terminators only count when followed by whitespace or the end of the text, so
+   * "example.com" or "3.14" stay intact. Full-width terminators (`。！？；`) end a sentence
+   * even without following whitespace, because CJK text is normally written without spaces.
+   *
+   * @return the sentences in order; an empty list for blank input; the whole trimmed text as a
+   *   single element when no boundary is found.
+   */
+  fun splitIntoSentences(text: String): List<String> {
+    val normalized = text.trim()
+    if (normalized.isEmpty()) return emptyList()
+    val out = mutableListOf<String>()
+    var start = 0
+    var i = 0
+    while (i < normalized.length) {
+      val current = normalized[i]
+      if (!isSentenceTerminator(current)) {
+        i += 1
+        continue
+      }
+      if (current == '.' && !shouldSplitOnPeriod(normalized, i)) {
+        i += 1
+        continue
+      }
+      val end = sentenceBoundaryEnd(normalized, i)
+      val next = normalized.getOrNull(end)
+      // Only ASCII terminators need trailing whitespace; full-width ones are unambiguous.
+      val needsTrailingWhitespace = current.code <= ASCII_MAX_CODE
+      if (needsTrailingWhitespace && next != null && !next.isWhitespace()) {
+        i += 1
+        continue
+      }
+      val sentence = normalized.substring(start, end).trim()
+      if (sentence.isNotEmpty()) {
+        out += sentence
+      }
+      start = end
+      while (start < normalized.length && normalized[start].isWhitespace()) {
+        start += 1
+      }
+      i = start
+    }
+    val tail = if (start < normalized.length) normalized.substring(start).trim() else ""
+    if (tail.isNotEmpty()) {
+      out += tail
+    }
+    return if (out.isNotEmpty()) out else listOf(normalized)
+  }
+  /**
+   * Splits [text] into word tokens.
+   *
+   * Whitespace and the punctuation accepted by [isWordDelimiter] separate tokens and are dropped;
+   * every character accepted by [isCjkChar] becomes its own token.
+   *
+   * @return the tokens in order; an empty list for blank input; the whole trimmed text as a
+   *   single element when it contains nothing but delimiters.
+   */
+  fun splitIntoWords(text: String): List<String> {
+    val normalized = text.trim()
+    if (normalized.isEmpty()) return emptyList()
+    val out = mutableListOf<String>()
+    val current = StringBuilder()
+    fun flushCurrent() {
+      val token = current.toString().trim()
+      if (token.isNotEmpty()) {
+        out += token
+      }
+      current.clear()
+    }
+    for (char in normalized) {
+      when {
+        char.isWhitespace() -> flushCurrent()
+        isCjkChar(char) -> {
+          flushCurrent()
+          out += char.toString()
+        }
+        isWordDelimiter(char) -> flushCurrent()
+        else -> current.append(char)
+      }
+    }
+    flushCurrent()
+    return if (out.isNotEmpty()) out else listOf(normalized)
+  }
+  /**
+   * Builds one cue per segment by laying the chunk sample counts end to end.
+   *
+   * Segments are trimmed and blank ones dropped; counts are then matched to the remaining
+   * segments by [alignChunkCountsToSegments]. Segments with zero samples are skipped while the
+   * running offset is still zero; later zero-sample segments yield zero-length cues.
+   *
+   * @param segments Text pieces in playback order.
+   * @param chunkSampleCounts Number of audio samples per chunk.
+   * @param sampleRate Samples per second; a non-positive value yields an empty list.
+   * @return cues with start/end expressed in seconds.
+   */
+  fun buildSubtitlesFromChunks(
+    segments: List<String>,
+    chunkSampleCounts: List<Int>,
+    sampleRate: Int
+  ): List<SubtitleTimingItem> {
+    if (sampleRate <= 0) return emptyList()
+    val cleanedSegments = sanitizeSegments(segments)
+    if (cleanedSegments.isEmpty()) return emptyList()
+    val alignedCounts = alignChunkCountsToSegments(cleanedSegments, chunkSampleCounts)
+    val subtitles = mutableListOf<SubtitleTimingItem>()
+    var offsetSamples = 0
+    for (index in cleanedSegments.indices) {
+      val samples = alignedCounts.getOrElse(index) { 0 }.coerceAtLeast(0)
+      if (samples == 0 && offsetSamples == 0) {
+        continue
+      }
+      val startSec = offsetSamples.toDouble() / sampleRate.toDouble()
+      offsetSamples += samples
+      val endSec = offsetSamples.toDouble() / sampleRate.toDouble()
+      subtitles += SubtitleTimingItem(
+        text = cleanedSegments[index],
+        start = startSec,
+        end = endSec
+      )
+    }
+    return subtitles
+  }
+  /**
+   * Builds one cue per word by spreading each sentence's samples over its words.
+   *
+   * Each sentence's sample count is divided among its words in proportion to word length, then
+   * the flattened word list is timed by [buildSubtitlesFromChunks].
+   *
+   * @param sentences Sentences in playback order.
+   * @param sentenceChunkSampleCounts Number of audio samples per sentence chunk.
+   * @param sampleRate Samples per second; a non-positive value yields an empty list.
+   * @return word-level cues with start/end expressed in seconds.
+   */
+  fun buildWordSubtitlesFromSentenceChunks(
+    sentences: List<String>,
+    sentenceChunkSampleCounts: List<Int>,
+    sampleRate: Int
+  ): List<SubtitleTimingItem> {
+    // Same guard as buildSubtitlesFromChunks; bail out before doing any splitting work.
+    if (sampleRate <= 0) return emptyList()
+    val cleanedSentences = sanitizeSegments(sentences)
+    if (cleanedSentences.isEmpty()) return emptyList()
+    val alignedSentenceCounts = alignChunkCountsToSegments(
+      cleanedSentences,
+      sentenceChunkSampleCounts
+    )
+    val wordSegments = mutableListOf<String>()
+    val wordChunkCounts = mutableListOf<Int>()
+    for (index in cleanedSentences.indices) {
+      val sentence = cleanedSentences[index]
+      val sentenceSamples = alignedSentenceCounts.getOrElse(index) { 0 }.coerceAtLeast(0)
+      val words = splitIntoWords(sentence)
+      if (words.isEmpty()) continue
+      val distributed = distributeSamplesByTextWeight(sentenceSamples, words)
+      for (wordIndex in words.indices) {
+        wordSegments += words[wordIndex]
+        wordChunkCounts += distributed.getOrElse(wordIndex) { 0 }
+      }
+    }
+    return buildSubtitlesFromChunks(wordSegments, wordChunkCounts, sampleRate)
+  }
+  /** Trims every segment and drops the ones that end up empty. */
+  private fun sanitizeSegments(segments: List<String>): List<String> {
+    return segments
+      .map { it.trim() }
+      .filter { it.isNotEmpty() }
+  }
+  /**
+   * Returns exactly one non-negative sample count per segment.
+   *
+   * Negative counts are clamped to zero. With as many counts as segments they are used as-is;
+   * with more counts the surplus is added to the last segment; with fewer counts the total is
+   * redistributed over all segments in proportion to text length.
+   */
+  private fun alignChunkCountsToSegments(
+    segments: List<String>,
+    chunkSampleCounts: List<Int>
+  ): List<Int> {
+    if (segments.isEmpty()) return emptyList()
+    val counts = chunkSampleCounts.map { max(0, it) }
+    if (counts.size == segments.size) {
+      return counts
+    }
+    if (counts.size > segments.size) {
+      val merged = counts.take(segments.size).toMutableList()
+      val extra = counts.drop(segments.size).sum()
+      if (merged.isNotEmpty()) {
+        val lastIndex = merged.lastIndex
+        merged[lastIndex] = merged[lastIndex] + extra
+      }
+      return merged
+    }
+    return distributeSamplesByTextWeight(counts.sum(), segments)
+  }
+  /**
+   * Splits [totalSamples] over [segments] in proportion to each segment's length (minimum 1).
+   *
+   * Every segment first gets the floor of its exact share; samples left over are then handed out
+   * one at a time, starting with the segments whose exact share had the largest fractional part.
+   * The returned counts sum to the (non-negative) total.
+   */
+  private fun distributeSamplesByTextWeight(totalSamples: Int, segments: List<String>): List<Int> {
+    if (segments.isEmpty()) return emptyList()
+    val safeTotal = totalSamples.coerceAtLeast(0)
+    if (safeTotal == 0) {
+      return List(segments.size) { 0 }
+    }
+    val weights = segments.map { max(1, it.length) }
+    val weightSum = weights.sum().coerceAtLeast(1)
+    val base = MutableList(segments.size) { 0 }
+    val fractions = mutableListOf<Pair<Int, Double>>()
+    for (index in segments.indices) {
+      val exact = (safeTotal.toDouble() * weights[index].toDouble()) / weightSum.toDouble()
+      val floorValue = floor(exact).toInt()
+      base[index] = floorValue
+      fractions += index to (exact - floorValue.toDouble())
+    }
+    var assigned = base.sum()
+    var remaining = safeTotal - assigned
+    if (remaining > 0) {
+      val order = fractions.sortedByDescending { it.second }
+      var ptr = 0
+      while (remaining > 0 && order.isNotEmpty()) {
+        val target = order[ptr % order.size].first
+        base[target] = base[target] + 1
+        assigned += 1
+        remaining = safeTotal - assigned
+        ptr += 1
+      }
+    }
+    return base
+  }
+  /** True when [char] is one of the configured sentence terminators. */
+  private fun isSentenceTerminator(char: Char): Boolean {
+    return sentenceTerminators.contains(char)
+  }
+  /**
+   * Decides whether the '.' at [periodIndex] may end a sentence.
+   *
+   * Returns false for a decimal point between two digits, after a known abbreviation, and after
+   * a single upper-case letter (treated as an initial).
+   */
+  private fun shouldSplitOnPeriod(text: String, periodIndex: Int): Boolean {
+    val prev = text.getOrNull(periodIndex - 1)
+    val next = text.getOrNull(periodIndex + 1)
+    if (prev != null && next != null && prev.isDigit() && next.isDigit()) {
+      return false
+    }
+    val tokenRaw = extractTokenBeforePeriod(text, periodIndex)
+    val tokenLower = tokenRaw.lowercase()
+    if (commonAbbreviations.contains(tokenLower)) {
+      return false
+    }
+    // Likely initial, e.g. "A. Smith" — use original case; tokenLower[0] is never uppercase.
+    if (tokenRaw.length == 1 && tokenRaw[0].isUpperCase()) {
+      return false
+    }
+    return true
+  }
+  /**
+   * Returns the run of letters and inner periods that directly precedes [periodIndex]
+   * (whitespace before the period is skipped, trailing periods are removed), or "" if none.
+   */
+  private fun extractTokenBeforePeriod(text: String, periodIndex: Int): String {
+    var i = periodIndex - 1
+    while (i >= 0 && text[i].isWhitespace()) {
+      i -= 1
+    }
+    val end = i
+    while (i >= 0) {
+      val c = text[i]
+      if (c.isLetter() || c == '.') {
+        i -= 1
+        continue
+      }
+      break
+    }
+    if (end < i + 1) return ""
+    var token = text.substring(i + 1, end + 1)
+    while (token.endsWith('.')) {
+      token = token.dropLast(1)
+    }
+    return token
+  }
+  /**
+   * Returns the exclusive end index of the boundary that starts at [startIndex]: the terminator
+   * there, any further terminators, then any trailing closing quotes/brackets.
+   */
+  private fun sentenceBoundaryEnd(text: String, startIndex: Int): Int {
+    var end = startIndex + 1
+    while (end < text.length && isSentenceTerminator(text[end])) {
+      end += 1
+    }
+    while (end < text.length && trailingClosers.contains(text[end])) {
+      end += 1
+    }
+    return end
+  }
+  /**
+   * True when [char] lies in a Unicode block whose characters are tokenized one by one:
+   * CJK Unified Ideographs, Extension A, Hiragana, Katakana or Hangul Syllables.
+   *
+   * Only Basic Multilingual Plane blocks are listed: a single [Char] is one UTF-16 code unit, so
+   * the lookup can never report a supplementary-plane block such as Extension B–G.
+   */
+  private fun isCjkChar(char: Char): Boolean {
+    return when (Character.UnicodeBlock.of(char)) {
+      Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
+      Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
+      Character.UnicodeBlock.HIRAGANA,
+      Character.UnicodeBlock.KATAKANA,
+      Character.UnicodeBlock.HANGUL_SYLLABLES -> true
+      else -> false
+    }
+  }
+  /** True when [char] is punctuation that separates words and is dropped from the tokens. */
+  private fun isWordDelimiter(char: Char): Boolean {
+    return when (char) {
+      '.', ',', '!', '?', ';', ':', '(', ')', '[', ']', '{', '}',
+      '"', '\'', '`', '~', '<', '>', '/', '\\', '|', '@', '#', '$',
+      '%', '^', '&', '*', '+', '=', '…', '，', '。', '！', '？', '；',
+      '：', '、' -> true
+      else -> false
+    }
+  }
+}

package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt CHANGED Viewed

@@ -111,6 +111,46 @@ internal class SherpaOnnxTtsHelper(
     }
   }
+  /**
+   * libsherpa-onnx-jni looks up `invoke([F)Ljava/lang/Integer;` (see sherpa-onnx `offline-tts.cc` CallCallback).
+   * Kotlin `Function1<*, Int>` compiles to `invoke([F)I`, so GetMethodID fails and JNI aborts.
+   * Using [java.lang.Integer] as the type parameter yields the boxed JVM signature the JNI expects.
+   * The cast is only for the Kotlin API (`generateWithCallback` still declares `Function1<FloatArray, Int>`).
+   */
+  /** Wraps [n] in a real [java.lang.Integer] (not a Kotlin [Int]) so the callback returns the boxed type. */
+  @Suppress("DEPRECATION")
+  private fun boxForTtsJni(n: Int): java.lang.Integer {
+    return java.lang.Integer(n)
+  }
+  /**
+   * Creates the chunk callback for non-streaming generation.
+   *
+   * Each invocation appends the chunk's sample count to [sentenceChunkSizes] and returns that
+   * count as a boxed [java.lang.Integer]; the result is cast to the Kotlin function type only to
+   * satisfy the callee's declared parameter type.
+   */
+  @Suppress("UNCHECKED_CAST")
+  private fun ttsChunkCallbackForJni(
+    sentenceChunkSizes: MutableList<Int>
+  ): kotlin.Function1<FloatArray, Int> {
+    return object : kotlin.jvm.functions.Function1<FloatArray, java.lang.Integer> {
+      override fun invoke(chunk: FloatArray): java.lang.Integer {
+        val sampleCount = chunk.size
+        sentenceChunkSizes.add(sampleCount)
+        return boxForTtsJni(sampleCount)
+      }
+    } as kotlin.Function1<FloatArray, Int>
+  }
+  /**
+   * Creates the chunk callback for streaming generation.
+   *
+   * While [cancelled] is unset, each chunk is forwarded to [onChunk] and its sample count is
+   * returned; once [cancelled] is set, the chunk is ignored and 0 is returned. The value is
+   * returned as a boxed [java.lang.Integer].
+   */
+  @Suppress("UNCHECKED_CAST")
+  private fun ttsStreamChunkCallbackForJni(
+    cancelled: AtomicBoolean,
+    onChunk: (FloatArray) -> Unit
+  ): kotlin.Function1<FloatArray, Int> {
+    return object : kotlin.jvm.functions.Function1<FloatArray, java.lang.Integer> {
+      override fun invoke(chunk: FloatArray): java.lang.Integer {
+        val consumed = if (cancelled.get()) {
+          0
+        } else {
+          onChunk(chunk)
+          chunk.size
+        }
+        return boxForTtsJni(consumed)
+      }
+    } as kotlin.Function1<FloatArray, Int>
+  }
   /** Single-thread executor for TTS init so the RN bridge thread is not blocked (avoids Inspector/dev WebSocket races in debug builds). */
   private val ttsInitExecutor = Executors.newSingleThreadExecutor()
@@ -451,9 +491,66 @@ internal class SherpaOnnxTtsHelper(
         promise.reject("TTS_GENERATE_ERROR", "TTS not initialized")
         return
       }
+      val subtitleMode = getSubtitleMode(options)
+      val subtitleGranularity = getSubtitleGranularity(options)
+      if (isCharacterGranularityRequested(options) && subtitleMode != "accurate") {
+        Log.e(
+          "SherpaOnnxTts",
+          "TTS_SUBTITLE_ERROR: Character granularity is only supported when subtitleMode is 'accurate'"
+        )
+        promise.reject(
+          "TTS_SUBTITLE_ERROR",
+          "Character granularity is only supported when subtitleMode is 'accurate'."
+        )
+        return
+      }
       val sid = getSid(options)
       val speed = getSpeed(options)
+      val sentenceChunkSizes = mutableListOf<Int>()
       val audio = when {
+        subtitleMode == "off" -> {
+          when {
+            hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
+              if (inst.isZipvoice) {
+                val promptText = options!!.getString("referenceText")?.trim().orEmpty()
+                if (promptText.isEmpty()) {
+                  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
+                  promise.reject(
+                    "TTS_GENERATE_ERROR",
+                    "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
+                  )
+                  return
+                }
+              }
+              val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
+              inst.tts!!.generateWithConfig(text, config)
+            }
+            hasReferenceAudio(options) -> {
+              Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
+              promise.reject(
+                "TTS_GENERATE_ERROR",
+                "Reference audio is only supported for Zipvoice and Pocket TTS."
+              )
+              return
+            }
+            inst.isPocket -> {
+              Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
+              promise.reject(
+                "TTS_GENERATE_ERROR",
+                "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
+              )
+              return
+            }
+            else -> dispatchGenerate(inst, text, sid, speed)
+              ?: run {
+                Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
+                promise.reject("TTS_GENERATE_ERROR", "TTS not initialized")
+                return
+              }
+          }
+        }
         hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
           if (inst.isZipvoice) {
             val promptText = options!!.getString("referenceText")?.trim().orEmpty()
@@ -467,7 +564,11 @@ internal class SherpaOnnxTtsHelper(
             }
           }
           val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
-          inst.tts!!.generateWithConfig(text, config)
+          inst.tts!!.generateWithConfigAndCallback(
+            text,
+            config,
+            ttsChunkCallbackForJni(sentenceChunkSizes)
+          )
         }
         hasReferenceAudio(options) -> {
           Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
@@ -485,13 +586,21 @@ internal class SherpaOnnxTtsHelper(
           )
           return
         }
-        else -> dispatchGenerate(inst, text, sid, speed)
-          ?: run {
+        else -> {
+          val tts = inst.tts
+          if (tts == null) {
             Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
             promise.reject("TTS_GENERATE_ERROR", "TTS not initialized")
             return
           }
+          tts.generateWithCallback(text, sid, speed, ttsChunkCallbackForJni(sentenceChunkSizes))
+        }
+      }
+      if (subtitleMode != "off" && sentenceChunkSizes.isEmpty() && audio.samples.isNotEmpty()) {
+        sentenceChunkSizes.add(audio.samples.size)
       }
       val map = Arguments.createMap()
       val samplesArray = Arguments.createArray()
       for (sample in audio.samples) {
@@ -499,17 +608,29 @@ internal class SherpaOnnxTtsHelper(
       }
       map.putArray("samples", samplesArray)
       map.putInt("sampleRate", audio.sampleRate)
-      val subtitlesArray = Arguments.createArray()
-      if (audio.samples.isNotEmpty() && audio.sampleRate > 0) {
-        val durationSec = audio.samples.size.toDouble() / audio.sampleRate
-        val subtitleMap = Arguments.createMap()
-        subtitleMap.putString("text", text)
-        subtitleMap.putDouble("start", 0.0)
-        subtitleMap.putDouble("end", durationSec)
-        subtitlesArray.pushMap(subtitleMap)
+      val subtitleItems = if (subtitleMode == "off") {
+        emptyList()
+      } else {
+        val sentenceSegments = SherpaOnnxTextSegmenter.splitIntoSentences(text)
+        if (subtitleGranularity == "word") {
+          SherpaOnnxTextSegmenter.buildWordSubtitlesFromSentenceChunks(
+            sentenceSegments,
+            sentenceChunkSizes,
+            audio.sampleRate
+          )
+        } else {
+          SherpaOnnxTextSegmenter.buildSubtitlesFromChunks(
+            sentenceSegments,
+            sentenceChunkSizes,
+            audio.sampleRate
+          )
+        }
       }
-      map.putArray("subtitles", subtitlesArray)
-      map.putBoolean("estimated", true)
+      map.putArray("subtitles", toSubtitleWritableArray(subtitleItems))
+      val timingMode = if (subtitleMode == "off") "off" else "estimated"
+      map.putString("timingMode", timingMode)
       promise.resolve(map)
     } catch (e: Exception) {
       Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: ${e.message ?: "Failed to generate speech"}", e)
@@ -564,18 +685,23 @@ internal class SherpaOnnxTtsHelper(
         when {
           hasReferenceAudio(options) && inst.isPocket -> {
             val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
-            inst.tts!!.generateWithConfigAndCallback(text, config) { chunk ->
-              if (inst.ttsStreamCancelled.get()) return@generateWithConfigAndCallback 0
-              emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
-              chunk.size
-            }
+            inst.tts!!.generateWithConfigAndCallback(
+              text,
+              config,
+              ttsStreamChunkCallbackForJni(inst.ttsStreamCancelled) { chunk ->
+                emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
+              }
+            )
           }
           else -> {
-            inst.tts!!.generateWithCallback(text, sid, speed) { chunk ->
-              if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
-              emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
-              chunk.size
-            }
+            inst.tts!!.generateWithCallback(
+              text,
+              sid,
+              speed,
+              ttsStreamChunkCallbackForJni(inst.ttsStreamCancelled) { chunk ->
+                emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
+              }
+            )
           }
         }
         if (!inst.ttsStreamCancelled.get()) {
@@ -915,6 +1041,37 @@ internal class SherpaOnnxTtsHelper(
   /** Reads the "speed" option as a Float; falls back to 1.0 when options or the key is absent. */
   private fun getSpeed(options: ReadableMap?): Float {
     if (options == null || !options.hasKey("speed")) return 1.0f
     return options.getDouble("speed").toFloat()
   }
+  /**
+   * Reads the "subtitleMode" option, trimmed and lower-cased.
+   * Returns "off", "fast" or "accurate"; anything else (including a missing value) becomes "fast".
+   */
+  private fun getSubtitleMode(options: ReadableMap?): String {
+    val requested = options?.getString("subtitleMode")?.trim()?.lowercase()
+    return requested?.takeIf { it == "off" || it == "fast" || it == "accurate" } ?: "fast"
+  }
+  /**
+   * Reads the "subtitleGranularity" option, trimmed and lower-cased.
+   * Returns "word" or "sentence"; anything else (including a missing value) becomes "sentence".
+   */
+  private fun getSubtitleGranularity(options: ReadableMap?): String {
+    val requested = options?.getString("subtitleGranularity")?.trim()?.lowercase()
+    return requested?.takeIf { it == "word" || it == "sentence" } ?: "sentence"
+  }
+  /** True when the "subtitleGranularity" option, trimmed and lower-cased, equals "character". */
+  private fun isCharacterGranularityRequested(options: ReadableMap?): Boolean =
+    options?.getString("subtitleGranularity")?.trim()?.lowercase() == "character"
+  /** Converts [items] into an array of maps, each with "text", "start" and "end" entries. */
+  private fun toSubtitleWritableArray(items: List<SubtitleTimingItem>) = Arguments.createArray().apply {
+    items.forEach { cue ->
+      val cueMap = Arguments.createMap().apply {
+        putString("text", cue.text)
+        putDouble("start", cue.start)
+        putDouble("end", cue.end)
+      }
+      pushMap(cueMap)
+    }
+  }
   /** Build Kotlin GenerationConfig from ReadableMap. Returns null only when options is null; otherwise returns a config with sid, speed, silenceScale, numSteps, and any reference/extra fields from options. */
   private fun parseGenerationConfig(options: ReadableMap?): GenerationConfig? {
     if (options == null) return null

package/ios/Resources/model_licenses/alignment-models-license-status.csv ADDED Viewed

@@ -0,0 +1,5 @@
+asset_name,license_type,commercial_use,confidence,detection_source,license_file
+wav2vec2-base-960h-fp16.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md
+wav2vec2-base-960h-int8.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md
+wav2vec2-base-960h-q4f16.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md
+wav2vec2-base-960h.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md