npm - react-native-sherpa-onnx - Versions diffs - 0.4.0 → 0.4.2 - Mend

react-native-sherpa-onnx 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

package/README.md +3 -0
package/android/src/main/assets/model_licenses/alignment-models-license-status.csv +5 -0
package/android/src/main/cpp/CMakeLists.txt +3 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.cpp +66 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.h +17 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-alignment.cpp +108 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +30 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.cpp +66 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.h +30 -0
package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +21 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxAlignmentHelper.kt +555 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +76 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxTextSegmenter.kt +330 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +180 -23
package/ios/Resources/model_licenses/alignment-models-license-status.csv +5 -0
package/ios/SherpaOnnx+Alignment.mm +704 -0
package/ios/SherpaOnnx+STT.mm +6 -0
package/ios/SherpaOnnx+TTS.mm +624 -50
package/ios/model_detect/sherpa-onnx-model-detect-alignment.mm +108 -0
package/ios/model_detect/sherpa-onnx-model-detect.h +31 -0
package/ios/model_detect/sherpa-onnx-validate-alignment.h +30 -0
package/ios/model_detect/sherpa-onnx-validate-alignment.mm +66 -0
package/ios/stt/sherpa-onnx-stt-wrapper.h +3 -1
package/ios/stt/sherpa-onnx-stt-wrapper.mm +6 -0
package/lib/module/NativeSherpaOnnx.js.map +1 -1
package/lib/module/alignment/index.js +27 -0
package/lib/module/alignment/index.js.map +1 -0
package/lib/module/alignment/types.js +2 -0
package/lib/module/alignment/types.js.map +1 -0
package/lib/module/alignment/vocab.js +40 -0
package/lib/module/alignment/vocab.js.map +1 -0
package/lib/module/download/paths.js +9 -1
package/lib/module/download/paths.js.map +1 -1
package/lib/module/download/registry.js +17 -1
package/lib/module/download/registry.js.map +1 -1
package/lib/module/download/types.js +1 -0
package/lib/module/download/types.js.map +1 -1
package/lib/module/index.js +6 -4
package/lib/module/index.js.map +1 -1
package/lib/module/licenses.js +8 -2
package/lib/module/licenses.js.map +1 -1
package/lib/module/stt/types.js.map +1 -1
package/lib/module/tts/index.js +68 -2
package/lib/module/tts/index.js.map +1 -1
package/lib/module/tts/subtitles.js +400 -0
package/lib/module/tts/subtitles.js.map +1 -0
package/lib/module/tts/tempAudio.js +17 -0
package/lib/module/tts/tempAudio.js.map +1 -0
package/lib/module/tts/types.js.map +1 -1
package/lib/typescript/src/NativeSherpaOnnx.d.ts +34 -3
package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
package/lib/typescript/src/alignment/index.d.ts +8 -0
package/lib/typescript/src/alignment/index.d.ts.map +1 -0
package/lib/typescript/src/alignment/types.d.ts +23 -0
package/lib/typescript/src/alignment/types.d.ts.map +1 -0
package/lib/typescript/src/alignment/vocab.d.ts +5 -0
package/lib/typescript/src/alignment/vocab.d.ts.map +1 -0
package/lib/typescript/src/download/paths.d.ts +5 -2
package/lib/typescript/src/download/paths.d.ts.map +1 -1
package/lib/typescript/src/download/registry.d.ts.map +1 -1
package/lib/typescript/src/download/types.d.ts +2 -1
package/lib/typescript/src/download/types.d.ts.map +1 -1
package/lib/typescript/src/index.d.ts +1 -0
package/lib/typescript/src/index.d.ts.map +1 -1
package/lib/typescript/src/licenses.d.ts.map +1 -1
package/lib/typescript/src/stt/types.d.ts +5 -2
package/lib/typescript/src/stt/types.d.ts.map +1 -1
package/lib/typescript/src/tts/index.d.ts +2 -1
package/lib/typescript/src/tts/index.d.ts.map +1 -1
package/lib/typescript/src/tts/subtitles.d.ts +24 -0
package/lib/typescript/src/tts/subtitles.d.ts.map +1 -0
package/lib/typescript/src/tts/tempAudio.d.ts +3 -0
package/lib/typescript/src/tts/tempAudio.d.ts.map +1 -0
package/lib/typescript/src/tts/types.d.ts +68 -2
package/lib/typescript/src/tts/types.d.ts.map +1 -1
package/package.json +6 -1
package/scripts/alignment-models/README.md +90 -0
package/scripts/alignment-models/build_and_upload.js +724 -0
package/scripts/alignment-models/sources.csv +5 -0
package/scripts/alignment-models/sync_alignment_license_status.js +123 -0
package/src/NativeSherpaOnnx.ts +35 -3
package/src/alignment/index.ts +41 -0
package/src/alignment/types.ts +22 -0
package/src/alignment/vocab.ts +38 -0
package/src/download/paths.ts +18 -5
package/src/download/registry.ts +23 -3
package/src/download/types.ts +1 -0
package/src/index.tsx +6 -4
package/src/licenses.ts +12 -1
package/src/stt/types.ts +5 -2
package/src/tts/index.ts +110 -3
package/src/tts/subtitles.ts +611 -0
package/src/tts/tempAudio.ts +31 -0
package/src/tts/types.ts +79 -2
package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1

package/scripts/alignment-models/sources.csv ADDED Viewed

@@ -0,0 +1,5 @@
+id;onnx_url;license;license_type;commercial_use
+wav2vec2-base-960h-int8;https://huggingface.co/onnx-community/wav2vec2-base-960h-ONNX/resolve/main/onnx/model_int8.onnx;https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md;apache-2.0;yes
+wav2vec2-base-960h-fp16;https://huggingface.co/onnx-community/wav2vec2-base-960h-ONNX/resolve/main/onnx/model_fp16.onnx;https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md;apache-2.0;yes
+wav2vec2-base-960h;https://huggingface.co/onnx-community/wav2vec2-base-960h-ONNX/resolve/main/onnx/model.onnx;https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md;apache-2.0;yes
+wav2vec2-base-960h-q4f16;https://huggingface.co/onnx-community/wav2vec2-base-960h-ONNX/resolve/main/onnx/model_q4f16.onnx;https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md;apache-2.0;yes

package/scripts/alignment-models/sync_alignment_license_status.js ADDED Viewed

@@ -0,0 +1,123 @@
+#!/usr/bin/env node
+'use strict';
+const path = require('node:path');
+const fsp = require('node:fs/promises');
+const { readSources } = require('./build_and_upload.js');
+const REPO_ROOT = path.join(__dirname, '..', '..'); // two directory levels above the folder holding this script
+const DEFAULT_CSV = path.join(__dirname, 'sources.csv'); // sources file used when no path is passed on the command line
+const TARGETS = [ // license-status CSV files that main() rewrites: one under android/, one under ios/
+  path.join(
+    REPO_ROOT,
+    'android/src/main/assets/model_licenses/alignment-models-license-status.csv'
+  ),
+  path.join(
+    REPO_ROOT,
+    'ios/Resources/model_licenses/alignment-models-license-status.csv'
+  ),
+];
+const HEADER = // header row that writeMerged emits as the first line of every target file
+  'asset_name,license_type,commercial_use,confidence,detection_source,license_file';
+function parseCsvLine(line) { // Split one comma-separated line into an array of field strings, honoring double-quoted fields.
+  const out = [];
+  let cur = ''; // text of the field currently being accumulated
+  let inQuotes = false;
+  for (let i = 0; i < line.length; i += 1) {
+    const c = line[i];
+    if (inQuotes) {
+      if (c === '"') {
+        if (line[i + 1] === '"') { // two consecutive quotes inside a quoted field stand for one literal quote
+          cur += '"';
+          i += 1; // skip the second quote of the pair
+        } else {
+          inQuotes = false; // a lone quote closes the quoted section
+        }
+      } else {
+        cur += c; // commas inside quotes are kept as field text
+      }
+    } else if (c === ',') { // an unquoted comma ends the current field
+      out.push(cur);
+      cur = '';
+    } else if (c === '"') { // the opening quote itself is not added to the field
+      inQuotes = true;
+    } else {
+      cur += c;
+    }
+  }
+  out.push(cur); // flush the last field, so the result always has at least one element
+  return out;
+}
+function escapeCsvField(value) { // Render a value as a single CSV field; null and undefined become the empty string.
+  const s = value == null ? '' : String(value);
+  if (/[",\n\r]/.test(s)) { // quoting is needed only when the text holds a double quote, comma, LF, or CR
+    return `"${s.replace(/"/g, '""')}"`; // wrap in double quotes and double every embedded quote
+  }
+  return s;
+}
+function rowFromSource(source) { // Build one CSV row string for a source entry; the six values follow the column order of HEADER.
+  return [
+    `${source.modelId}.tar.bz2`, // asset_name: model id plus the archive extension
+    source.licenseType,
+    source.commercialUse,
+    'high', // confidence is a fixed value for every generated row
+    'manual', // detection_source is a fixed value for every generated row
+    source.licenseUrl || '', // license_file: empty when the source has no license URL
+  ]
+    .map(escapeCsvField)
+    .join(',');
+}
+async function readExistingRows(filePath) { // Load an existing status CSV into a Map keyed by first column, with the raw row text as value.
+  let text;
+  try {
+    text = await fsp.readFile(filePath, 'utf8');
+  } catch { // NOTE(review): every read error, not only a missing file, is treated as no existing rows, and writeMerged then rewrites the file - confirm this is intended
+    return new Map();
+  }
+  const lines = text.split('\n').filter((line) => line.length > 0); // NOTE(review): splits on LF only, so rows read from a CRLF file keep a trailing CR
+  if (lines.length === 0) {
+    return new Map();
+  }
+  const map = new Map();
+  for (let i = 1; i < lines.length; i += 1) { // starts at 1: the first non-empty line is skipped as the header
+    const cols = parseCsvLine(lines[i]);
+    if (cols[0]) { // rows whose first column is empty are dropped
+      map.set(cols[0], lines[i]); // the original row text is stored verbatim, not re-serialized
+    }
+  }
+  return map;
+}
+async function writeMerged(filePath, sources) { // Merge rows built from sources into the CSV at filePath and rewrite the whole file.
+  const byAsset = await readExistingRows(filePath);
+  for (const s of sources) {
+    byAsset.set(`${s.modelId}.tar.bz2`, rowFromSource(s)); // a generated row replaces any existing row with the same asset name
+  }
+  const keys = Array.from(byAsset.keys()).sort((a, b) => a.localeCompare(b)); // rows are emitted in localeCompare order of asset name
+  const body = keys.map((k) => byAsset.get(k)).join('\n');
+  const out = `${HEADER}\n${body}\n`; // header line, then the rows, then a trailing newline
+  await fsp.mkdir(path.dirname(filePath), { recursive: true }); // create the target folder when it does not exist yet
+  await fsp.writeFile(filePath, out, 'utf8');
+}
+async function main() { // Entry point: read the sources CSV and update every file listed in TARGETS.
+  const csvPath = process.argv[2] // optional first CLI argument: path to the sources CSV, resolved against the working directory
+    ? path.resolve(process.cwd(), process.argv[2])
+    : DEFAULT_CSV;
+  const sources = await readSources(csvPath); // readSources is imported from ./build_and_upload.js; presumably yields objects with modelId, licenseType, commercialUse, licenseUrl - TODO confirm
+  for (const target of TARGETS) { // targets are written one after another, not in parallel
+    await writeMerged(target, sources);
+    console.log(`[sync] ${path.relative(REPO_ROOT, target)}`);
+  }
+}
+main().catch((e) => { // on any failure: print the error message and exit with status 1
+  console.error(e instanceof Error ? e.message : e);
+  process.exit(1);
+});

package/src/NativeSherpaOnnx.ts CHANGED Viewed

@@ -341,8 +341,8 @@ export interface Spec extends TurboModule {
    * Generate speech with subtitle/timestamp metadata.
    * @param instanceId - Unique ID for this engine instance
    * @param text - Text to convert to speech
-   * @param options - Same as {@link generateTts} options (cloning: Zipvoice/Pocket; Zipvoice needs `referenceText`).
-   * @returns Object with samples, sampleRate, subtitles, and estimated flag
+   * @param options - Same as {@link generateTts} options plus subtitle options (`subtitleMode`, `subtitleGranularity`).
+   * @returns Object with samples, sampleRate, subtitles, and timingMode
    */
   generateTtsWithTimestamps(
     instanceId: string,
@@ -352,7 +352,39 @@ export interface Spec extends TurboModule {
     samples: number[];
     sampleRate: number;
     subtitles: Array<{ text: string; start: number; end: number }>;
-    estimated: boolean;
+    timingMode: string;
+  }>;
+  // ==================== Alignment / Subtitle Methods ====================
+  /**
+   * Run wav2vec2 CTC forced alignment on an audio file and transcript.
+   * @param modelPath - Absolute path to wav2vec2 ONNX model file
+   * @param audioPath - Absolute path to input audio file (WAV recommended)
+   * @param text - Transcript to align
+   * @param vocabJson - JSON map of token -> id (stringified to reduce bridge overhead)
+   */
+  runCTCForcedAlignment(
+    modelPath: string,
+    audioPath: string,
+    text: string,
+    vocabJson: string
+  ): Promise<{
+    words: Array<{ text: string; start: number; end: number }>;
+    chars: Array<{ text: string; start: number; end: number }>;
+  }>;
+  detectAlignmentModel(
+    modelDir: string,
+    modelType?: string
+  ): Promise<{
+    success: boolean;
+    error?: string;
+    detectedModels: Array<{ type: string; modelDir: string }>;
+    modelType?: string;
+    paths?: {
+      model?: string;
+    };
   }>;
   // ==================== Online (streaming) TTS Methods ====================

package/src/alignment/index.ts ADDED Viewed

@@ -0,0 +1,41 @@
+import SherpaOnnx from '../NativeSherpaOnnx';
+import type { ModelPathConfig } from '../types';
+import { resolveModelPath } from '../utils';
+import type { AlignmentDetectResult, AlignmentModelType } from './types';
+export {
+  WAV2VEC2_BLANK_ID,
+  WAV2VEC2_FRAME_DURATION_S,
+  WAV2VEC2_VOCAB,
+  WAV2VEC2_WORD_BOUNDARY_ID,
+} from './vocab';
+export async function detectAlignmentModel( // Resolve modelPath, call the native detectAlignmentModel, and return a normalized result object.
+  modelPath: ModelPathConfig,
+  options?: { modelType?: AlignmentModelType }
+): Promise<AlignmentDetectResult> {
+  const resolvedPath = await resolveModelPath(modelPath);
+  const raw = await SherpaOnnx.detectAlignmentModel(
+    resolvedPath,
+    options?.modelType
+  );
+  const err = typeof raw.error === 'string' ? raw.error.trim() : ''; // a non-string error is treated as absent
+  const modelFilePath =
+    typeof raw.paths?.model === 'string' ? raw.paths.model.trim() : '';
+  return {
+    success: raw.success,
+    ...(err.length > 0 ? { error: err } : {}), // the error key is present only when the trimmed message is non-empty
+    detectedModels: raw.detectedModels ?? [], // always an array, even when the native result has none
+    ...(raw.modelType != null && raw.modelType !== '' // the modelType key is omitted when null, undefined, or empty
+      ? { modelType: raw.modelType }
+      : {}),
+    ...(modelFilePath.length > 0 ? { paths: { model: modelFilePath } } : {}), // the paths key is present only with a non-empty model path
+  };
+}
+export type {
+  AlignmentResult,
+  AlignmentTimestamp,
+  AlignmentDetectResult,
+  AlignmentModelType,
+} from './types';

package/src/alignment/types.ts ADDED Viewed

@@ -0,0 +1,22 @@
+export interface AlignmentTimestamp { // One aligned text unit together with its start and end position.
+  text: string;
+  start: number; // NOTE(review): the unit is not stated in this file (presumably seconds) - TODO confirm
+  end: number;
+}
+export interface AlignmentResult { // Same words/chars shape as the result declared for runCTCForcedAlignment in NativeSherpaOnnx.ts.
+  words: AlignmentTimestamp[];
+  chars: AlignmentTimestamp[];
+}
+export type AlignmentModelType = 'wav2vec2' | 'auto'; // accepted values for the modelType option of detectAlignmentModel
+export interface AlignmentDetectResult { // Return shape of detectAlignmentModel in alignment/index.ts.
+  success: boolean;
+  error?: string;
+  detectedModels: Array<{ type: string; modelDir: string }>;
+  modelType?: string;
+  paths?: {
+    model?: string;
+  };
+}

package/src/alignment/vocab.ts ADDED Viewed

@@ -0,0 +1,38 @@
+export const WAV2VEC2_VOCAB: Record<string, number> = { // token -> id table: four angle-bracket tokens, the bar, 26 uppercase letters, and the apostrophe
+  '<pad>': 0, // same id as WAV2VEC2_BLANK_ID
+  '<s>': 1,
+  '</s>': 2,
+  '<unk>': 3,
+  '|': 4, // same id as WAV2VEC2_WORD_BOUNDARY_ID
+  'E': 5,
+  'T': 6,
+  'A': 7,
+  'O': 8,
+  'N': 9,
+  'I': 10,
+  'H': 11,
+  'S': 12,
+  'R': 13,
+  'D': 14,
+  'L': 15,
+  'U': 16,
+  'W': 17,
+  'M': 18,
+  'C': 19,
+  'F': 20,
+  'G': 21,
+  'Y': 22,
+  'P': 23,
+  'B': 24,
+  'V': 25,
+  'K': 26,
+  "'": 27,
+  'X': 28,
+  'J': 29,
+  'Q': 30,
+  'Z': 31,
+};
+export const WAV2VEC2_BLANK_ID = 0; // equals the id of the pad token in WAV2VEC2_VOCAB
+export const WAV2VEC2_WORD_BOUNDARY_ID = 4; // equals the id of the bar token in WAV2VEC2_VOCAB
+export const WAV2VEC2_FRAME_DURATION_S = 0.02; // presumably seconds per model output frame (name ends in _S) - TODO confirm

package/src/download/paths.ts CHANGED Viewed

@@ -3,10 +3,14 @@ import { ModelCategory } from './types';
 import type { ModelArchiveExt } from './types';
 import { RELEASE_API_BASE } from './constants';
-export const CATEGORY_CONFIG: Record<
-  ModelCategory,
-  { tag: string; cacheFile: string; baseDir: string }
-> = {
+type CategoryConfig = { // Per-category download settings; the value type of CATEGORY_CONFIG.
+  tag: string; // release tag that getReleaseUrl appends to the release API base
+  cacheFile: string;
+  baseDir: string;
+  releaseApiBase?: string; // when set, getReleaseUrl uses it instead of RELEASE_API_BASE
+};
+export const CATEGORY_CONFIG: Record<ModelCategory, CategoryConfig> = {
   [ModelCategory.Tts]: {
     tag: 'tts-models',
     cacheFile: 'tts-models.json',
@@ -42,6 +46,13 @@ export const CATEGORY_CONFIG: Record<
     cacheFile: 'qnn-models.json',
     baseDir: `${DocumentDirectoryPath}/sherpa-onnx/models/qnn`,
   },
+  [ModelCategory.Alignment]: {
+    tag: 'alignment-models',
+    cacheFile: 'alignment-models.json',
+    baseDir: `${DocumentDirectoryPath}/sherpa-onnx/models/alignment`,
+    releaseApiBase:
+      'https://api.github.com/repos/XDcobra/react-native-sherpa-onnx/releases/tags',
+  },
 };
 export function getCacheDir(): string {
@@ -131,5 +142,7 @@ export function getNativeAssetExtractedModelDir(modelId: string): string {
 }
 export function getReleaseUrl(category: ModelCategory): string {
-  return `${RELEASE_API_BASE}/${CATEGORY_CONFIG[category].tag}`;
+  const config = CATEGORY_CONFIG[category];
+  const releaseApiBase = config.releaseApiBase ?? RELEASE_API_BASE;
+  return `${releaseApiBase}/${config.tag}`;
 }

package/src/download/registry.ts CHANGED Viewed

@@ -39,6 +39,26 @@ const checksumCacheByCategory: Partial<
   Record<ModelCategory, Map<string, string>>
 > = {};
+const DEFAULT_RELEASE_REPO = 'k2-fsa/sherpa-onnx'; // owner/repo used when a category has no usable releaseApiBase
+function getReleaseRepoFromConfig(category: ModelCategory): string { // Return the GitHub owner/repo whose releases host the assets of the given category.
+  const releaseApiBase = CATEGORY_CONFIG[category].releaseApiBase;
+  if (!releaseApiBase) {
+    return DEFAULT_RELEASE_REPO;
+  }
+  const match = releaseApiBase.match( // capture group 1 is the owner/repo part of an api.github.com releases/tags URL
+    /^https:\/\/api\.github\.com\/repos\/([^/]+\/[^/]+)\/releases\/tags\/?$/
+  );
+  return match?.[1] ?? DEFAULT_RELEASE_REPO; // a releaseApiBase that does not match the pattern falls back to the default repo without any warning
+}
+function getChecksumUrl(category: ModelCategory): string { // Build the github.com download URL of checksum.txt under the release tag of the given category.
+  const tag = CATEGORY_CONFIG[category].tag;
+  const repo = getReleaseRepoFromConfig(category); // owner/repo taken from releaseApiBase, or the default repo
+  return `https://github.com/${repo}/releases/download/${tag}/checksum.txt`;
+}
 export async function fetchChecksumsFromRelease(
   category: ModelCategory
 ): Promise<Map<string, string>> {
@@ -51,9 +71,7 @@ export async function fetchChecksumsFromRelease(
   try {
     const checksums = await retryWithBackoff(
       async () => {
-        const response = await fetch(
-          `https://github.com/k2-fsa/sherpa-onnx/releases/download/${CATEGORY_CONFIG[category].tag}/checksum.txt`
-        );
+        const response = await fetch(getChecksumUrl(category));
         if (!response.ok) {
           throw new Error(
             `Failed to fetch checksum.txt for ${category}: ${response.status}`
@@ -176,6 +194,8 @@ function isAssetSupportedForCategory(
         lower.includes('binary') &&
         lower.includes('seconds')
       );
+    case ModelCategory.Alignment:
+      return ext === 'tar.bz2';
     default:
       return false;
   }

package/src/download/types.ts CHANGED Viewed

@@ -8,6 +8,7 @@ export enum ModelCategory {
   Enhancement = 'enhancement',
   Separation = 'separation',
   Qnn = 'qnn',
+  Alignment = 'alignment',
 }
 /** TTS model type for meta; 'unknown' when id could not be classified. */

package/src/index.tsx CHANGED Viewed

@@ -17,17 +17,19 @@ export {
 } from './utils';
 export { copyFileToContentUri } from './tts';
+export * from './alignment';
 export { getModelLicenses, type ModelLicense } from './licenses';
 // Note: Feature-specific exports are available via subpath imports:
 // - import { createSTT, createStreamingSTT, ... } from 'react-native-sherpa-onnx/stt'
 // - import { createTTS, ... } from 'react-native-sherpa-onnx/tts'
+// - import { detectAlignmentModel, ... } from 'react-native-sherpa-onnx/alignment'
 // - import { ... } from 'react-native-sherpa-onnx/download'
 // - import { getBundledArchives, listBundledArchives, extractArchive } from 'react-native-sherpa-onnx/extraction'
-// - import { ... } from 'react-native-sherpa-onnx/vad' (planned)
-// - import { ... } from 'react-native-sherpa-onnx/diarization' (planned)
-// - import { ... } from 'react-native-sherpa-onnx/enhancement' (planned)
-// - import { ... } from 'react-native-sherpa-onnx/separation' (planned)
+// - import { ... } from 'react-native-sherpa-onnx/vad'
+// - import { ... } from 'react-native-sherpa-onnx/diarization'
+// - import { ... } from 'react-native-sherpa-onnx/enhancement'
+// - import { ... } from 'react-native-sherpa-onnx/separation'
 /**
  * Test method to verify sherpa-onnx native library is loaded.

package/src/licenses.ts CHANGED Viewed

@@ -13,6 +13,7 @@ export async function getModelLicenses(): Promise<ModelLicense[]> {
   const asrPath = 'model_licenses/asr-models-license-status.csv';
   const qnnPath = 'model_licenses/qnn-asr-models-license-status.csv';
   const ttsPath = 'model_licenses/tts-models-license-status.csv';
+  const alignmentPath = 'model_licenses/alignment-models-license-status.csv';
   const speechEnhancementPath =
     'model_licenses/speech-enhancement-models-license-status.csv';
@@ -20,10 +21,12 @@ export async function getModelLicenses(): Promise<ModelLicense[]> {
     SherpaOnnx.readAssetFileAsUtf8(asrPath),
     SherpaOnnx.readAssetFileAsUtf8(qnnPath),
     SherpaOnnx.readAssetFileAsUtf8(ttsPath),
+    SherpaOnnx.readAssetFileAsUtf8(alignmentPath),
     SherpaOnnx.readAssetFileAsUtf8(speechEnhancementPath),
   ]);
-  const [asrResult, qnnResult, ttsResult, enhancementResult] = results;
+  const [asrResult, qnnResult, ttsResult, alignmentResult, enhancementResult] =
+    results;
   const licenses: ModelLicense[] = [];
@@ -51,6 +54,14 @@ export async function getModelLicenses(): Promise<ModelLicense[]> {
     );
   }
+  if (alignmentResult.status === 'fulfilled') {
+    licenses.push(...parseCsv(alignmentResult.value));
+  } else {
+    console.warn(
+      `[SherpaOnnx] Failed to load alignment model licenses: ${alignmentResult.reason}`
+    );
+  }
   if (enhancementResult.status === 'fulfilled') {
     licenses.push(...parseCsv(enhancementResult.value));
   } else {

package/src/stt/types.ts CHANGED Viewed

@@ -80,9 +80,12 @@ export interface SttWhisperModelOptions {
   task?: 'transcribe' | 'translate';
   /** Padding at end of samples. Kotlin default 1000; C++ default -1. */
   tailPaddings?: number;
-  /** Token-level timestamps. Android only; ignored on iOS. */
+  /**
+   * Token-level timestamps (cross-attention / DTW). Requires Whisper ONNX models
+   * built with attention outputs (see sherpa-onnx).
+   */
   enableTokenTimestamps?: boolean;
-  /** Segment-level timestamps. Android only; ignored on iOS. */
+  /** Segment-level timestamps via Whisper timestamp tokens. */
   enableSegmentTimestamps?: boolean;
 }

package/src/tts/index.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import { unlink } from '@dr.pogodin/react-native-fs';
 import SherpaOnnx from '../NativeSherpaOnnx';
 import type {
   TTSInitializeOptions,
@@ -12,6 +13,11 @@ import type {
 } from './types';
 import type { ModelPathConfig } from '../types';
 import { resolveModelPath } from '../utils';
+import {
+  assertSubtitleGranularityForMode,
+  generateSubtitlesFromAudio,
+} from './subtitles';
+import { saveAlignmentAudioToTempWav } from './tempAudio';
 let ttsInstanceCounter = 0;
@@ -159,6 +165,12 @@ function toNativeTtsOptions(
   if (options.numSteps !== undefined) out.numSteps = options.numSteps;
   if (options.extra != null && Object.keys(options.extra).length > 0)
     out.extra = options.extra;
+  if (options.subtitles?.mode !== undefined) {
+    out.subtitleMode = options.subtitles.mode;
+  }
+  if (options.subtitles?.granularity !== undefined) {
+    out.subtitleGranularity = options.subtitles.granularity;
+  }
   return out;
 }
@@ -276,7 +288,18 @@ export async function createTTS(
       opts?: TtsGenerationOptions
     ): Promise<GeneratedAudio> {
       guard();
-      return SherpaOnnx.generateTts(instanceId, text, toNativeTtsOptions(opts));
+      const optionsWithSubtitlesOff: TtsGenerationOptions = {
+        ...(opts ?? {}),
+        subtitles: {
+          ...(opts?.subtitles ?? {}),
+          mode: 'off',
+        },
+      };
+      return SherpaOnnx.generateTts(
+        instanceId,
+        text,
+        toNativeTtsOptions(optionsWithSubtitlesOff)
+      );
     },
     async generateSpeechWithTimestamps(
@@ -284,11 +307,89 @@ export async function createTTS(
       opts?: TtsGenerationOptions
     ): Promise<GeneratedAudioWithTimestamps> {
       guard();
-      return SherpaOnnx.generateTtsWithTimestamps(
+      const subtitleMode = opts?.subtitles?.mode ?? 'fast';
+      const subtitleGranularity = opts?.subtitles?.granularity ?? 'sentence';
+      assertSubtitleGranularityForMode(subtitleMode, subtitleGranularity);
+      if (subtitleMode !== 'accurate') {
+        const optionsWithDefaultSubtitleMode: TtsGenerationOptions = {
+          ...(opts ?? {}),
+          subtitles: {
+            ...(opts?.subtitles ?? {}),
+            mode: subtitleMode,
+          },
+        };
+        const native = await SherpaOnnx.generateTtsWithTimestamps(
+          instanceId,
+          text,
+          toNativeTtsOptions(optionsWithDefaultSubtitleMode)
+        );
+        const timingMode =
+          native.timingMode === 'off' ||
+          native.timingMode === 'estimated' ||
+          native.timingMode === 'aligned'
+            ? native.timingMode
+            : 'off';
+        return {
+          ...native,
+          timingMode,
+        };
+      }
+      const alignmentModelPath = opts?.subtitles?.alignmentModelPath?.trim();
+      if (!alignmentModelPath) {
+        throw new Error(
+          'ALIGNMENT_MODEL_MISSING: Provide subtitles.alignmentModelPath for accurate mode.'
+        );
+      }
+      const optionsWithSubtitlesOff: TtsGenerationOptions = {
+        ...(opts ?? {}),
+        subtitles: {
+          ...(opts?.subtitles ?? {}),
+          mode: 'off',
+        },
+      };
+      const generated = await SherpaOnnx.generateTts(
         instanceId,
         text,
-        toNativeTtsOptions(opts)
+        toNativeTtsOptions(optionsWithSubtitlesOff)
       );
+      let tempAudioPath: string | null = null;
+      try {
+        tempAudioPath = await saveAlignmentAudioToTempWav(
+          generated,
+          instanceId
+        );
+        const subtitleResult = await generateSubtitlesFromAudio(
+          text,
+          tempAudioPath,
+          {
+            mode: 'accurate',
+            granularity: subtitleGranularity,
+            alignmentModelPath,
+          }
+        );
+        return {
+          ...generated,
+          subtitles: subtitleResult.subtitles,
+          timingMode: subtitleResult.timingMode,
+        };
+      } finally {
+        if (tempAudioPath) {
+          unlink(tempAudioPath).catch(() => {
+            // ignore cleanup errors
+          });
+        }
+      }
     },
     async updateParams(opts: TtsUpdateOptions): Promise<{
@@ -437,6 +538,7 @@ export function shareAudioFile(
 // Streaming TTS (separate engine; use createStreamingTTS for chunk callbacks and PCM playback)
 export { createStreamingTTS } from './streaming';
 export type { StreamingTtsEngine } from './streamingTypes';
+export { generateSubtitlesFromAudio } from './subtitles';
 // Export types and runtime type list
 export type {
@@ -451,6 +553,11 @@ export type {
   TtsSupertonicModelOptions,
   TtsUpdateOptions,
   TtsGenerationOptions,
+  SubtitleMode,
+  SubtitleGranularity,
+  SubtitleOptions,
+  SubtitleFromAudioOptions,
+  SubtitleResult,
   GeneratedAudio,
   GeneratedAudioWithTimestamps,
   TtsSubtitleItem,